def test_set_brute_force(
    data_dic,
    start,
    time_range_obs=30,
    time_range_test=360,
    obs_freq=10,
    prediction_freq=30
    ):

    '''
    Builds the X and Y dataframes by walking forward from start in
    steps of obs_freq and keeping every timestamp for which get_label
    returns a label (timestamps whose label is None are skipped).

    data_dic is passed through to get_label and build_row
    start is a time string accepted by
        data_fetching_utils.time_in_datetime
    time_range_obs is in days
    time_range_test is in minutes (kept in the signature for backward
        compatibility; it does not influence the result)
    obs_freq is in minutes
    prediction_freq is in minutes

    Returns (df_X, df_Y): df_X has the columns in COLS and one row
    per labelled timestamp, df_Y has the columns ['time', 'label']
    with one matching row per row of df_X.
    '''

    # Time range for the observations
    start_td = data_fetching_utils.time_in_datetime(start)
    end_td = start_td + timedelta(days=time_range_obs)
    end = data_fetching_utils.time_in_string(end_td)
    # Only used as the denominator of the progress percentage
    n_obs = data_fetching_utils.calculate_observations(start, end, obs_freq)

    # Initializing dfs
    df_X = pd.DataFrame(columns = COLS)
    df_Y = pd.DataFrame(columns = ['time', 'label'])

    # Building up the df
    print('\nStarting to build up the df...')
    step = timedelta(minutes=obs_freq)
    current = start_td
    while current <= end_td:

        current_str = data_fetching_utils.time_in_string(current)
        label = get_label(
            data_dic,
            current_str,
            prediction_freq
        )

        if label is not None: # only append if label is not None

            i_X = len(df_X)
            df_X.loc[i_X] = build_row(data_dic, current_str)
            df_Y.loc[len(df_Y)] = [current_str, label]

            # Progress is reported here, right after a row was added:
            # i_X is only defined once a label exists, and reporting
            # inside this branch prints each milestone exactly once
            # instead of on every unlabelled timestamp that follows.
            if i_X and i_X % 500 == 0:
                print('Progress: ' + str(round(i_X/n_obs*100)) + '%')

        current = current + step

    return df_X, df_Y
def subset_for_testing(data_dic, start, freq=10, time_range=360):

    '''
    Collects the observations of data_dic found at start,
    start + freq, start + 2*freq, ... up to and including
    start + time_range (forward-looking window).
    freq and time_range are defined in minutes

    Returns the list of observations in chronological order, or None
    as soon as one of the expected times is not a key of data_dic.
    '''

    first = data_fetching_utils.time_in_datetime(start)
    last = first + timedelta(minutes=time_range)

    observations = []
    moment = first

    while moment <= last:

        key = data_fetching_utils.time_in_string(moment)

        # a single gap invalidates the whole window
        if key not in data_dic:
            return None

        observations.append(data_dic[key])
        moment = moment + timedelta(minutes=freq)

    return observations
def subset_for_training(data_dic, end, freq=10, time_range=60):

    '''
    receives the data dict and an end time and returns a set
    of observations within the time range specified (backwards)
    and in the frequencies defined
    freq and time_range are defined in minutes

    The returned list starts with the observation at end and goes
    back in steps of freq down to end - time_range (inclusive).
    When a time is missing from data_dic, the observation of the
    closest earlier time on the same freq grid is used instead.

    Raises ValueError if freq is not positive.
    Raises LookupError if a time is missing and data_dic holds no
    earlier observation on the grid to fall back on (this situation
    previously made the function loop forever).
    '''

    if freq <= 0:
        # a non-positive step would never move backwards in time
        raise ValueError('freq must be a positive number of minutes')

    end_td = data_fetching_utils.time_in_datetime(end)
    start_td = end_td - timedelta(minutes=time_range)
    step = timedelta(minutes=freq)

    # Oldest key of data_dic; only computed if a time is missing,
    # and used as the lower bound of the fallback search.
    earliest = None

    current = end_td
    data_return = []

    while current >= start_td:

        current_str = data_fetching_utils.time_in_string(current)

        if current_str in data_dic:
            data_return.append(data_dic[current_str])

        else: # we take the data of the most recent previous obs
            if earliest is None:
                earliest = min(
                    (data_fetching_utils.time_in_datetime(key)
                     for key in data_dic),
                    default=None
                )

            previous = current - step
            while True:
                # nothing can exist before the oldest key, so stop
                # there instead of searching backwards indefinitely
                if earliest is None or previous < earliest:
                    raise LookupError(
                        'No observation found at or before ' + current_str
                    )

                previous_str = data_fetching_utils.time_in_string(previous)
                if previous_str in data_dic:
                    data_return.append(data_dic[previous_str])
                    break

                previous = previous - step

        current = current - step

    return data_return
def price_increased(data_dic, time_now, minutes):

    '''
    Compares the 'price_close' stored at time_now with the one stored
    the given number of minutes earlier.

    Returns 1 if the price at time_now is strictly higher, 0 otherwise.
    Missing timestamps are not handled here: both keys are looked up
    directly in data_dic.
    '''

    now_td = data_fetching_utils.time_in_datetime(time_now)
    earlier_key = data_fetching_utils.time_in_string(
        now_td - timedelta(minutes=minutes)
    )

    current_price = data_dic[time_now]['price_close']
    earlier_price = data_dic[earlier_key]['price_close']

    return 1 if current_price > earlier_price else 0
def latest_time(data_dic):

    '''
    Evaluates every key of the input data dictionary and returns
    the latest time

    Every key is converted with data_fetching_utils.time_in_datetime,
    the maximum is taken, and it is converted back to a string with
    data_fetching_utils.time_in_string.

    If data_dic has no keys, None is handed to time_in_string (same
    as before); what happens then depends on that helper.
    '''

    latest = max(
        (data_fetching_utils.time_in_datetime(key) for key in data_dic),
        default=None
    )

    return data_fetching_utils.time_in_string(latest)
def get_label(data_dic, time, time_range=30):

    '''
    Binary label for the observation at time: 1 when the
    'price_close' found time_range minutes later is strictly higher
    than the one at time, 0 otherwise.
    Returns None when either of the two times is not in data_dic.
    time_range is in minutes
    '''

    horizon_td = (
        data_fetching_utils.time_in_datetime(time)
        + timedelta(minutes=time_range)
    )
    after = data_fetching_utils.time_in_string(horizon_td)

    # without both prices there is nothing to compare
    if time not in data_dic or after not in data_dic:
        return None

    price_at_time = data_dic[time]['price_close']
    price_after = data_dic[after]['price_close']

    return 1 if price_after > price_at_time else 0
from datetime import datetime
import sys
import pickle

sys.path.insert(1, '../utils')
import data_fetching_utils as dfu
import feature_engineering_utils as feu

# What to fetch
crypto = 'BTC'
freq = 10  # in minutes

# Load the previously stored data dictionary; its latest time is
# where the new fetch starts.
data_dic_path = '../../data/working/total_data.txt'
with open(data_dic_path, 'rb') as f:
    data_dic = pickle.load(f)

# Fetch window: from the latest stored time up to the current time
now = datetime.now()
end = dfu.time_in_string(now)
start = feu.latest_time(data_dic)

latest_data = dfu.get_data(
    crypto=crypto,
    period=str(freq) + 'MIN',
    start=start,
    end=end
)

# Output file name carries the crypto, the frequency and the first
# 10 characters of the start and end time strings.
file = ''.join([
    '../../data/raw/data_', crypto, '_', str(freq),
    'min_', start[:10], '_', end[:10], '.txt'
])

print('\nSaving data...')
with open(file, 'wb') as f:
    pickle.dump(latest_data, f)

print('\nData saved in', file)