Exemple #1
0
def generate_cnn_dataset(data_folder, bitcoin_file, get_class_name, n_samples=int(1e4)):
    """Write labelled PNG images built from randomly sampled 12-hour windows.

    For every sample a random end index ``i`` is drawn and the window
    ``btc_df[i - slice_size_12hours:i]`` is handed to ``save_to_file``. The
    image is stored as ``<data_folder>/<split>/<class_name>/<uuid>.png`` where
    ``split`` is ``'test'`` when ``i > n - 12 * slice_size_1week`` and
    ``'train'`` otherwise (``n = len(btc_df) - slice_size_1week``).

    Args:
        data_folder: Root directory under which the ``train``/``test`` trees
            are created.
        bitcoin_file: Passed unchanged to ``file_processor``; the result must
            support ``len()``, slicing and ``.isnull()`` (presumably a pandas
            DataFrame -- confirm against ``file_processor``).
        get_class_name: Callable invoked as
            ``get_class_name(btc_df, window, i)``; its return value is used as
            the class sub-directory name.
        n_samples: Number of images to generate. Defaults to 10000, the value
            that used to be hard-coded.

    Raises:
        ValueError: If the data has no more than ``slice_size_1week`` rows, or
            if a sampled window contains NaN values.
    """
    # NOTE(review): another function with this same name is defined later in
    # this file; if both end up in one module the later definition wins.
    btc_df = file_processor(bitcoin_file)

    n = len(btc_df) - slice_size_1week
    if n <= 0:
        # np.random.choice(n) would fail anyway; fail early with a clear reason.
        raise ValueError(
            'Not enough rows ({0}) to sample from; need more than {1}.'.format(
                len(btc_df), slice_size_1week))

    # Windows whose end index lies above this threshold go to the test split.
    test_threshold = n - (slice_size_1week * 12)

    for epoch in range(int(n_samples)):
        st = time()

        # Offset by the window length so the slice never starts before row 0.
        i = np.random.choice(n) + slice_size_12hours
        btc_slice_12hours = btc_df[i - slice_size_12hours:i]

        if btc_slice_12hours.isnull().values.any():
            raise ValueError('NaN values detected. Please remove them.')

        class_name = get_class_name(btc_df, btc_slice_12hours, i)
        split = 'test' if i > test_threshold else 'train'
        save_dir = os.path.join(data_folder, split, class_name)
        mkdir_p(save_dir)

        filename = os.path.join(save_dir, str(uuid4()) + '.png')
        save_to_file(btc_slice_12hours, filename=filename)
        print('epoch = {0}, time = {1:.3f}, filename = {2}'.format(
            str(epoch).zfill(8), time() - st, filename))
def generate_cnn_dataset(data_folder, bitcoin_file, get_class_name,
                         slice_size=40, test_every_steps=10, n_samples=int(1e6)):
    """Regenerate a train/test image dataset from random fixed-size windows.

    The price data is loaded with ``file_processor`` and passed through
    ``add_returns_in_place``, which yields the frame and its ``levels``. The
    share of rows carrying each ``close_price_returns_labels`` value is
    printed as a sanity check. ``data_folder`` is then deleted and refilled:
    each iteration draws a random start index ``i``, saves the window
    ``btc_df[i:i + slice_size]`` through ``save_to_file`` and stores it as
    ``<data_folder>/<split>/<class_name>/<uuid>.png``.

    Args:
        data_folder: Root output directory. It is removed (errors ignored)
            before generation starts.
        bitcoin_file: Passed unchanged to ``file_processor``.
        get_class_name: Callable invoked as
            ``get_class_name(btc_df, window, i, slice_size)``; its return
            value is used as the class sub-directory name.
        slice_size: Number of rows per window. Defaults to 40.
        test_every_steps: Every ``test_every_steps``-th iteration (starting
            with iteration 0) is written to ``test`` instead of ``train``.
            Defaults to 10.
        n_samples: Number of images to generate. Defaults to 1000000.

    Raises:
        ValueError: If the data has no more than ``slice_size`` rows, or if a
            sampled window contains NaN values.
    """
    btc_df = file_processor(bitcoin_file)
    btc_df, levels = add_returns_in_place(btc_df)

    print('-' * 80)
    print('Those values should be roughly equal to 1/len(levels):')
    for ii in range(len(levels)):
        print(ii, np.mean((btc_df['close_price_returns_labels'] == ii).values))
    print(levels)
    print('-' * 80)

    n = len(btc_df) - slice_size
    if n <= 0:
        # Check before wiping data_folder so an unusable input file does not
        # destroy a previously generated dataset.
        raise ValueError(
            'Not enough rows ({0}) to sample from; need more than {1}.'.format(
                len(btc_df), slice_size))

    shutil.rmtree(data_folder, ignore_errors=True)
    for epoch in range(int(n_samples)):
        st = time()

        i = np.random.choice(n)
        btc_slice = btc_df[i:i + slice_size]

        if btc_slice.isnull().values.any():
            # sometimes prices are discontinuous and nothing happened in one 5min bucket.
            # in that case, we consider this slice as wrong and we raise an exception.
            # it's likely to happen at the beginning of the data set where the volumes are low.
            raise ValueError('NaN values detected. Please remove them.')

        class_name = get_class_name(btc_df, btc_slice, i, slice_size)
        split = 'test' if epoch % test_every_steps == 0 else 'train'
        save_dir = os.path.join(data_folder, split, class_name)
        mkdir_p(save_dir)

        filename = os.path.join(save_dir, str(uuid4()) + '.png')
        save_to_file(btc_slice, filename=filename)
        print('epoch = {0}, time = {1:.3f}, filename = {2}'.format(
            str(epoch).zfill(8),
            time() - st, filename))
def generate_bins(bitcoin_file):
    """Load ``bitcoin_file`` and print what ``add_returns_in_place`` returns.

    The file is read through ``file_processor``; the processed data is then
    passed to ``add_returns_in_place`` and the returned value is printed to
    stdout. Nothing is returned.
    """
    processed = file_processor(bitcoin_file)
    returns_output = add_returns_in_place(processed)
    print(returns_output)