def generate_cnn_dataset(data_folder, bitcoin_file, get_class_name):
    """Write randomly sampled 12-hour price windows to labelled image files.

    The data returned by ``file_processor(bitcoin_file)`` is sampled 10,000
    times. Each sample is a window of ``slice_size_12hours`` rows ending at a
    random index; it is labelled by ``get_class_name`` and written with
    ``save_to_file`` to ``<data_folder>/<split>/<class_name>/<uuid>.png``.

    Only the 12-hour window is used; the 4-hour, 2-day and 1-week windows
    are not generated by this function.

    Args:
        data_folder: Root directory under which the ``train`` and ``test``
            sub-directories are created.
        bitcoin_file: Input handed to ``file_processor``.
        get_class_name: Callable invoked as
            ``get_class_name(frame, window, end_index)``; its result is used
            as a directory name.

    Raises:
        Exception: If a sampled window contains any null value.
    """
    # NOTE(review): presumably a pandas DataFrame (uses len, slicing and
    # .isnull().values) -- confirm against file_processor.
    prices = file_processor(bitcoin_file)
    n_positions = len(prices) - slice_size_1week
    # Windows whose end index lies past this threshold go to the test split.
    test_threshold = n_positions - (slice_size_1week * 12)

    for epoch in range(int(1e4)):
        started = time()

        end = np.random.choice(n_positions) + slice_size_12hours
        window = prices[end - slice_size_12hours:end]
        if window.isnull().values.any():
            raise Exception('NaN values detected. Please remove them.')

        class_name = get_class_name(prices, window, end)
        split = 'test' if end > test_threshold else 'train'
        save_dir = os.path.join(data_folder, split, class_name)
        mkdir_p(save_dir)

        filename = '{}/{}.png'.format(save_dir, uuid4())
        save_to_file(window, filename=filename)

        elapsed = time() - started
        print('epoch = {0}, time = {1:.3f}, filename = {2}'.format(
            str(epoch).zfill(8), elapsed, filename))
def generate_cnn_dataset(data_folder, bitcoin_file, get_class_name):
    """Rebuild ``data_folder`` with images of randomly sampled price windows.

    Steps, in order:

    1. Load ``bitcoin_file`` with ``file_processor`` and pass the result
       through ``add_returns_in_place``, which yields the data and ``levels``.
    2. Print, for each label index in ``range(len(levels))``, the fraction of
       rows whose ``close_price_returns_labels`` equals that index, then
       print ``levels``.
    3. Delete ``data_folder`` (errors ignored).
    4. For one million iterations, take a random 40-row window, label it via
       ``get_class_name`` and write it with ``save_to_file`` to
       ``<data_folder>/<split>/<class_name>/<uuid>.png``. Every 10th
       iteration (starting with iteration 0) goes to ``test``; all others
       go to ``train``.

    Args:
        data_folder: Output root directory; removed before generation starts.
        bitcoin_file: Input handed to ``file_processor``.
        get_class_name: Callable invoked as
            ``get_class_name(frame, window, start_index, window_length)``;
            its result is used as a directory name.

    Raises:
        Exception: If a sampled window contains any null value.
    """
    window_len = 40
    test_period = 10
    separator = '-' * 80

    prices = file_processor(bitcoin_file)
    prices, levels = add_returns_in_place(prices)

    print(separator)
    print('Those values should be roughly equal to 1/len(levels):')
    for label in range(len(levels)):
        is_label = (prices['close_price_returns_labels'] == label).values
        print(label, np.mean(is_label))
    print(levels)
    print(separator)

    n_starts = len(prices) - window_len
    shutil.rmtree(data_folder, ignore_errors=True)

    for epoch in range(int(1e6)):
        started = time()

        start = np.random.choice(n_starts)
        window = prices[start:start + window_len]
        if window.isnull().values.any():
            # Prices are sometimes discontinuous: nothing happened in one
            # 5-minute bucket. Such a window is treated as invalid and aborts
            # the run. This is most likely near the beginning of the data
            # set, where volumes are low.
            raise Exception('NaN values detected. Please remove them.')

        class_name = get_class_name(prices, window, start, window_len)
        split = 'test' if epoch % test_period == 0 else 'train'
        save_dir = os.path.join(data_folder, split, class_name)
        mkdir_p(save_dir)

        filename = '{}/{}.png'.format(save_dir, uuid4())
        save_to_file(window, filename=filename)

        elapsed = time() - started
        print('epoch = {0}, time = {1:.3f}, filename = {2}'.format(
            str(epoch).zfill(8), elapsed, filename))
def generate_bins(bitcoin_file):
    """Print the result of ``add_returns_in_place`` for ``bitcoin_file``.

    Args:
        bitcoin_file: Input handed to ``file_processor``; the processed
            result is then passed to ``add_returns_in_place``.
    """
    processed = file_processor(bitcoin_file)
    result = add_returns_in_place(processed)
    print(result)