def __load_ohlcv(self, bin_size):
    """
    Read the data.
    :return:
    """
    start_time = datetime.now(timezone.utc) - 1 * timedelta(days=121)
    end_time = datetime.now(timezone.utc)
    file = OHLC_FILENAME.format(self.pair, bin_size)
    self.bin_size = bin_size

    if os.path.exists(file):
        self.df_ohlcv = load_data(file)
        self.df_ohlcv.set_index(self.df_ohlcv.columns[0], inplace=True)

        if self.update_data:
            self.df_ohlcv = self.df_ohlcv[:-1]  # exclude last candle
            data = self.download_data(bin_size,
                                      dateutil.parser.isoparse(self.df_ohlcv.iloc[-1].name),
                                      end_time)
            self.df_ohlcv = pd.concat([self.df_ohlcv, data])
            self.save_csv(self.df_ohlcv, file)
            # self.df_ohlcv.reset_index(inplace=True)
            self.df_ohlcv = load_data(file)
    else:
        data = self.download_data(bin_size, start_time, end_time)
        self.save_csv(data, file)
        self.df_ohlcv = load_data(file)

    if self.check_candles_flag:
        self.check_candles(self.df_ohlcv)
def test_validate_continuous(self):
    file = os.path.join(os.path.dirname(__file__), "./ohlc/discontinuous.csv")
    data = load_data(file)
    assert not validate_continuous(data, '5m')[0]

    file = os.path.join(os.path.dirname(__file__), "./ohlc/continuous.csv")
    data = load_data(file)
    assert validate_continuous(data, '5m')[0]
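# A minimal sketch of the validate_continuous(data, bin_size) helper the test
# above assumes: it checks that consecutive timestamps in the index are spaced
# exactly one bin apart and reports the first gap. The name, the
# (ok, first_bad_timestamp) return shape, and the offset table are assumptions
# for illustration, not necessarily the library's actual API.
import pandas as pd

BIN_TO_OFFSET = {'1m': '1min', '5m': '5min', '15m': '15min', '1h': '1h', '2h': '2h'}

def validate_continuous(data, bin_size):
    expected = pd.Timedelta(BIN_TO_OFFSET[bin_size])
    diffs = pd.DatetimeIndex(data.index).to_series().diff().dropna()
    gaps = diffs[diffs != expected]
    if gaps.empty:
        return True, None
    return False, gaps.index[0]  # first timestamp that breaks continuity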
def __load_ohlcv(self, bin_size):
    """
    Read the data.
    :return:
    """
    start_time = datetime.now(timezone.utc) - 1 * timedelta(days=self.days)
    end_time = datetime.now(timezone.utc)
    file = OHLC_FILENAME.format("binance_futures", self.pair, bin_size)

    # Force minute granularity if multiple timeframes are used
    if len(bin_size) > 1:
        self.minute_granularity = True

    if self.minute_granularity and "1m" not in bin_size:
        bin_size.append('1m')  # add the 1m timeframe to the list in case we need minute granularity

    self.bin_size = bin_size

    warmup = None  # warmup needed for each timeframe, in minutes
    for t in bin_size:
        if self.warmup_tf is None:
            warmup = allowed_range_minute_granularity[t][3]
            self.warmup_tf = t
        elif warmup < allowed_range_minute_granularity[t][3]:
            warmup = allowed_range_minute_granularity[t][3]
            self.warmup_tf = t
        else:
            continue

    if os.path.exists(file):
        self.df_ohlcv = load_data(file)
        self.df_ohlcv.set_index(self.df_ohlcv.columns[0], inplace=True)

        if self.update_data:
            self.df_ohlcv = self.df_ohlcv[:-1]  # exclude last candle
            data = self.download_data(bin_size,
                                      dateutil.parser.isoparse(self.df_ohlcv.iloc[-1].name),
                                      end_time)
            self.df_ohlcv = pd.concat([self.df_ohlcv, data])
            self.save_csv(self.df_ohlcv, file)
            self.df_ohlcv.reset_index(inplace=True)
            self.df_ohlcv = load_data(file)
    else:
        data = self.download_data(bin_size, start_time, end_time)
        self.save_csv(data, file)
        self.df_ohlcv = load_data(file)

    if self.check_candles_flag:
        self.check_candles(self.df_ohlcv)
def __load_ohlcv(self, bin_size):
    """
    Load the data.
    :return:
    """
    start_time = datetime.now(timezone.utc) - 1 * timedelta(days=31)
    end_time = datetime.now(timezone.utc)
    file = OHLC_FILENAME.format(bin_size)

    if os.path.exists(file):
        self.df_ohlcv = load_data(file)
    else:
        self.download_data(file, bin_size, start_time, end_time)
        self.df_ohlcv = load_data(file)
def do_reduce(idx, params, test_mode=True):
    start = time.time()
    if idx % 100 == 0 or test_mode:
        print(str(idx) + " " + str(datetime.datetime.now()))

    # reduce data
    data_path = os.path.join('cache', 'data_reduced', (params['data_hash_id'] + '.pkl'))
    if not os.path.isfile(data_path):
        print('applying data reduction')
        data = src.load_data(params['dataset'])

        # make dim reduction
        reducer = getattr(src.reducers, params['reducer'])
        data_reduced = reducer(data, params)

        # pickle reduced data
        pickle.dump(data_reduced, open(data_path, 'wb'))

        total_time = round(time.time() - start, ndigits=2)
        time_out = {
            'data_hash_id': params['data_hash_id'],
            'time_reduce': total_time
        }
        time_out = pd.DataFrame(time_out, index=[idx])
        time_out.to_csv(os.path.join('cache', 'data_reduced_time', (params['data_hash_id'] + '.csv')))
def __load_ohlcv(self, bin_size):
    """
    Load the data.
    :return:
    """
    logger.info('bitmex_backtest >> __load_ohlcv >> %s ' % bin_size)
    start_time = datetime.now(timezone.utc) - 1 * timedelta(days=31)
    end_time = datetime.now(timezone.utc)
    file = OHLC_FILENAME.format(bin_size)
    start = time.time()  # record the start time

    if os.path.exists(file):
        self.df_ohlcv = load_data(file)
    else:
        self.download_data(file, bin_size, start_time, end_time)
        self.df_ohlcv = load_data(file)

    logger.info('bitmex_backtest >> __load_ohlcv >> loading data time : %s' % str(time.time() - start))
    print('load_data time:', time.time() - start)
def test_load_file(self):
    bitmex = BitMexBackTest()
    end_time = datetime.datetime.now(datetime.timezone.utc)
    start_time = end_time - 5 * datetime.timedelta(hours=2)

    with tempfile.TemporaryDirectory() as dir:
        file = dir + "/tmp.csv"
        bitmex.download_data(file, '2h', start_time, end_time)
        data_frame = load_data(file)
        now = datetime.datetime.now(datetime.timezone.utc)
        assert data_frame.iloc[0].name < now
def test_load_data():
    from src import load_data

    n_features = np.random.randint(5, 20)
    n_samples = np.random.randint(50, 150)
    features, targets, attribute_names = write_random_csv_file(n_features, n_samples)
    _features, _targets, _attribute_names = load_data('tests/test.csv')

    assert attribute_names == _attribute_names
    assert np.allclose(features, _features) and np.allclose(targets, _targets)
def test_information_gain():
    from src import load_data
    from src import information_gain

    _features, _targets, _attribute_names = load_data('data/PlayTennis.csv')
    iGHumidity = information_gain(_features, 2, _targets)
    iGWind = information_gain(_features, 3, _targets)
    realIGHumidity = 0.1515
    realIGWind = 0.048

    assert np.abs(iGHumidity - realIGHumidity) < 1e-3
    assert np.abs(iGWind - realIGWind) < 1e-3
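# A hedged sketch of the information_gain(features, attribute_index, targets)
# function the test above exercises: ID3-style gain, i.e. the entropy of the
# targets minus the weighted entropy of the targets after splitting on one
# attribute column. This is an illustrative implementation, not necessarily
# the repo's own.
import numpy as np

def entropy(targets):
    _, counts = np.unique(targets, return_counts=True)
    probs = counts / counts.sum()
    return -np.sum(probs * np.log2(probs))

def information_gain(features, attribute_index, targets):
    column = features[:, attribute_index]
    weighted_entropy = 0.0
    for value in np.unique(column):
        subset = targets[column == value]
        weighted_entropy += (len(subset) / len(targets)) * entropy(subset)
    return entropy(targets) - weighted_entropy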
def main():
    data_generator = load_data()
    _history = []
    device = None
    model = None
    criterion = None
    fold_index = 0

    for TEXT, LABEL, train_data, val_data in data_generator.get_fold_data(num_folds=args['num_folds']):
        logger.info("***** Running Training *****")
        logger.info(f"Now fold: {fold_index + 1} / {args['num_folds']}")

        TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.300d")
        logger.info(f'Embedding size: {TEXT.vocab.vectors.size()}.')
        LABEL.build_vocab(train_data)  # for converting str into float labels

        model = Model(len(TEXT.vocab), args['embedding_dim'], args['hidden_dim'],
                      args['output_dim'], args['num_layers'], args['dropout'],
                      TEXT.vocab.vectors, args["embedding_trainable"])
        optimizer = optim.Adam(model.parameters())
        criterion = nn.BCEWithLogitsLoss()

        if args['gpu'] is True and args['gpu_number'] is not None:
            torch.cuda.set_device(args['gpu_number'])
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        model = model.to(device)
        criterion = criterion.to(device)

        train_iterator = data.Iterator(train_data, batch_size=args['batch_size'],
                                       sort_key=lambda x: len(x.text), device=device)
        val_iterator = data.Iterator(val_data, batch_size=args['batch_size'],
                                     sort_key=lambda x: len(x.text), device=device)

        for epoch in range(args['epochs']):
            train_loss, train_acc = train_run(model, train_iterator, optimizer, criterion)
            logger.info(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            val_loss, val_acc = eval_run(model, val_iterator, criterion)
            logger.info(f'Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}% |')

        _history.append([val_loss, val_acc])
        fold_index += 1

    _history = np.asarray(_history)
    loss = np.mean(_history[:, 0])
    acc = np.mean(_history[:, 1])

    logger.info('***** Cross Validation Result *****')
    logger.info(f'LOSS: {loss}, ACC: {acc}')
def prep_data(DATA_FILE, PERCENTAGE_TRAIN):
    def pad(sentence):
        if len(sentence) > MAX_SEQ_LEN:
            return sentence[:MAX_SEQ_LEN]
        num_pads = MAX_SEQ_LEN - len(sentence)
        return sentence + [""] * num_pads

    def process_rows(data, glove_vectors):
        """
        Generate input x matrix M[num_data_points][glove_dimension][max_seq_length]
        """
        inputs, outputs = [], []
        for sentence, mbti_hot in data:
            x = []
            ### Pad sentence
            padded_sentence = pad(sentence)
            ### Convert each word in sentence to glove vector
            for w in padded_sentence:
                embedding = np.array([0.0] * GLOVE_DIMENSION)
                if w in glove_vectors:
                    embedding = glove_vectors[w]
                x.append(embedding)
            inputs.append(x)
            outputs.append(mbti_hot)
        return inputs, outputs

    train_data, test_data = load_data(DATA_FILE, PERCENTAGE_TRAIN)
    glove_vectors = load_word_vectors("../data/glove.6B.50d.txt", GLOVE_DIMENSION)
    train_x, train_y = process_rows(train_data, glove_vectors)
    test_x, test_y = process_rows(test_data, glove_vectors)
    return np.array(train_x), np.array(test_x), np.array(train_y), np.array(test_y)


# Debug: inspect the first few training examples
for i, x in enumerate(train_x):
    if i < 10:
        print("x", x)
        print("y", train_y[i])
# The prefix "nn__module__" must be used when setting hyperparameters of the Neural Net module
params = {
    'nn__max_epochs': [10, 20],
    'nn__lr': [0.1, 0.01],
    'nn__module__num_units': [5, 10],
    'nn__module__dropout': [0.1, 0.5],
    'nn__optimizer': [optim.Adam, optim.SGD, optim.RMSprop]
}

# Instantiate the grid search
gs = GridSearchCV(pipeline, params, refit=False, cv=3,
                  scoring='balanced_accuracy', verbose=1)

# Run the grid search
gs.fit(self.x, self.y)


if __name__ == "__main__":
    x, y = load_data()
    run = Run(x, y)
    # run.simple_training()
    # run.simple_pipeline_training()
    # run.simple_pipeline_training_with_callbacks()
    run.grid_search_pipeline_training()
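# For context, a minimal sketch of the kind of pipeline the parameter names above
# imply: a sklearn Pipeline whose estimator step is named 'nn' and wraps a skorch
# NeuralNetClassifier, so module hyperparameters are addressed as
# 'nn__module__<param>' in GridSearchCV. The module, its layer sizes, and the
# preprocessing step here are illustrative assumptions, not the repo's actual model.
import torch
import torch.nn as nn
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from skorch import NeuralNetClassifier

class ClassifierModule(nn.Module):
    def __init__(self, num_units=10, dropout=0.5):
        super().__init__()
        self.dense = nn.Linear(20, num_units)   # input dim 20 is an arbitrary placeholder
        self.dropout = nn.Dropout(dropout)
        self.output = nn.Linear(num_units, 2)

    def forward(self, X, **kwargs):
        X = self.dropout(torch.relu(self.dense(X)))
        return torch.softmax(self.output(X), dim=-1)

pipeline = Pipeline([
    ('scale', StandardScaler()),
    ('nn', NeuralNetClassifier(ClassifierModule)),  # step name 'nn' yields the 'nn__' prefix
])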
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
from src.cus_string import *
from src.check_crpyto import *
from src.cryptomodule import *
from src.listdir import *
from src.load_data import *
from src.out_data import *
import timeit, sys

sys.setrecursionlimit(20000)

if __name__ == "__main__":
    path = os.getcwd()
    file = list_dir(path)
    list = load_data(path, file)
    encrypt_list, decrypt_list, enc_flag, dec_flag = check_list(list)

    while True:
        if enc_flag:
            result = []
            result_rec = []
            print("Encrypted Data!")
            attribute = True
            for string in encrypt_list:
                result_string = ""
                start = timeit.default_timer()
                result.append(encrypt_string(string.rstrip("\n")))
                end = timeit.default_timer()
                print("Iterative call execute time : %f" % (end - start))
from src.bubble_sort import *
from src.heap_sort import *
from src.insert_sort import *
from src.merge_sort import *
from src.quick_sort import *
from src.select_sort import *
from src.load_data import *
from src.out_data import *
from src.interface import *
from src.listdir import *
import timeit

if __name__ == "__main__":
    path = os.getcwd()
    file_name = list_dir(path)
    original, field = load_data(path, file_name)
    data = original
    res = None
    attribute = 0

    try:
        column = int(input("Input column in Data>> "))
        if column > len(field) or column < 1:
            print("Error : invalid column")
            exit(0)
        column = column - 1
    except ValueError:
        print("Error : invalid value ...")
        exit(0)
        'deep': True,
        'nonlinear': 'relu'
    }, {
        'bn': True,
        'dropout': True,
        'branched': True,
        'deep': True,
        'nonlinear': 'elu'
    }]

for model_name, model_config in zip(model_names, model_configs):
    for subject in subjects:
        print('working on subject : ', subject)
        x, y, t, tr = load_data(subject, channels=range(29), frequency=dataset)
        x = preprocessing(x, frequency=dataset)
        metrics, histories, cnf_matrices = cross_validator(
            (x, y, t, tr), subject,
            n_splits=5, epochs=epochs, batch_size=batch_size, lr=lr,
            model_name=model_name, model_config=model_config,
            early_stopping=early_stopping, patience=patience)
        super_final_results = save_resutls(metrics,