def main():
    parser = argparse.ArgumentParser(description='Stock trading ML model.')
    parser.add_argument('--model_suffix', default=None, help='Model to load')
    parser.add_argument('--data_files', required=True, nargs='+',
                        help='Data to train on.')
    parser.add_argument('--start_date', default=None,
                        help='Start date of the data.')
    parser.add_argument('--end_date', default=None,
                        help='End date of the data.')
    parser.add_argument('--action', default='dev',
                        choices=['dev', 'train', 'eval', 'cont'])
    args = parser.parse_args()
    utils.logging_config()
    ml = ML(args.data_files, args.start_date, args.end_date, args.model_suffix)
    if args.action == 'train':
        ml.train(save_model=True)
    elif args.action == 'dev':
        ml.k_fold_cross_validation(5)
    elif args.action == 'eval':
        ml.evaluate()
    elif args.action == 'cont':
        ml.continuous_training(20, 1, 1)
    else:
        raise ValueError('Invalid action')
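# Example invocations of the CLI above (the script name and CSV paths are
# hypothetical; the flags and action choices are the ones defined in the parser):
#
#   python ml.py --data_files data_2019.csv data_2020.csv --action train
#   python ml.py --data_files data_2020.csv --action eval --model_suffix p100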
def __init__(self, alpaca, start_date=None, end_date=None):
    self.root_dir = os.path.dirname(os.path.realpath(__file__))
    self.output_dir = os.path.join(
        self.root_dir, utils.OUTPUTS_DIR, 'simulate',
        datetime.datetime.now().strftime('%Y-%m-%d-%H-%M'))
    os.makedirs(self.output_dir, exist_ok=True)
    utils.logging_config(os.path.join(self.output_dir, 'result.txt'))
    super(TradingSimulate, self).__init__(alpaca,
                                          start_date=start_date,
                                          end_date=end_date)
    self.start_date = (start_date or
                       self.history_dates[utils.DAYS_IN_A_YEAR + 1].strftime('%F'))
    self.end_date = end_date or utils.get_business_day(1)
    self.start_point, self.end_point = 0, self.history_length - 1
    while (self.start_point < self.history_length and
           pd.to_datetime(self.start_date) > self.history_dates[self.start_point]):
        self.start_point += 1
    while (self.end_point > 0 and
           pd.to_datetime(self.end_date) < self.history_dates[self.end_point]):
        self.end_point -= 1
    self.values = {
        'Total': ([self.history_dates[self.start_point - 1]], [1.0])
    }
    self.win_trades, self.lose_trades = 0, 0
    signal.signal(signal.SIGINT, self.safe_exit)
def __init__(self,
             alpaca,
             start_date=None,
             end_date=None,
             model=None,
             data_files=None,
             write_data=False):
    self.root_dir = os.path.dirname(os.path.realpath(__file__))
    self.output_dir = os.path.join(
        self.root_dir, utils.OUTPUTS_DIR, 'simulate',
        datetime.datetime.now().strftime('%Y-%m-%d-%H-%M'))
    os.makedirs(self.output_dir, exist_ok=True)
    utils.logging_config(os.path.join(self.output_dir, 'result.txt'))
    self.write_data = write_data
    period = None
    if data_files:
        self.data_df = pd.concat(
            [pd.read_csv(data_file) for data_file in data_files])
        year_diff = (datetime.datetime.today().date().year -
                     pd.to_datetime(self.data_df.iloc[0]['Date']).year + 1)
        period = '%dy' % (year_diff,)
    super(TradingSimulate, self).__init__(alpaca,
                                          period=period,
                                          start_date=start_date,
                                          end_date=end_date,
                                          model=model,
                                          load_history=not bool(data_files))
    self.data_files = data_files
    if self.data_files:
        self.start_date = start_date or self.data_df.iloc[0].Date
        self.end_date = end_date or self.data_df.iloc[-1].Date
        self.values = {
            'Total': ([self.get_prev_market_date(pd.to_datetime(self.start_date))],
                      [1.0])
        }
    else:
        self.start_date = (start_date or
                           self.history_dates[utils.DAYS_IN_A_YEAR + 1].strftime('%F'))
        self.end_date = end_date or datetime.datetime.today().strftime('%F')
        self.start_point, self.end_point = 0, self.history_length - 1
        while (self.start_point < self.history_length and
               pd.to_datetime(self.start_date) > self.history_dates[self.start_point]):
            self.start_point += 1
        while (self.end_point > 0 and
               pd.to_datetime(self.end_date) < self.history_dates[self.end_point]):
            self.end_point -= 1
        if self.write_data:
            stats_cols = ['Symbol', 'Date'] + utils.ML_FEATURES + ['Gain']
            self.stats = pd.DataFrame(columns=stats_cols)
        self.values = {
            'Total': ([self.history_dates[self.start_point - 1]], [1.0])
        }
    self.win_trades, self.lose_trades = 0, 0
    signal.signal(signal.SIGINT, self.safe_exit)
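# Worked example of the period computation above (hypothetical dates): if the
# first row's Date falls in 2018 and today is in 2021, year_diff = 2021 - 2018 + 1 = 4,
# so period becomes '4y' and history loading is skipped (load_history=False).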
def __init__(self, alpaca, polygon):
    self.root_dir = os.path.dirname(os.path.realpath(__file__))
    output_dir = os.path.join(self.root_dir, utils.OUTPUTS_DIR, 'realtime',
                              utils.get_business_day(0))
    os.makedirs(output_dir, exist_ok=True)
    utils.logging_config(os.path.join(output_dir, 'log.txt'))
    super(TradingRealTime, self).__init__(alpaca)
    self.active = True
    self.equity, self.cash = 0, 0
    self.polygon = polygon
    self.update_account()
    self.lock = threading.RLock()
    self.thresholds = {}
    self.prices = {}
    self.ordered_symbols = []
    self.errors = []
    self.price_cache_file = os.path.join(output_dir, 'prices.json')
    self.drop_low_volume_symbols()
    read_cache = os.path.isfile(self.price_cache_file)
    if read_cache:
        logging.info('Reading cached stock prices...')
        with open(self.price_cache_file) as f:
            self.prices = json.loads(f.read())
    else:
        logging.info('Loading current stock prices...')
        self.update_prices(self.closes.keys(), use_tqdm=True)
    for symbol in self.closes.keys():
        threshold = self.get_threshold(symbol)
        self.thresholds[symbol] = threshold
    self.update_ordered_symbols()
    self.update_frequencies = [(10, 120), (100, 600),
                               (len(self.ordered_symbols), 2400)]
    self.last_updates = ({self.update_frequencies[-1][0]: datetime.datetime.now()}
                         if not read_cache else {})
    self.trading_list = []
    self.next_market_close = self.alpaca.get_clock().next_close.timestamp()
def __init__(self, alpaca):
    self.root_dir = os.path.dirname(os.path.realpath(__file__))
    output_dir = os.path.join(self.root_dir, utils.OUTPUTS_DIR, 'realtime',
                              utils.get_business_day(0))
    os.makedirs(output_dir, exist_ok=True)
    utils.logging_config(os.path.join(output_dir, 'log.txt'))
    super(TradingRealTime, self).__init__(alpaca)
    self.active = True
    self.equity, self.cash = 0, 0
    self.update_account()
    self.lock = threading.RLock()
    self.prices = {}
    self.position_file = os.path.join(output_dir, 'positions.json')
    self.drop_low_volume_symbols()
    for symbol in self.closes.keys():
        self.closes[symbol] = np.append(self.closes[symbol], 0)
        self.opens[symbol] = np.append(self.opens[symbol], 0)
        self.volumes[symbol] = np.append(self.volumes[symbol], 0)
    self.price_cache_file = os.path.join(output_dir, 'prices.json')
    read_cache = os.path.isfile(self.price_cache_file)
    if read_cache:
        logging.info('Reading cached stock prices...')
        with open(self.price_cache_file) as f:
            self.prices = json.loads(f.read())
        self.embed_prices_to_closes()
        self.last_update = None
    else:
        logging.info('Loading current stock prices...')
        self.update_prices(self.closes.keys(), use_tqdm=True)
        self.last_update = datetime.datetime.now()
    self.trading_list = []
    self.next_market_close = self.alpaca.get_clock().next_close.timestamp()
                    type=int, default=100, metavar='N',
                    help='report interval')
parser.add_argument('--save_dir', type=str, default='transformer_out',
                    help='directory path to save the final model and training log')
parser.add_argument('--gpus', type=str,
                    help='list of gpus to run, e.g. 0 or 0,2,5. empty means using cpu. '
                         '(using single gpu is suggested)')
args = parser.parse_args()

logging_config(args.save_dir)
logging.info(args)


def cache_dataset(dataset, prefix):
    """Cache the processed dataset into an npz file.

    Parameters
    ----------
    dataset : SimpleDataset
    prefix : str
    """
    if not os.path.exists(_C.CACHE_PATH):
        os.makedirs(_C.CACHE_PATH)
    src_data = np.array([ele[0] for ele in dataset])
    tgt_data = np.array([ele[1] for ele in dataset])
        train_data_loader = prepare_data_loader(args, train_dataset, vocab)
        val_data_loader = prepare_data_loader(args, val_dataset, vocab, test=True)
        model = NLIModel(len(vocab), args.embedding_size, args.hidden_size,
                         args.dropout, args.intra_attention)
        train_model(model, train_data_loader, val_data_loader, vocab.embedding,
                    ctx, args)
    elif args.mode == 'test':
        model_args = argparse.Namespace(**json.load(
            open(os.path.join(args.model_dir, 'config.json'))))
        vocab = nlp.Vocab.from_json(
            open(os.path.join(args.model_dir, 'vocab.jsons')).read())
        val_dataset = read_dataset(args, 'test_file')
        val_data_loader = prepare_data_loader(args, val_dataset, vocab, test=True)
        model = NLIModel(len(vocab), model_args.embedding_size,
                         model_args.hidden_size, 0., model_args.intra_attention)
        model.load_parameters(os.path.join(args.model_dir, 'checkpoints',
                                           'valid_best.params'), ctx=ctx)
        loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
        logger.info('Test on {}'.format(args.test_file))
        loss, acc = test_model(model, val_data_loader, loss_func, ctx)
        logger.info('loss={:.4f} acc={:.4f}'.format(loss, acc))


if __name__ == '__main__':
    args = parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    logging_config(os.path.join(args.output_dir, 'main.log'))
    main(args)
        dropout=args.dropout, max_seq_len=max_length)
    model.initialize(mx.init.Xavier(), ctx=ctx)
    if not args.random_embedding:
        # set the embedding layer parameters to the pre-trained embedding
        model.embedding.weight.set_data(vocab.embedding.idx_to_vec)
    elif args.fixed_embedding:
        model.embedding.collect_params().setattr('grad_req', 'null')
    return model


if __name__ == '__main__':
    logging_config(args.log_dir, 'train', level=logging.INFO)
    ctx = mx.cpu()  # or mx.gpu(N) if GPU device N is available

    # for cross-validation
    vocab, train_dataset, transformer, max_length = load_dataset(args.train_file)
    print('sentence length is: ', max_length)
    # k-fold cross-validation for training and testing different models
    k_fold_cross_valid(5, transformer, train_dataset, vocab, max_length)

    # To predict the test labels instead, uncomment the code below and comment
    # out the cross-validation lines above.
    # vocab, train_dataset, test_dataset, train_transformer, test_transformer, max_length = \
    #     load_dataset(args.train_file, cross_validation=False,
    #                  test_file='cleaned_entity_type_test.tsv')
    # random.shuffle(train_dataset)
    # print('sentence length is: ', max_length)
from handlers import MainHandler
from workers import GpsModuleWorker

PORT = 12345  # replace this with a random port once service registration is working


def make_app(coords):
    return Application([
        (r'/', MainHandler, {"coords": coords}),
        (r'/healthcheck', HealthCheckHandler),
    ])


if __name__ == '__main__':
    os.environ['GPS_ROOT'] = os.getcwd()
    logging_config()
    logging.info("starting gps app")
    coords = Coords()
    # gps_worker = GpsModuleWorker(coords)
    methods = [
        method for method in dir(coords)
        if callable(getattr(coords, method)) and method[0] != '_'
    ]
    service = Service('192.168.1.22', PORT, 'GPS', methods)
    service.register_interface()
    app = make_app(coords)
    app.listen(PORT)
    IOLoop.current().start()
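# Quick smoke test once the app is listening (the host and port are the ones
# hard-coded above; HealthCheckHandler's response body is not shown in this snippet):
#
#   curl http://192.168.1.22:12345/healthcheck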
        train_model(model, train_data_loader, val_data_loader, vocab.embedding,
                    ctx, args)
    elif args.mode == 'test':
        model_args = argparse.Namespace(
            **json.load(open(os.path.join(args.model_dir, 'config.json'))))
        vocab = nlp.Vocab.from_json(
            open(os.path.join(args.model_dir, 'vocab.jsons')).read())
        val_dataset = read_dataset(args, 'test_file')
        val_data_loader = prepare_data_loader(args, val_dataset, vocab, test=True)
        model = build_model(model_args, vocab)
        model.load_parameters(os.path.join(args.model_dir, 'checkpoints',
                                           'valid_best.params'), ctx=ctx)
        loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
        logger.info('Test on {}'.format(args.test_file))
        loss, acc = test_model(model, val_data_loader, loss_func, ctx)
        logger.info('loss={:.4f} acc={:.4f}'.format(loss, acc))


if __name__ == '__main__':
    args = parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    logging_config(os.path.join(args.output_dir, 'main.log'))
    main(args)
import json
import os

from botocore.exceptions import ClientError

from error_response import error_response
from utils import assume_role, genpass, configure_user_client, \
    configure_iam_resource, logging_config

logger = logging_config()


def lambda_handler(event, context):
    logger.info(event)
    account_id = event['pathParameters']['account-id']
    event_body = json.loads(event["body"])
    username = event_body["username"]
    try:
        response = _reset_user_password_cloudwatch_account(account_id, username)
    except ClientError as error:
        logger.exception(error)
        return error_response(error)
    return {'statusCode': 200, 'body': json.dumps(response)}


def _reset_user_password_cloudwatch_account(account_id, username):
    session = assume_role(account_id, os.environ['FUNCTION_POLICY'])
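# A minimal test event for lambda_handler above, with its shape inferred from the
# field accesses in the handler (the account id and username are made up):
#
#   event = {
#       'pathParameters': {'account-id': '123456789012'},
#       'body': json.dumps({'username': 'example.user'}),
#   }
#   lambda_handler(event, None)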
def train_eval(args):
    logging_config(folder=args.save_dir, name='log{:d}'.format(args.save_id),
                   no_console=False)
    logging.info(args)
    ### check context
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)
    ### load data
    dataset = DataLoader(data_name=args.data_name, seed=args.seed)
    print(dataset)
    model = Model(use_KG=True,
                  input_node_dim=args.entity_embed_dim,
                  gnn_model=args.gnn_model,
                  num_gnn_layers=args.gnn_num_layer,
                  n_hidden=args.gnn_hidden_size,
                  dropout=args.dropout_rate,
                  n_entities=dataset.n_KG_entity,
                  n_relations=dataset.n_KG_relation,
                  relation_dim=args.relation_embed_dim,
                  reg_lambda_kg=args.regs,
                  reg_lambda_gnn=args.regs)
    if use_cuda:
        model.cuda()
    logging.info(model)
    ### optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    valid_metric_logger = MetricLogger(
        ['epoch', 'recall', 'ndcg', 'is_best'], ['%d', '%.5f', '%.5f', '%d'],
        os.path.join(args.save_dir, 'valid{:d}.csv'.format(args.save_id)))
    test_metric_logger = MetricLogger(
        ['epoch', 'recall', 'ndcg'], ['%d', '%.5f', '%.5f'],
        os.path.join(args.save_dir, 'test{:d}.csv'.format(args.save_id)))
    best_epoch = -1
    best_recall = 0.0

    train_g = dataset.train_g
    nid_th = th.LongTensor(train_g.ndata["id"])
    etype_th = th.LongTensor(train_g.edata["type"])
    if use_cuda:
        nid_th, etype_th = nid_th.cuda(), etype_th.cuda()
    train_g.ndata['id'] = nid_th
    train_g.edata['type'] = etype_th

    test_g = dataset.test_g
    nid_th = th.LongTensor(test_g.ndata["id"])
    etype_th = th.LongTensor(test_g.edata["type"])
    if use_cuda:
        nid_th, etype_th = nid_th.cuda(), etype_th.cuda()
    test_g.ndata['id'] = nid_th
    test_g.edata['type'] = etype_th

    item_id_range = th.LongTensor(dataset.item_id_range).cuda() if use_cuda \
        else th.LongTensor(dataset.item_id_range)

    for epoch in range(1, args.max_epoch + 1):
        ### train kg
        time1 = time()
        kg_sampler = dataset.KG_sampler(batch_size=args.batch_size_kg)
        iter = 0
        total_loss = 0.0
        for h, r, pos_t, neg_t, _ in kg_sampler:
            iter += 1
            model.train()
            h_th = th.LongTensor(h)
            r_th = th.LongTensor(r)
            pos_t_th = th.LongTensor(pos_t)
            neg_t_th = th.LongTensor(neg_t)
            if use_cuda:
                h_th, r_th, pos_t_th, neg_t_th = (h_th.cuda(), r_th.cuda(),
                                                  pos_t_th.cuda(), neg_t_th.cuda())
            loss = model.transR(h_th, r_th, pos_t_th, neg_t_th)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            if (iter % args.print_every) == 0 or iter == 1:
                logging.info("Epoch {:04d} Iter {:04d} | Loss {:.4f} ".format(
                    epoch, iter, total_loss / iter))
        logging.info('Time for KGE: {:.1f}s, loss {:.4f}'.format(
            time() - time1, total_loss / iter))

        ### train GNN
        if args.use_attention:
            time1 = time()
            print("Compute attention weight in train ...")
            with th.no_grad():
                A_w = model.compute_attention(train_g)
            train_g.edata['w'] = A_w
            print("Time: {:.2f}s".format(time() - time1))
        time1 = time()
        cf_sampler = dataset.CF_pair_sampler(batch_size=args.batch_size)
        iter = 0
        total_loss = 0.0
        for user_ids, item_pos_ids, item_neg_ids, _ in cf_sampler:
            iter += 1
            model.train()
            user_ids_th = th.LongTensor(user_ids)
            item_pos_ids_th = th.LongTensor(item_pos_ids)
            item_neg_ids_th = th.LongTensor(item_neg_ids)
            if use_cuda:
                user_ids_th, item_pos_ids_th, item_neg_ids_th = \
                    user_ids_th.cuda(), item_pos_ids_th.cuda(), item_neg_ids_th.cuda()
            embedding = model.gnn(train_g, train_g.ndata['id'])
            loss = model.get_loss(embedding, user_ids_th, item_pos_ids_th,
                                  item_neg_ids_th)
            loss.backward()
            # th.nn.utils.clip_grad_norm_(model.parameters(), args.grad_norm)  # clip gradients
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            if (iter % args.print_every) == 0 or iter == 1:
                logging.info("Epoch {:04d} Iter {:04d} | Loss {:.4f} ".format(
                    epoch, iter, total_loss / iter))
        logging.info('Time for GNN: {:.1f}s, loss {:.4f}'.format(
            time() - time1, total_loss / iter))

        if epoch % args.evaluate_every == 0:
            time1 = time()
            val_recall, val_ndcg = eval(model, train_g, dataset.train_user_dict,
                                        dataset.valid_user_dict, item_id_range,
                                        use_cuda, args.use_attention)
            info = "Epoch{}, [{:.1f}s] val recall:{:.5f}, val ndcg:{:.5f}".format(
                epoch, time() - time1, val_recall, val_ndcg)
            # save best model
            if val_recall > best_recall:
                valid_metric_logger.log(epoch=epoch, recall=val_recall,
                                        ndcg=val_ndcg, is_best=1)
                best_recall = val_recall
                # best_ndcg = val_ndcg
                best_epoch = epoch
                time1 = time()
                test_recall, test_ndcg = eval(model, test_g,
                                              dataset.train_valid_user_dict,
                                              dataset.test_user_dict, item_id_range,
                                              use_cuda, args.use_attention)
                test_metric_logger.log(epoch=epoch, recall=test_recall,
                                       ndcg=test_ndcg)
                info += "\t[{:.1f}s] test recall:{:.5f}, test ndcg:{:.5f}".format(
                    time() - time1, test_recall, test_ndcg)
                # th.save({'state_dict': model.state_dict(), 'epoch': epoch}, model_state_file)
            else:
                valid_metric_logger.log(epoch=epoch, recall=val_recall,
                                        ndcg=val_ndcg, is_best=0)
                recall, ndcg = eval(model, test_g, dataset.train_valid_user_dict,
                                    dataset.test_user_dict, item_id_range,
                                    use_cuda, args.use_attention)
                print("test recall:{}, test_ndcg: {}".format(recall, ndcg))
            logging.info(info)

    logging.info(
        "Final test recall:{:.5f}, test ndcg:{:.5f}, best epoch:{}".format(
            test_recall, test_ndcg, best_epoch))
parser.add_argument('--bleu', type=str, default='tweaked',
                    help='Schemes for computing bleu score. It can be: '
                         '"tweaked": uses steps similar to get_ende_bleu.sh in the '
                         'tensor2tensor repository, where compound words are put in ATAT format; '
                         '"13a": uses official WMT tokenization and produces the same results '
                         'as the official script (mteval-v13a.pl) used by WMT; '
                         '"intl": uses international tokenization in mteval-v14a.pl')
parser.add_argument('--log_interval', type=int, default=100, metavar='N',
                    help='report interval')
parser.add_argument('--save_dir', type=str, default='transformer_out',
                    help='directory path to save the final model and training log')
parser.add_argument('--gpus', type=str,
                    help='list of gpus to run, e.g. 0 or 0,2,5. empty means using cpu. '
                         '(using single gpu is suggested)')
args = parser.parse_args()
logging_config(args.save_dir)
logging.info(args)

data_train, data_val, data_test, val_tgt_sentences, test_tgt_sentences, src_vocab, tgt_vocab = \
    dataprocessor.load_translation_data(dataset=args.dataset, bleu=args.bleu, args=args)

dataprocessor.write_sentences(val_tgt_sentences,
                              os.path.join(args.save_dir, 'val_gt.txt'))
dataprocessor.write_sentences(test_tgt_sentences,
                              os.path.join(args.save_dir, 'test_gt.txt'))

data_train = data_train.transform(lambda src, tgt: (src, tgt, len(src), len(tgt)),
                                  lazy=False)
data_val = gluon.data.SimpleDataset([(ele[0], ele[1], len(ele[0]), len(ele[1]), i)
                                     for i, ele in enumerate(data_val)])
data_test = gluon.data.SimpleDataset([(ele[0], ele[1], len(ele[0]), len(ele[1]), i)
                                      for i, ele in enumerate(data_test)])