Example 1
def main():
    parser = argparse.ArgumentParser(description='Stock trading ML model.')
    parser.add_argument('--model_suffix', default=None, help='Model to load')
    parser.add_argument('--data_files',
                        required=True,
                        nargs='+',
                        help='Data to train on.')
    parser.add_argument('--start_date',
                        default=None,
                        help='Start date of the data.')
    parser.add_argument('--end_date',
                        default=None,
                        help='End date of the data.')
    parser.add_argument('--action',
                        default='dev',
                        choices=['dev', 'train', 'eval', 'cont'])
    args = parser.parse_args()
    utils.logging_config()
    ml = ML(args.data_files, args.start_date, args.end_date, args.model_suffix)
    if args.action == 'train':
        ml.train(save_model=True)
    elif args.action == 'dev':
        ml.k_fold_cross_validation(5)
    elif args.action == 'eval':
        ml.evaluate()
    elif args.action == 'cont':
        ml.continuous_training(20, 1, 1)
    else:
        raise ValueError('Invalid action')
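Each project in this listing defines its own logging_config helper, and none of those implementations appear in the snippets; the call signatures range from no arguments, to a log file or directory path, to folder/name/no_console/level keywords. As a purely illustrative sketch, a helper of this kind could be as small as the following; the parameter names and the format string are assumptions, not taken from any of the projects shown:

import logging
import os


def logging_config(log_path=None, level=logging.INFO):
    """Send log records to the console and, optionally, to log_path."""
    handlers = [logging.StreamHandler()]
    if log_path:
        # Create the parent directory when a file path like 'out/log.txt' is given.
        os.makedirs(os.path.dirname(log_path) or '.', exist_ok=True)
        handlers.append(logging.FileHandler(log_path))
    logging.basicConfig(level=level,
                        format='%(asctime)s [%(levelname)s] %(message)s',
                        handlers=handlers)
    # Example 11 below uses the return value as a logger object.
    return logging.getLogger()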
Example 2
    def __init__(self, alpaca, start_date=None, end_date=None):
        self.root_dir = os.path.dirname(os.path.realpath(__file__))
        self.output_dir = os.path.join(
            self.root_dir, utils.OUTPUTS_DIR, 'simulate',
            datetime.datetime.now().strftime('%Y-%m-%d-%H-%M'))
        os.makedirs(self.output_dir, exist_ok=True)
        utils.logging_config(os.path.join(self.output_dir, 'result.txt'))

        super(TradingSimulate, self).__init__(alpaca,
                                              start_date=start_date,
                                              end_date=end_date)

        self.start_date = (start_date
                           or self.history_dates[utils.DAYS_IN_A_YEAR +
                                                 1].strftime('%F'))
        self.end_date = end_date or utils.get_business_day(1)
        self.start_point, self.end_point = 0, self.history_length - 1
        while (self.start_point < self.history_length and pd.to_datetime(
                self.start_date) > self.history_dates[self.start_point]):
            self.start_point += 1
        while (self.end_point > 0 and pd.to_datetime(self.end_date) <
               self.history_dates[self.end_point]):
            self.end_point -= 1
        self.values = {
            'Total': ([self.history_dates[self.start_point - 1]], [1.0])
        }
        self.win_trades, self.lose_trades = 0, 0
        signal.signal(signal.SIGINT, self.safe_exit)
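utils.get_business_day is another helper whose source is not included here; Examples 2, 4 and 5 use its return value both as a path component and as input to pd.to_datetime, which suggests it returns a date string offset by N business days from today. A hypothetical version with that behavior, assuming pandas is available, might be:

import pandas as pd


def get_business_day(offset):
    # Today shifted by `offset` business days, formatted like '2024-01-31'.
    day = pd.Timestamp.today().normalize() + pd.tseries.offsets.BDay(offset)
    return day.strftime('%Y-%m-%d')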
Example 3
    def __init__(self,
                 alpaca,
                 start_date=None,
                 end_date=None,
                 model=None,
                 data_files=None,
                 write_data=False):
        self.root_dir = os.path.dirname(os.path.realpath(__file__))
        self.output_dir = os.path.join(
            self.root_dir, utils.OUTPUTS_DIR, 'simulate',
            datetime.datetime.now().strftime('%Y-%m-%d-%H-%M'))
        os.makedirs(self.output_dir, exist_ok=True)
        utils.logging_config(os.path.join(self.output_dir, 'result.txt'))
        self.write_data = write_data

        period = None
        if data_files:
            self.data_df = pd.concat(
                [pd.read_csv(data_file) for data_file in data_files])
            year_diff = (datetime.datetime.today().date().year -
                         pd.to_datetime(self.data_df.iloc[0]['Date']).year + 1)
            period = '%dy' % (year_diff, )
        super(TradingSimulate,
              self).__init__(alpaca,
                             period=period,
                             start_date=start_date,
                             end_date=end_date,
                             model=model,
                             load_history=not bool(data_files))
        self.data_files = data_files
        if self.data_files:
            self.start_date = start_date or self.data_df.iloc[0].Date
            self.end_date = end_date or self.data_df.iloc[-1].Date
            self.values = {
                'Total':
                ([self.get_prev_market_date(pd.to_datetime(self.start_date))],
                 [1.0])
            }
        else:
            self.start_date = (start_date
                               or self.history_dates[utils.DAYS_IN_A_YEAR +
                                                     1].strftime('%F'))
            self.end_date = end_date or datetime.datetime.today().strftime(
                '%F')
            self.start_point, self.end_point = 0, self.history_length - 1
            while (self.start_point < self.history_length and pd.to_datetime(
                    self.start_date) > self.history_dates[self.start_point]):
                self.start_point += 1
            while (self.end_point > 0 and pd.to_datetime(self.end_date) <
                   self.history_dates[self.end_point]):
                self.end_point -= 1
            if self.write_data:
                stats_cols = ['Symbol', 'Date'] + utils.ML_FEATURES + ['Gain']
                self.stats = pd.DataFrame(columns=stats_cols)
            self.values = {
                'Total': ([self.history_dates[self.start_point - 1]], [1.0])
            }
        self.win_trades, self.lose_trades = 0, 0
        signal.signal(signal.SIGINT, self.safe_exit)
Example 4
    def __init__(self, alpaca, polygon):
        self.root_dir = os.path.dirname(os.path.realpath(__file__))
        output_dir = os.path.join(self.root_dir, utils.OUTPUTS_DIR, 'realtime',
                                  utils.get_business_day(0))
        os.makedirs(output_dir, exist_ok=True)
        utils.logging_config(os.path.join(output_dir, 'log.txt'))
        super(TradingRealTime, self).__init__(alpaca)
        self.active = True
        self.equity, self.cash = 0, 0
        self.polygon = polygon
        self.update_account()
        self.lock = threading.RLock()
        self.thresholds = {}
        self.prices = {}
        self.ordered_symbols = []
        self.errors = []

        self.price_cache_file = os.path.join(output_dir, 'prices.json')
        self.drop_low_volume_symbols()

        read_cache = os.path.isfile(self.price_cache_file)
        if read_cache:
            logging.info('Reading cached stock prices...')
            with open(self.price_cache_file) as f:
                self.prices = json.loads(f.read())
        else:
            logging.info('Loading current stock prices...')
            self.update_prices(self.closes.keys(), use_tqdm=True)

        for symbol in self.closes.keys():
            threshold = self.get_threshold(symbol)
            self.thresholds[symbol] = threshold

        self.update_ordered_symbols()

        self.update_frequencies = [(10, 120), (100, 600),
                                   (len(self.ordered_symbols), 2400)]
        self.last_updates = ({self.update_frequencies[-1][0]: datetime.datetime.now()}
                             if not read_cache else {})
        self.trading_list = []
        self.next_market_close = self.alpaca.get_clock().next_close.timestamp()
Example 5
    def __init__(self, alpaca):
        self.root_dir = os.path.dirname(os.path.realpath(__file__))
        output_dir = os.path.join(self.root_dir, utils.OUTPUTS_DIR, 'realtime',
                                  utils.get_business_day(0))
        os.makedirs(output_dir, exist_ok=True)
        utils.logging_config(os.path.join(output_dir, 'log.txt'))
        super(TradingRealTime, self).__init__(alpaca)
        self.active = True
        self.equity, self.cash = 0, 0
        self.update_account()
        self.lock = threading.RLock()
        self.prices = {}
        self.position_file = os.path.join(output_dir, 'positions.json')

        self.drop_low_volume_symbols()

        for symbol in self.closes.keys():
            self.closes[symbol] = np.append(self.closes[symbol], 0)
            self.opens[symbol] = np.append(self.opens[symbol], 0)
            self.volumes[symbol] = np.append(self.volumes[symbol], 0)

        self.price_cache_file = os.path.join(output_dir, 'prices.json')
        read_cache = os.path.isfile(self.price_cache_file)
        if read_cache:
            logging.info('Reading cached stock prices...')
            with open(self.price_cache_file) as f:
                self.prices = json.loads(f.read())
            self.embed_prices_to_closes()
            self.last_update = None
        else:
            logging.info('Loading current stock prices...')
            self.update_prices(self.closes.keys(), use_tqdm=True)
            self.last_update = datetime.datetime.now()

        self.trading_list = []
        self.next_market_close = self.alpaca.get_clock().next_close.timestamp()
Example 6
                    type=int,
                    default=100,
                    metavar='N',
                    help='report interval')
parser.add_argument(
    '--save_dir',
    type=str,
    default='transformer_out',
    help='directory path to save the final model and training log')
parser.add_argument(
    '--gpus',
    type=str,
    help='list of gpus to run, e.g. 0 or 0,2,5. empty means using cpu.'
    ' (using a single gpu is suggested)')
args = parser.parse_args()
logging_config(args.save_dir)
logging.info(args)


def cache_dataset(dataset, prefix):
    """Cache the processed npy dataset  the dataset into a npz

    Parameters
    ----------
    dataset : SimpleDataset
    prefix : str
    """
    if not os.path.exists(_C.CACHE_PATH):
        os.makedirs(_C.CACHE_PATH)
    src_data = np.array([ele[0] for ele in dataset])
    tgt_data = np.array([ele[1] for ele in dataset])
Example 7
        train_data_loader = prepare_data_loader(args, train_dataset, vocab)
        val_data_loader = prepare_data_loader(args, val_dataset, vocab, test=True)

        model = NLIModel(len(vocab), args.embedding_size, args.hidden_size,
                         args.dropout, args.intra_attention)
        train_model(model, train_data_loader, val_data_loader, vocab.embedding, ctx, args)
    elif args.mode == 'test':
        model_args = argparse.Namespace(**json.load(
            open(os.path.join(args.model_dir, 'config.json'))))
        vocab = nlp.Vocab.from_json(
            open(os.path.join(args.model_dir, 'vocab.jsons')).read())
        val_dataset = read_dataset(args, 'test_file')
        val_data_loader = prepare_data_loader(args, val_dataset, vocab, test=True)
        model = NLIModel(len(vocab), model_args.embedding_size,
                         model_args.hidden_size, 0., model_args.intra_attention)
        model.load_parameters(os.path.join(
            args.model_dir, 'checkpoints', 'valid_best.params'), ctx=ctx)
        loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
        logger.info('Test on {}'.format(args.test_file))
        loss, acc = test_model(model, val_data_loader, loss_func, ctx)
        logger.info('loss={:.4f} acc={:.4f}'.format(loss, acc))

if __name__ == '__main__':
    args = parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    logging_config(os.path.join(args.output_dir, 'main.log'))

    main(args)
Example 8
                               dropout=args.dropout,
                               max_seq_len=max_length)
    model.initialize(mx.init.Xavier(), ctx=ctx)
    if not args.random_embedding:
        model.embedding.weight.set_data(
            vocab.embedding.idx_to_vec
        )  ## set the embedding layer parameters to pre-trained embedding
    elif args.fixed_embedding:
        model.embedding.collect_params().setattr('grad_req', 'null')

    return model


if __name__ == '__main__':

    logging_config(args.log_dir, 'train', level=logging.INFO)
    ctx = mx.cpu()  ## or mx.gpu(N) if GPU device N is available

    # for cross-validation
    vocab, train_dataset, transformer, max_length = load_dataset(
        args.train_file)
    print('sentence length is: ', max_length)
    # k-fold cross validation for training and testing different models
    k_fold_cross_valid(5, transformer, train_dataset, vocab, max_length)

    # for predicting the test labels, uncomment the codes below and comment the above lines for cross-validation
    # vocab, train_dataset, test_dataset, train_transformer, test_transformer, max_length = load_dataset(args.train_file,
    #                                                                                                    cross_validation=False,
    #                                                                                                    test_file='cleaned_entity_type_test.tsv')
    # random.shuffle(train_dataset)
    # print('sentence length is: ', max_length)
Example 9
from handlers import MainHandler
from workers import GpsModuleWorker

PORT = 12345  # replace this with a random port once service registration is working


def make_app(coords):
    return Application([
        (r'/', MainHandler, {
            "coords": coords
        }),
        (r'/healthcheck', HealthCheckHandler),
    ])


if __name__ == '__main__':
    os.environ['GPS_ROOT'] = os.getcwd()
    logging_config()
    logging.info("starting gps app")
    coords = Coords()
    #gps_worker = GpsModuleWorker(coords)
    methods = [
        method for method in dir(coords)
        if callable(getattr(coords, method)) and method[0] != '_'
    ]
    service = Service('192.168.1.22', PORT, 'GPS', methods)
    service.register_interface()
    app = make_app(coords)
    app.listen(PORT)
    IOLoop.current().start()
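The handlers module imported at the top of this example is not shown. In Tornado, the dict in a route tuple (the {"coords": coords} above) is passed to the handler's initialize method as keyword arguments, so minimal stand-ins for the two handlers could look like the sketch below; the response bodies are placeholders rather than the real implementation:

from tornado.web import RequestHandler


class MainHandler(RequestHandler):
    def initialize(self, coords):
        # Receives the {"coords": coords} dict from the route definition.
        self.coords = coords

    def get(self):
        # Placeholder response; the real handler's output is not shown here.
        self.write({'status': 'ok'})


class HealthCheckHandler(RequestHandler):
    def get(self):
        self.write('OK')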
Example 10
        train_model(model, train_data_loader, val_data_loader, vocab.embedding,
                    ctx, args)
    elif args.mode == 'test':
        model_args = argparse.Namespace(
            **json.load(open(os.path.join(args.model_dir, 'config.json'))))
        vocab = nlp.Vocab.from_json(
            open(os.path.join(args.model_dir, 'vocab.jsons')).read())
        val_dataset = read_dataset(args, 'test_file')
        val_data_loader = prepare_data_loader(args,
                                              val_dataset,
                                              vocab,
                                              test=True)
        model = build_model(model_args, vocab)
        model.load_parameters(os.path.join(args.model_dir, 'checkpoints',
                                           'valid_best.params'),
                              ctx=ctx)
        loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
        logger.info('Test on {}'.format(args.test_file))
        loss, acc = test_model(model, val_data_loader, loss_func, ctx)
        logger.info('loss={:.4f} acc={:.4f}'.format(loss, acc))


if __name__ == '__main__':
    args = parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    logging_config(os.path.join(args.output_dir, 'main.log'))

    main(args)
Example 11
import json
import os
from botocore.exceptions import ClientError
from error_response import error_response
from utils import assume_role, genpass, configure_user_client, \
    configure_iam_resource, logging_config

logger = logging_config()


def lambda_handler(event, context):
    logger.info(event)

    account_id = event['pathParameters']['account-id']

    event_body = json.loads(event["body"])

    username = event_body["username"]

    try:
        response = _reset_user_password_cloudwatch_account(
            account_id, username)
    except ClientError as error:
        logger.exception(error)
        return error_response(error)

    return {'statusCode': 200, 'body': json.dumps(response)}


def _reset_user_password_cloudwatch_account(account_id, username):
    session = assume_role(account_id, os.environ['FUNCTION_POLICY'])
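For reference, the handler above only reads pathParameters['account-id'] and a JSON body containing username, so an API Gateway-style event with just those fields is enough to exercise the parsing logic. The values below are placeholders, and an actual invocation would still need the FUNCTION_POLICY environment variable and AWS credentials for assume_role:

sample_event = {
    'pathParameters': {'account-id': '123456789012'},
    'body': json.dumps({'username': 'example-user'}),
}
# lambda_handler(sample_event, context=None)  # would call AWS via assume_role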
Example 12
def train_eval(args):
    logging_config(folder=args.save_dir,
                   name='log{:d}'.format(args.save_id),
                   no_console=False)
    logging.info(args)

    ### check context
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)

    ### load data
    dataset = DataLoader(data_name=args.data_name, seed=args.seed)
    print(dataset)
    model = Model(use_KG=True,
                  input_node_dim=args.entity_embed_dim,
                  gnn_model=args.gnn_model,
                  num_gnn_layers=args.gnn_num_layer,
                  n_hidden=args.gnn_hidden_size,
                  dropout=args.dropout_rate,
                  n_entities=dataset.n_KG_entity,
                  n_relations=dataset.n_KG_relation,
                  relation_dim=args.relation_embed_dim,
                  reg_lambda_kg=args.regs,
                  reg_lambda_gnn=args.regs)
    if use_cuda:
        model.cuda()
    logging.info(model)
    ### optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    valid_metric_logger = MetricLogger(
        ['epoch', 'recall', 'ndcg', 'is_best'], ['%d', '%.5f', '%.5f', '%d'],
        os.path.join(args.save_dir, 'valid{:d}.csv'.format(args.save_id)))
    test_metric_logger = MetricLogger(
        ['epoch', 'recall', 'ndcg'], ['%d', '%.5f', '%.5f'],
        os.path.join(args.save_dir, 'test{:d}.csv'.format(args.save_id)))
    best_epoch = -1
    best_recall = 0.0

    train_g = dataset.train_g
    nid_th = th.LongTensor(train_g.ndata["id"])
    etype_th = th.LongTensor(train_g.edata["type"])
    if use_cuda:
        nid_th, etype_th = nid_th.cuda(), etype_th.cuda()
    train_g.ndata['id'] = nid_th
    train_g.edata['type'] = etype_th

    test_g = dataset.test_g
    nid_th = th.LongTensor(test_g.ndata["id"])
    etype_th = th.LongTensor(test_g.edata["type"])
    if use_cuda:
        nid_th, etype_th = nid_th.cuda(), etype_th.cuda()
    test_g.ndata['id'] = nid_th
    test_g.edata['type'] = etype_th

    item_id_range = th.LongTensor(dataset.item_id_range).cuda() if use_cuda \
        else th.LongTensor(dataset.item_id_range)

    for epoch in range(1, args.max_epoch + 1):
        ### train kg
        time1 = time()
        kg_sampler = dataset.KG_sampler(batch_size=args.batch_size_kg)
        iter = 0
        total_loss = 0.0
        for h, r, pos_t, neg_t, _ in kg_sampler:
            iter += 1
            model.train()
            h_th = th.LongTensor(h)
            r_th = th.LongTensor(r)
            pos_t_th = th.LongTensor(pos_t)
            neg_t_th = th.LongTensor(neg_t)
            if use_cuda:
                h_th, r_th, pos_t_th, neg_t_th = h_th.cuda(), r_th.cuda(
                ), pos_t_th.cuda(), neg_t_th.cuda()
            loss = model.transR(h_th, r_th, pos_t_th, neg_t_th)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            if (iter % args.print_every) == 0 or iter == 1:
                logging.info("Epoch {:04d} Iter {:04d} | Loss {:.4f} ".format(
                    epoch, iter, total_loss / iter))
        logging.info('Time for KGE: {:.1f}s, loss {:.4f}'.format(
            time() - time1, total_loss / iter))

        ### train GNN
        if args.use_attention:
            time1 = time()
            print("Compute attention weight in train ...")
            with th.no_grad():
                A_w = model.compute_attention(train_g)
            train_g.edata['w'] = A_w
            print("Time: {:.2f}s".format(time() - time1))
        time1 = time()
        cf_sampler = dataset.CF_pair_sampler(batch_size=args.batch_size)
        iter = 0
        total_loss = 0.0
        for user_ids, item_pos_ids, item_neg_ids, _ in cf_sampler:
            iter += 1
            model.train()
            user_ids_th = th.LongTensor(user_ids)
            item_pos_ids_th = th.LongTensor(item_pos_ids)
            item_neg_ids_th = th.LongTensor(item_neg_ids)
            if use_cuda:
                user_ids_th, item_pos_ids_th, item_neg_ids_th = \
                    user_ids_th.cuda(), item_pos_ids_th.cuda(), item_neg_ids_th.cuda()
            embedding = model.gnn(train_g, train_g.ndata['id'])
            loss = model.get_loss(embedding, user_ids_th, item_pos_ids_th,
                                  item_neg_ids_th)
            loss.backward()
            # th.nn.utils.clip_grad_norm_(model.parameters(), args.grad_norm)  # clip gradients
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            if (iter % args.print_every) == 0 or iter == 1:
                logging.info("Epoch {:04d} Iter {:04d} | Loss {:.4f} ".format(
                    epoch, iter, total_loss / iter))
        logging.info('Time for GNN: {:.1f}s, loss {:.4f}'.format(
            time() - time1, total_loss / iter))

        if epoch % args.evaluate_every == 0:
            time1 = time()
            val_recall, val_ndcg = eval(model, train_g,
                                        dataset.train_user_dict,
                                        dataset.valid_user_dict, item_id_range,
                                        use_cuda, args.use_attention)

            info = "Epoch{}, [{:.1f}s] val recall:{:.5f}, val ndcg:{:.5f}".format(
                epoch,
                time() - time1, val_recall, val_ndcg)
            # save best model
            if val_recall > best_recall:
                valid_metric_logger.log(epoch=epoch,
                                        recall=val_recall,
                                        ndcg=val_ndcg,
                                        is_best=1)
                best_recall = val_recall
                #best_ndcg = val_ndcg
                best_epoch = epoch
                time1 = time()
                test_recall, test_ndcg = eval(model, test_g,
                                              dataset.train_valid_user_dict,
                                              dataset.test_user_dict,
                                              item_id_range, use_cuda,
                                              args.use_attention)
                test_metric_logger.log(epoch=epoch,
                                       recall=test_recall,
                                       ndcg=test_ndcg)

                info += "\t[{:.1f}s] test recall:{:.5f}, test ndcg:{:.5f}".format(
                    time() - time1, test_recall, test_ndcg)
                #th.save({'state_dict': model.state_dict(), 'epoch': epoch}, model_state_file)
            else:
                valid_metric_logger.log(epoch=epoch,
                                        recall=val_recall,
                                        ndcg=val_ndcg,
                                        is_best=0)
                recall, ndcg = eval(model, test_g,
                                    dataset.train_valid_user_dict,
                                    dataset.test_user_dict, item_id_range,
                                    use_cuda, args.use_attention)
                print("test recall:{}, test_ndcg: {}".format(recall, ndcg))
            logging.info(info)

    logging.info(
        "Final test recall:{:.5f}, test ndcg:{:.5f}, best epoch:{}".format(
            test_recall, test_ndcg, best_epoch))
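MetricLogger in the example above is constructed with column names, printf-style formats and a CSV path, then fed one row per log(...) call; its implementation is not part of this snippet. A minimal stand-in consistent with that usage, assuming it writes a header on creation and appends one formatted row per call, might be:

class MetricLogger:
    def __init__(self, attr_names, parse_formats, save_path):
        self._attr_names = attr_names
        self._parse_formats = parse_formats
        self._save_path = save_path
        # Write the CSV header once, when the logger is created.
        with open(save_path, 'w') as f:
            f.write(','.join(attr_names) + '\n')

    def log(self, **kwargs):
        # Append one row, formatting each value with its printf-style format.
        with open(self._save_path, 'a') as f:
            f.write(','.join(fmt % kwargs[name]
                             for name, fmt in zip(self._attr_names,
                                                  self._parse_formats)) + '\n')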
Example 13
parser.add_argument('--bleu', type=str, default='tweaked',
                    help='Schemes for computing bleu score. It can be: '
                    '"tweaked": it uses similar steps in get_ende_bleu.sh in tensor2tensor '
                    'repository, where compound words are put in ATAT format; '
                    '"13a": This uses official WMT tokenization and produces the same results'
                    ' as the official script (mteval-v13a.pl) used by WMT; '
                    '"intl": This uses international tokenization in mteval-v14a.pl')
parser.add_argument('--log_interval', type=int, default=100, metavar='N',
                    help='report interval')
parser.add_argument('--save_dir', type=str, default='transformer_out',
                    help='directory path to save the final model and training log')
parser.add_argument('--gpus', type=str,
                    help='list of gpus to run, e.g. 0 or 0,2,5. empty means using cpu.'
                         ' (using a single gpu is suggested)')
args = parser.parse_args()
logging_config(args.save_dir)
logging.info(args)


data_train, data_val, data_test, val_tgt_sentences, test_tgt_sentences, src_vocab, tgt_vocab \
    = dataprocessor.load_translation_data(dataset=args.dataset, bleu=args.bleu, args=args)

dataprocessor.write_sentences(val_tgt_sentences, os.path.join(args.save_dir, 'val_gt.txt'))
dataprocessor.write_sentences(test_tgt_sentences, os.path.join(args.save_dir, 'test_gt.txt'))

data_train = data_train.transform(lambda src, tgt: (src, tgt, len(src), len(tgt)), lazy=False)
data_val = gluon.data.SimpleDataset([(ele[0], ele[1], len(ele[0]), len(ele[1]), i)
                                     for i, ele in enumerate(data_val)])
data_test = gluon.data.SimpleDataset([(ele[0], ele[1], len(ele[0]), len(ele[1]), i)
                                      for i, ele in enumerate(data_test)])