Ejemplo n.º 1
0
def main(config):
    """Load the dataset, build a ``PricePredictor`` and run the selected mode.

    The mode is chosen from flags on ``config``:

    * ``config.validate``: the config pickled under
      ``saved_models/<model_name>/config.p`` replaces the incoming one. The
      requested ``output_seq_length`` is kept, ``num_layers`` is forced to 6,
      and ``model._validate`` runs at epoch 40.
    * ``config.backtest``: the pickled config is reloaded and
      ``model._backtest2`` runs at epoch 180.
    * neither flag set: the model is trained.

    Args:
        config: run configuration object exposing at least ``validate``,
            ``backtest``, ``model_name`` and ``output_seq_length``.
    """
    if config.validate:
        # Remember the requested horizon; the pickled config would overwrite it.
        output_len = config.output_seq_length
        # NOTE: pickle can execute arbitrary code on load -- only open config
        # files that were written by a trusted training run.
        with open('saved_models/' + config.model_name + '/config.p',
                  "rb") as config_file:
            config = pickle.load(config_file)
        config.validate = True
        config.output_seq_length = output_len
        config.num_layers = 6
        print(config)

    elif config.backtest:
        with open('saved_models/' + config.model_name + '/config.p',
                  "rb") as config_file:
            config = pickle.load(config_file)
        config.backtest = True
        print(config)

    t1 = time.time()
    # The data folder is resolved relative to the current working directory.
    data_folder = os.path.abspath("../../../../") + '/data/'
    dataset = load_data(data_folder, config)

    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')
    model = PricePredictor(config, dataset)

    if config.validate:
        model._validate(steps=config.output_seq_length, epoch=40)
        # model._make_figs(steps = config.output_seq_length, epoch=40)
    if config.backtest:
        model._backtest2(epoch=180)
    elif not config.validate:
        # Train only when no evaluation mode was requested; previously a
        # validate-only run fell through to training as well.
        model._train()
Ejemplo n.º 2
0
def main(config):
    """Load the dataset, build a ``PricePredictor`` and run the selected mode.

    The mode is chosen from flags on ``config``:

    * ``config.validate``: the config pickled under
      ``saved_models/<model_name>/config.p`` replaces the incoming one (with
      ``validate`` set and ``simulate`` cleared) and ``model._make_figs2``
      runs at epoch 70.
    * ``config.tsne``: ``model._tsne`` runs at epoch 70.
    * neither flag set: the model is trained.

    Args:
        config: run configuration object exposing at least ``validate``,
            ``tsne`` and ``model_name``.
    """
    if config.validate:
        # NOTE: pickle can execute arbitrary code on load -- only open config
        # files that were written by a trusted training run.
        with open('saved_models/' + config.model_name + '/config.p',
                  "rb") as config_file:
            config = pickle.load(config_file)
        config.validate = True
        config.simulate = False
        print(config)

    t1 = time.time()
    # The data folder is resolved relative to the current working directory.
    data_folder = os.path.abspath("../../../") + '/data/'
    dataset = load_data(data_folder, config)

    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')

    model = PricePredictor(config, dataset)
    if config.validate:
        # model._validate( epoch=70)
        # model._make_figs(epoch=70)
        model._make_figs2(epoch=70)

    elif config.tsne:
        model._tsne(epoch=70)
    else:
        model._train()
Ejemplo n.º 3
0
def main(config):
    """Load the dataset, build a ``PricePredictor`` and backtest or train it.

    When ``config.validate`` is set, the config pickled under
    ``saved_models/<model_name>/config.p`` replaces the incoming one. The
    incoming ``output_seq_length``, ``num_layers``, ``num_stochastic_layers``
    and ``loss`` are carried over, and ``model._backtest`` runs at epoch 160.
    Otherwise the model is trained.

    Args:
        config: run configuration object exposing at least ``validate``,
            ``model_name``, ``output_seq_length``, ``num_layers``,
            ``num_stochastic_layers`` and ``loss``.
    """
    if config.validate:
        # Values requested for this run; the pickled config would overwrite them.
        output_len = config.output_seq_length
        num_layers = config.num_layers
        loss = config.loss
        num_stochastic_layers = config.num_stochastic_layers
        # NOTE: pickle can execute arbitrary code on load -- only open config
        # files that were written by a trusted training run.
        with open('saved_models/'+config.model_name+'/config.p',
                  "rb") as config_file:
            config = pickle.load(config_file)
        config.validate = True
        config.simulate = False
        config.output_seq_length = output_len
        config.num_layers = num_layers
        config.num_stochastic_layers = num_stochastic_layers
        config.loss = loss
        print(config)

    t1 = time.time()
    # The data folder is resolved relative to the current working directory.
    data_folder = os.path.abspath("../../../../")+'/data/'
    dataset = load_data(data_folder, config)

    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2-t1) +' sec \n')

    model = PricePredictor(config, dataset)
    if config.validate:
        # model._make_figs(steps = config.output_seq_length, epoch=200)
        # model._validate(steps = config.output_seq_length, epoch=160)
        model._backtest(epoch=160)
    else:
        model._train()
Ejemplo n.º 4
0
def main(config):
    """Load the dataset, build a ``PricePredictor`` and backtest or train it.

    When ``config.validate`` is set, the config pickled under
    ``saved_models/<model_name>/config.p`` replaces the incoming one. The
    incoming ``file_path``, ``output_seq_length``, ``seed`` and ``loss`` are
    carried over, ``backtest_target`` is set to ``'close'`` and ``target`` to
    ``'NDX'``, and ``model._backtest`` runs at epoch 150. Otherwise the model
    is trained.

    Args:
        config: run configuration object exposing at least ``validate``,
            ``model_name``, ``output_seq_length``, ``file_path``, ``seed``
            and ``loss``.
    """
    if config.validate:
        # Values requested for this run; the pickled config would overwrite them.
        output_len = config.output_seq_length
        file_path = config.file_path
        seed = config.seed
        loss = config.loss
        # NOTE: pickle can execute arbitrary code on load -- only open config
        # files that were written by a trusted training run.
        with open('saved_models/' + config.model_name + '/config.p',
                  "rb") as config_file:
            config = pickle.load(config_file)
        config.validate = True
        config.file_path = file_path
        config.output_seq_length = output_len
        config.seed = seed
        config.loss = loss
        config.backtest_target = 'close'
        # NOTE(review): the incoming config.target is not carried over; the
        # target is hard-coded here -- confirm this is intended.
        config.target = 'NDX'
        print(config)

    t1 = time.time()
    # The data folder is resolved relative to the current working directory.
    data_folder = os.path.abspath("../../../../") + '/data/'
    dataset = load_data(data_folder, config)

    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')

    model = PricePredictor(config, dataset)

    if config.validate:
        model._backtest(epoch=150)
        # model._validate(steps = config.output_seq_length, epoch=150)
    else:
        model._train()
Ejemplo n.º 5
0
def main(config):
    """Load the dataset, build a ``PricePredictor`` and validate or train it.

    When ``config.validate`` is set, the config pickled under
    ``saved_models/<model_name>/config.p`` replaces the incoming one. The
    incoming ``file_path``, ``output_seq_length``, ``seed``, ``loss``,
    ``num_layers`` and ``num_stochastic_layers`` are carried over, the
    targets and model name are overridden with fixed values, and
    ``model._validate`` runs at epoch 500.

    When only ``config.backtest`` is set, the pickled config is reloaded with
    ``backtest`` set and ``validate`` cleared. In every non-validate case the
    model is trained.

    Args:
        config: run configuration object exposing at least ``validate``,
            ``backtest``, ``model_name``, ``output_seq_length``,
            ``file_path``, ``seed``, ``loss``, ``num_layers`` and
            ``num_stochastic_layers``.
    """
    if config.validate:
        # Values requested for this run; the pickled config would overwrite them.
        output_len = config.output_seq_length
        file_path = config.file_path
        seed = config.seed
        loss = config.loss
        num_layers = config.num_layers
        num_stochastic_layers = config.num_stochastic_layers
        # NOTE: pickle can execute arbitrary code on load -- only open config
        # files that were written by a trusted training run.
        with open('saved_models/' + config.model_name + '/config.p',
                  "rb") as config_file:
            config = pickle.load(config_file)
        config.validate = True
        config.file_path = file_path
        config.output_seq_length = output_len
        config.seed = seed
        config.loss = loss
        config.num_layers = num_layers
        config.num_stochastic_layers = num_stochastic_layers
        config.backtest_target = 'close_btc'
        config.target = 'lr_btc'
        config.model_name = 'vaegan_mv_hour_'
        print(config)

    elif config.backtest:
        with open('saved_models/' + config.model_name + '/config.p',
                  "rb") as config_file:
            config = pickle.load(config_file)
        config.backtest = True
        config.validate = False
        print(config)

    t1 = time.time()
    # The data folder is resolved relative to the current working directory.
    data_folder = os.path.abspath("../../../../") + '/data/'
    dataset = load_data(data_folder, config)

    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')
    model = PricePredictor(config, dataset)

    if config.validate:
        model._validate(steps=config.output_seq_length, epoch=500)
        # model._make_figs(steps = config.output_seq_length, epoch=200)
        # model._backtest(epoch=500)

    else:
        # NOTE(review): a backtest-only run (config.backtest set, validate
        # cleared above) also ends up here and trains -- confirm whether a
        # backtest call is missing.
        model._train()
Ejemplo n.º 6
0
# Build the data preparator. Every argument value (dataset_path, column_names,
# separator, na_value, comment, result_column_name) is a name defined outside
# this snippet.
data = DataPreparator(path=dataset_path,
                      col_names=column_names,
                      sep=separator,
                      na_val=na_value,
                      comment=comment,
                      result_col=result_column_name)

# Replace the numeric codes in the "Origin" column with region names.
data.dataset["Origin"] = data.dataset["Origin"].map({
    1: "USA",
    2: "Europe",
    3: "Japan"
})
# presumably one-hot encodes the categorical columns (such as "Origin") --
# TODO confirm against DataPreparator.get_dummies
data.get_dummies()

predictor = PricePredictor(data)

# `history` is used below through its `.history` dict and `.epoch` sequence.
history = predictor.history()
loss, mae, mse = predictor.evaluate()
test_predictions = predictor.predict()
# Per-sample difference between predictions and the predictor's test labels.
errors = test_predictions - predictor.test_labels

# Smoothed training and validation MAE curves (`_smooth` is defined elsewhere;
# the exact meaning of `std` is not visible here -- presumably a kernel width).
train_val = _smooth(history.history['mae'], std=2)
value = _smooth(history.history['val_mae'], std=2)

# One row of three axes; only the first is populated in this snippet.
fig, (axs1, axs2, axs3) = plt.subplots(1, 3)

# Plot labels are in Ukrainian: "Помилка %" = "Error %",
# "Помилка значення" = "Value error", "Епоха" = "Epoch",
# "Абсолютне значення помилки" = "Absolute value of the error".
axs1.plot(history.epoch, train_val, label="Помилка %")
axs1.plot(history.epoch, value, "--", label="Помилка значення")
axs1.set(xlabel="Епоха", ylabel="Абсолютне значення помилки")
axs1.legend()
Ejemplo n.º 7
0
def build_model(schema, mappings, trie, args):
    """Assemble the TensorFlow model (and optional ranker) described by ``args``.

    The default graph is reset and seeded, word/sequence/context embedders are
    created, and then the encoder/decoder pair selected by ``args.model`` is
    built. If ``args.ranker`` is set, the model is wrapped in (or replaced by)
    the corresponding ranker.

    Args:
        schema: not used by this function.
        mappings: dict providing ``'vocab'`` and, when context is enabled,
            ``'kb_vocab'`` and ``'cat_vocab'``.
        trie: passed through to the ``Sampler``.
        args: parsed options namespace. NOTE: ``vars(args)`` is mutated below,
            so ``vocab_size``, ``keep_prob`` and ``embed_size`` are written
            back onto ``args``.

    Returns:
        The constructed model or ranker object.

    Raises:
        ValueError: if ``args.decoding[0]`` is not ``'sample'`` or
            ``args.model`` is not a known model name.
    """
    import tensorflow as tf
    from cocoa.model.word_embedder import WordEmbedder
    from cocoa.model.encdec import BasicEncoder, BasicDecoder, Sampler
    from price_predictor import PricePredictor
    from encdec import BasicEncoderDecoder, PriceDecoder, PriceEncoder, ContextDecoder, AttentionDecoder, LM, SlotFillingDecoder, ContextEncoder, TrieDecoder, ClassifyDecoder, CandidateSelector, IRSelector
    from ranker import IRRanker, CheatRanker, EncDecRanker, SlotFillingRanker
    from context_embedder import ContextEmbedder
    from preprocess import markers
    from cocoa.model.sequence_embedder import get_sequence_embedder

    check_model_args(args)

    tf.reset_default_graph()
    tf.set_random_seed(args.random_seed)

    with tf.variable_scope('GlobalDropout'):
        if args.test:
            keep_prob = tf.constant(1.)
        else:
            # When test on dev set, we need to feed in keep_prob = 1.0
            keep_prob = tf.placeholder_with_default(tf.constant(1. -
                                                                args.dropout),
                                                    shape=[],
                                                    name='keep_prob')

    vocab = mappings['vocab']
    pad = vocab.to_ind(markers.PAD)

    # Word embeddings
    word_embeddings = None
    context_word_embeddings = None
    if args.pretrained_wordvec is not None:
        word_embeddings = vocab.load_embeddings(args.pretrained_wordvec,
                                                args.word_embed_size)
        if args.context:
            context_word_embeddings = mappings['kb_vocab'].load_embeddings(
                args.pretrained_wordvec, args.word_embed_size)

    with tf.variable_scope('EncoderWordEmbedder'):
        encoder_word_embedder = WordEmbedder(vocab.size, args.word_embed_size,
                                             word_embeddings, pad)
    with tf.variable_scope('DecoderWordEmbedder'):
        decoder_word_embedder = WordEmbedder(vocab.size, args.word_embed_size,
                                             word_embeddings, pad)

    if args.decoding[0] == 'sample':
        sample_t = float(args.decoding[1])
        sampler = Sampler(sample_t, trie=trie)
    else:
        # Raising a bare string is a TypeError in Python 3 and would hide
        # this message; raise a real exception instead.
        raise ValueError('Unknown decoding method')

    # vars() returns the namespace's own dict, so these writes also set the
    # attributes on ``args``.
    opts = vars(args)
    opts['vocab_size'] = vocab.size
    opts['keep_prob'] = keep_prob
    opts['embed_size'] = args.rnn_size
    encoder_seq_embedder = get_sequence_embedder(args.encoder, **opts)
    decoder_seq_embedder = get_sequence_embedder(args.decoder, **opts)

    # Bound up front so the attention decoder below can reference it even
    # when no context is configured (previously an unbound-variable error).
    # NOTE(review): assumes AttentionDecoder accepts context_embedder=None --
    # confirm.
    context_embedder = None
    if args.context is not None:
        context_opts = dict(opts)
        context_opts['vocab_size'] = mappings['kb_vocab'].size
        context_opts['embed_size'] = args.context_size

        with tf.variable_scope('ContextWordEmbedder'):
            context_word_embedder = WordEmbedder(context_opts['vocab_size'],
                                                 context_opts['embed_size'],
                                                 context_word_embeddings,
                                                 pad=pad)

        with tf.variable_scope('CategoryWordEmbedder'):
            category_word_embedder = WordEmbedder(mappings['cat_vocab'].size,
                                                  10,
                                                  pad=pad)
        context_seq_embedder = get_sequence_embedder(args.context_encoder,
                                                     **context_opts)
        context_embedder = ContextEmbedder(mappings['cat_vocab'].size,
                                           context_word_embedder,
                                           category_word_embedder,
                                           context_seq_embedder, pad)

    if args.predict_price:
        price_predictor = PricePredictor(args.price_predictor_hidden_size,
                                         args.price_hist_len, pad)

    def get_decoder(args):
        """Build the decoder for ``args.decoder`` and apply optional wrappers."""
        prompt_len = 2  # <role> <category>
        if args.decoder == 'rnn':
            if args.context is not None:
                decoder = ContextDecoder(decoder_word_embedder,
                                         decoder_seq_embedder,
                                         context_embedder,
                                         args.context,
                                         pad,
                                         keep_prob,
                                         vocab.size,
                                         sampler,
                                         args.sampled_loss,
                                         args.tied,
                                         prompt_len=prompt_len)
            else:
                decoder = BasicDecoder(decoder_word_embedder,
                                       decoder_seq_embedder,
                                       pad,
                                       keep_prob,
                                       vocab.size,
                                       sampler,
                                       args.sampled_loss,
                                       args.tied,
                                       prompt_len=prompt_len)
        else:
            decoder = AttentionDecoder(decoder_word_embedder,
                                       decoder_seq_embedder,
                                       pad,
                                       keep_prob,
                                       vocab.size,
                                       sampler,
                                       args.sampled_loss,
                                       context_embedder=context_embedder,
                                       attention_memory=args.attention_memory,
                                       prompt_len=prompt_len)

        if args.predict_price:
            decoder = PriceDecoder(decoder, price_predictor)

        if args.slot_filling:
            decoder = SlotFillingDecoder(decoder)

        # Retrieval-based models
        if args.model == 'selector':
            decoder = ClassifyDecoder(decoder)

        #decoder = TrieDecoder(decoder)
        return decoder

    def get_encoder(args):
        """Build the encoder, context-aware when ``args.num_context > 0``."""
        if args.num_context > 0:
            encoder = ContextEncoder(encoder_word_embedder,
                                     encoder_seq_embedder, args.num_context,
                                     pad, keep_prob)
        else:
            encoder = BasicEncoder(encoder_word_embedder, encoder_seq_embedder,
                                   pad, keep_prob)
        if args.predict_price:
            encoder = PriceEncoder(encoder, price_predictor)
        return encoder

    if args.model == 'encdec' or args.ranker == 'encdec':
        decoder = get_decoder(args)
        encoder = get_encoder(args)
        model = BasicEncoderDecoder(encoder,
                                    decoder,
                                    pad,
                                    keep_prob,
                                    stateful=args.stateful)
    elif args.model == 'selector':
        decoder = get_decoder(args)
        encoder = get_encoder(args)
        model = CandidateSelector(encoder, decoder, pad, keep_prob)
    elif args.model == 'ir':
        model = IRSelector()
    elif args.model == 'lm':
        decoder = get_decoder(args)
        model = LM(decoder, pad)
    elif args.model is not None:
        raise ValueError('Unknown model')

    # The ranker options are mutually exclusive, so a single chain suffices.
    if args.ranker == 'cheat':
        model = CheatRanker()
    elif args.ranker == 'ir':
        model = IRRanker()
    elif args.ranker == 'encdec':
        model = EncDecRanker(model, args.temperature)
    elif args.ranker == 'sf':
        model = SlotFillingRanker(model)

    return model