def main(config):
    """Entry point: load the dataset, build a PricePredictor, then validate,
    backtest, or train depending on the flags set on *config*.

    In validate/backtest mode the config saved at training time is reloaded
    from disk so the model architecture matches the checkpoint; selected
    CLI-supplied values are re-applied on top of it.
    """
    if config.validate:
        # Preserve the CLI-requested horizon before the saved config replaces it.
        output_len = config.output_seq_length
        # NOTE(review): pickle can execute arbitrary code — only load trusted
        # local model files.  `with` ensures the file handle is closed.
        with open('saved_models/' + config.model_name + '/config.p', "rb") as f:
            config = pickle.load(f)
        config.validate = True
        config.output_seq_length = output_len
        config.num_layers = 6  # hard-coded override — presumably matches the checkpoint; TODO confirm
        print(config)
    elif config.backtest:
        with open('saved_models/' + config.model_name + '/config.p', "rb") as f:
            config = pickle.load(f)
        config.backtest = True
        print(config)

    t1 = time.time()
    data_folder = os.path.abspath(os.path.abspath("../../../../")) + '/data/'
    dataset = load_data(data_folder, config)
    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')

    model = PricePredictor(config, dataset)
    if config.validate:
        model._validate(steps=config.output_seq_length, epoch=40)
        # model._make_figs(steps = config.output_seq_length, epoch=40)
    # NOTE(review): when validate is set and backtest is not, control still
    # reaches the else-branch and trains — confirm this is intended.
    if config.backtest:
        model._backtest2(epoch=180)
    else:
        model._train()
def main(config):
    """Entry point: load the dataset and either produce figures, run t-SNE,
    or train, depending on the flags set on *config*.

    In validate mode the config saved at training time is reloaded so the
    model matches the checkpoint.
    """
    if config.validate:
        # NOTE(review): pickle can execute arbitrary code — only load trusted
        # local model files.  `with` ensures the file handle is closed.
        with open('saved_models/' + config.model_name + '/config.p', "rb") as f:
            config = pickle.load(f)
        config.validate = True
        config.simulate = False
        print(config)

    t1 = time.time()
    data_folder = os.path.abspath(os.path.abspath("../../../")) + '/data/'
    dataset = load_data(data_folder, config)
    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')

    model = PricePredictor(config, dataset)
    if config.validate:
        # model._validate( epoch=70)
        # model._make_figs(epoch=70)
        model._make_figs2(epoch=70)
    elif config.tsne:
        model._tsne(epoch=70)
    else:
        model._train()
def main(config):
    """Entry point: load the dataset, rebuild the saved model config, then
    backtest (in validate mode) or train.

    CLI-supplied hyper-parameters that must win over the pickled
    training-time values are saved first and re-applied after the load.
    """
    if config.validate:
        # Keep CLI overrides before the saved config replaces `config`.
        output_len = config.output_seq_length
        num_layers = config.num_layers          # was `l` — renamed, E741
        loss = config.loss
        num_stochastic_layers = config.num_stochastic_layers
        # NOTE(review): pickle can execute arbitrary code — only load trusted
        # local model files.  `with` ensures the file handle is closed.
        with open('saved_models/' + config.model_name + '/config.p', "rb") as f:
            config = pickle.load(f)
        config.validate = True
        config.simulate = False
        config.output_seq_length = output_len
        config.num_layers = num_layers
        config.num_stochastic_layers = num_stochastic_layers
        config.loss = loss
        print(config)

    t1 = time.time()
    data_folder = os.path.abspath(os.path.abspath("../../../../")) + '/data/'
    dataset = load_data(data_folder, config)
    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')

    model = PricePredictor(config, dataset)
    if config.validate:
        # model._make_figs(steps = config.output_seq_length, epoch=200)
        # model._validate(steps = config.output_seq_length, epoch=160)
        model._backtest(epoch=160)
    else:
        model._train()
def main(config):
    """Entry point: load the dataset, rebuild the saved model config, then
    backtest (in validate mode) or train.

    CLI-supplied values that must win over the pickled training-time config
    are saved first and re-applied after the load.
    """
    if config.validate:
        # Keep CLI overrides before the saved config replaces `config`.
        output_len = config.output_seq_length
        file_path = config.file_path
        seed = config.seed
        loss = config.loss
        # NOTE(review): pickle can execute arbitrary code — only load trusted
        # local model files.  `with` ensures the file handle is closed.
        with open('saved_models/' + config.model_name + '/config.p', "rb") as f:
            config = pickle.load(f)
        config.validate = True
        config.file_path = file_path
        config.output_seq_length = output_len
        config.seed = seed
        config.loss = loss
        config.backtest_target = 'close'
        # NOTE(review): the original saved config.target into a local but
        # never restored it — the target is hard-coded to 'NDX' here; the
        # unused local was removed.  Confirm the hard-coding is intended.
        config.target = 'NDX'
        print(config)

    t1 = time.time()
    data_folder = os.path.abspath(os.path.abspath("../../../../")) + '/data/'
    dataset = load_data(data_folder, config)
    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')

    model = PricePredictor(config, dataset)
    if config.validate:
        model._backtest(epoch=150)
        # model._validate(steps = config.output_seq_length, epoch=150)
    else:
        model._train()
def main(config):
    """Entry point: load the dataset, rebuild the saved model config, then
    validate or train depending on the flags set on *config*.

    CLI-supplied hyper-parameters that must win over the pickled
    training-time values are saved first and re-applied after the load.
    """
    if config.validate:
        # Keep CLI overrides before the saved config replaces `config`.
        output_len = config.output_seq_length
        file_path = config.file_path
        seed = config.seed
        loss = config.loss
        num_layers = config.num_layers          # was `l` — renamed, E741
        num_stochastic_layers = config.num_stochastic_layers
        # NOTE(review): pickle can execute arbitrary code — only load trusted
        # local model files.  `with` ensures the file handle is closed.
        with open('saved_models/' + config.model_name + '/config.p', "rb") as f:
            config = pickle.load(f)
        config.validate = True
        config.file_path = file_path
        config.output_seq_length = output_len
        config.seed = seed
        config.loss = loss
        config.num_layers = num_layers
        config.num_stochastic_layers = num_stochastic_layers
        config.backtest_target = 'close_btc'
        config.target = 'lr_btc'
        config.model_name = 'vaegan_mv_hour_'  # hard-coded model name override — TODO confirm
        print(config)
    elif config.backtest:
        with open('saved_models/' + config.model_name + '/config.p', "rb") as f:
            config = pickle.load(f)
        config.backtest = True
        config.validate = False
        print(config)

    t1 = time.time()
    data_folder = os.path.abspath(os.path.abspath("../../../../")) + '/data/'
    dataset = load_data(data_folder, config)
    t2 = time.time()
    print('Finished loading the dataset: ' + str(t2 - t1) + ' sec \n')

    model = PricePredictor(config, dataset)
    if config.validate:
        model._validate(steps=config.output_seq_length, epoch=500)
        # model._make_figs(steps = config.output_seq_length, epoch=200)
        # model._backtest(epoch=500)
    else:
        # NOTE(review): in backtest mode validate is forced False, so control
        # falls through to training here — looks unintended; confirm.
        model._train()
# Prepare the dataset: parse the file and designate the result column.
data = DataPreparator(path=dataset_path, col_names=column_names, sep=separator, na_val=na_value, comment=comment, result_col=result_column_name)
# Replace numeric origin codes with country names before one-hot encoding.
data.dataset["Origin"] = data.dataset["Origin"].map({
    1: "USA",
    2: "Europe",
    3: "Japan"
})
data.get_dummies()

# Fit/evaluate the price model and collect its training history.
predictor = PricePredictor(data)
history = predictor.history()
loss, mae, mse = predictor.evaluate()
test_predictions = predictor.predict()
# Prediction residuals on the held-out test set.
errors = test_predictions - predictor.test_labels

# Smooth the MAE curves for plotting (presumably Gaussian smoothing with
# std=2 — confirm against `_smooth`).
train_val = _smooth(history.history['mae'], std=2)
value = _smooth(history.history['val_mae'], std=2)

# First of three panels: training vs. validation error per epoch
# (axis labels and legend are in Ukrainian).
fig, (axs1, axs2, axs3) = plt.subplots(1, 3)
axs1.plot(history.epoch, train_val, label="Помилка %")
axs1.plot(history.epoch, value, "--", label="Помилка значення")
axs1.set(xlabel="Епоха", ylabel="Абсолютне значення помилки")
axs1.legend()
def build_model(schema, mappings, trie, args):
    """Build the TF1 graph for the negotiation model selected by *args*.

    Args:
        schema: dataset schema (unused here but part of the public interface).
        mappings: dict of vocabularies ('vocab', 'kb_vocab', 'cat_vocab').
        trie: optional prefix trie passed to the token sampler.
        args: parsed command-line arguments controlling every component.

    Returns:
        The assembled model: an encoder-decoder, candidate selector, LM,
        IR selector, or a ranker wrapping one of those.

    Raises:
        ValueError: if the decoding method or model name is unknown.
    """
    import tensorflow as tf
    from cocoa.model.word_embedder import WordEmbedder
    from cocoa.model.encdec import BasicEncoder, BasicDecoder, Sampler
    from price_predictor import PricePredictor
    from encdec import BasicEncoderDecoder, PriceDecoder, PriceEncoder, ContextDecoder, AttentionDecoder, LM, SlotFillingDecoder, ContextEncoder, TrieDecoder, ClassifyDecoder, CandidateSelector, IRSelector
    from ranker import IRRanker, CheatRanker, EncDecRanker, SlotFillingRanker
    from context_embedder import ContextEmbedder
    from preprocess import markers
    from cocoa.model.sequence_embedder import get_sequence_embedder

    check_model_args(args)

    tf.reset_default_graph()
    tf.set_random_seed(args.random_seed)

    with tf.variable_scope('GlobalDropout'):
        if args.test:
            keep_prob = tf.constant(1.)
        else:
            # When test on dev set, we need to feed in keep_prob = 1.0
            keep_prob = tf.placeholder_with_default(tf.constant(1. - args.dropout), shape=[], name='keep_prob')

    vocab = mappings['vocab']
    pad = vocab.to_ind(markers.PAD)

    # Word embeddings (optionally initialized from pretrained vectors).
    word_embeddings = None
    context_word_embeddings = None
    if args.pretrained_wordvec is not None:
        word_embeddings = vocab.load_embeddings(args.pretrained_wordvec, args.word_embed_size)
        if args.context:
            context_word_embeddings = mappings['kb_vocab'].load_embeddings(args.pretrained_wordvec, args.word_embed_size)

    with tf.variable_scope('EncoderWordEmbedder'):
        encoder_word_embedder = WordEmbedder(vocab.size, args.word_embed_size, word_embeddings, pad)
    with tf.variable_scope('DecoderWordEmbedder'):
        decoder_word_embedder = WordEmbedder(vocab.size, args.word_embed_size, word_embeddings, pad)

    if args.decoding[0] == 'sample':
        sample_t = float(args.decoding[1])
        sampler = Sampler(sample_t, trie=trie)
    else:
        # FIX: the original did `raise ('Unknown decoding method')`, which
        # raises a plain string — a TypeError in Python 3, never the intended
        # error.  Use ValueError, consistent with 'Unknown model' below.
        raise ValueError('Unknown decoding method')

    # Shared options for the sequence embedders.
    opts = vars(args)
    opts['vocab_size'] = vocab.size
    opts['keep_prob'] = keep_prob
    opts['embed_size'] = args.rnn_size
    encoder_seq_embedder = get_sequence_embedder(args.encoder, **opts)
    decoder_seq_embedder = get_sequence_embedder(args.decoder, **opts)

    if args.context is not None:
        context_opts = dict(opts)
        context_opts['vocab_size'] = mappings['kb_vocab'].size
        context_opts['embed_size'] = args.context_size

        with tf.variable_scope('ContextWordEmbedder'):
            context_word_embedder = WordEmbedder(context_opts['vocab_size'], context_opts['embed_size'], context_word_embeddings, pad=pad)

        with tf.variable_scope('CategoryWordEmbedder'):
            category_word_embedder = WordEmbedder(mappings['cat_vocab'].size, 10, pad=pad)
        context_seq_embedder = get_sequence_embedder(args.context_encoder, **context_opts)
        context_embedder = ContextEmbedder(mappings['cat_vocab'].size, context_word_embedder, category_word_embedder, context_seq_embedder, pad)

    if args.predict_price:
        price_predictor = PricePredictor(args.price_predictor_hidden_size, args.price_hist_len, pad)

    def get_decoder(args):
        # Assemble the decoder, then wrap it with optional capabilities.
        prompt_len = 2  # <role> <category>
        if args.decoder == 'rnn':
            if args.context is not None:
                decoder = ContextDecoder(decoder_word_embedder, decoder_seq_embedder, context_embedder, args.context, pad, keep_prob, vocab.size, sampler, args.sampled_loss, args.tied, prompt_len=prompt_len)
            else:
                decoder = BasicDecoder(decoder_word_embedder, decoder_seq_embedder, pad, keep_prob, vocab.size, sampler, args.sampled_loss, args.tied, prompt_len=prompt_len)
        else:
            decoder = AttentionDecoder(decoder_word_embedder, decoder_seq_embedder, pad, keep_prob, vocab.size, sampler, args.sampled_loss, context_embedder=context_embedder, attention_memory=args.attention_memory, prompt_len=prompt_len)

        if args.predict_price:
            decoder = PriceDecoder(decoder, price_predictor)

        if args.slot_filling:
            decoder = SlotFillingDecoder(decoder)

        # Retrieval-based models
        if args.model == 'selector':
            decoder = ClassifyDecoder(decoder)

        #decoder = TrieDecoder(decoder)
        return decoder

    def get_encoder(args):
        # Assemble the encoder, optionally wrapping it with price prediction.
        if args.num_context > 0:
            encoder = ContextEncoder(encoder_word_embedder, encoder_seq_embedder, args.num_context, pad, keep_prob)
        else:
            encoder = BasicEncoder(encoder_word_embedder, encoder_seq_embedder, pad, keep_prob)
        if args.predict_price:
            encoder = PriceEncoder(encoder, price_predictor)
        return encoder

    if args.model == 'encdec' or args.ranker == 'encdec':
        decoder = get_decoder(args)
        encoder = get_encoder(args)
        model = BasicEncoderDecoder(encoder, decoder, pad, keep_prob, stateful=args.stateful)
    elif args.model == 'selector':
        decoder = get_decoder(args)
        encoder = get_encoder(args)
        model = CandidateSelector(encoder, decoder, pad, keep_prob)
    elif args.model == 'ir':
        model = IRSelector()
    elif args.model == 'lm':
        decoder = get_decoder(args)
        model = LM(decoder, pad)
    elif args.model is not None:
        raise ValueError('Unknown model')

    # Rankers optionally replace/wrap the base model.
    # NOTE(review): the 'cheat' check is a separate `if`, so 'ir'/'encdec'/'sf'
    # are re-tested afterwards; behavior is unchanged because the ranker
    # values are mutually exclusive, but the chain looks accidental — confirm.
    if args.ranker == 'cheat':
        model = CheatRanker()
    if args.ranker == 'ir':
        model = IRRanker()
    elif args.ranker == 'encdec':
        model = EncDecRanker(model, args.temperature)
    elif args.ranker == 'sf':
        model = SlotFillingRanker(model)

    return model