def setUpClass(cls): try: cls.config = Config('config.toml') engine = create_engine(cls.config.db_uri_test) SessionMaker = sessionmaker(bind=engine) cls.session = SessionMaker() create_tables(engine) cls.session.query(Close).delete() cls.session.query(Price).delete() cls.session.query(PriceSeq).delete() cls.session.commit() dir_resources = Path(cls.config.dir_resources) dir_prices = dir_resources / Path('pseudo-data') / Path('prices') missing_rics = ['.TEST'] logger = create_logger(Path('test.log'), is_debug=False, is_temporary=True) # insert database insert_prices(cls.session, dir_prices, missing_rics, dir_resources, logger) except: # noqa: E722 raise unittest.SkipTest('Cannot establish connection')
def main() -> None: args = parse_args() if not args.is_debug: warnings.simplefilter(action='ignore', category=FutureWarning) config = Config(args.dest_config) device = torch.device(args.device) now = datetime.today().strftime('reporter-%Y-%m-%d-%H-%M-%S') dest_dir = config.dir_output / Path(now) \ if args.output_subdir is None \ else config.dir_output / Path(args.output_subdir) dest_log = dest_dir / Path('reporter.log') logger = create_logger(dest_log, is_debug=args.is_debug) config.write_log(logger) message = 'start main (is_debug: {}, device: {})'.format(args.is_debug, args.device) logger.info(message) # === Alignment === has_all_alignments = \ reduce(lambda x, y: x and y, [(config.dir_output / Path('alignment-{}.json'.format(phase.value))).exists() for phase in list(Phase)]) if not has_all_alignments: engine = create_engine(config.db_uri) SessionMaker = sessionmaker(bind=engine) pg_session = SessionMaker() create_tables(engine) prepare_resources(config, pg_session, logger) for phase in list(Phase): config.dir_output.mkdir(parents=True, exist_ok=True) dest_alignments = config.dir_output / Path('alignment-{}.json'.format(phase.value)) alignments = load_alignments_from_db(pg_session, phase, logger) with dest_alignments.open(mode='w') as f: writer = jsonlines.Writer(f) writer.write_all(alignments) pg_session.close() # === Dataset === (vocab, train, valid, test) = create_dataset(config, device) vocab_size = len(vocab) dest_vocab = dest_dir / Path('reporter.vocab') with dest_vocab.open(mode='wb') as f: torch.save(vocab, f) seqtypes = [] attn = setup_attention(config, seqtypes) encoder = Encoder(config, device) decoder = Decoder(config, vocab_size, attn, device) model = EncoderDecoder(encoder, decoder, device) optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate) criterion = torch.nn.NLLLoss(reduction='elementwise_mean', ignore_index=vocab.stoi[SpecialToken.Padding.value]) # === Train === dest_model = dest_dir / Path('reporter.model') prev_valid_bleu = 0.0 max_bleu = 0.0 best_epoch = 0 early_stop_counter = 0 for epoch in range(config.n_epochs): logger.info('start epoch {}'.format(epoch)) train_result = run(train, vocab, model, optimizer, criterion, Phase.Train, logger) train_bleu = calc_bleu(train_result.gold_sents, train_result.pred_sents) valid_result = run(valid, vocab, model, optimizer, criterion, Phase.Valid, logger) valid_bleu = calc_bleu(valid_result.gold_sents, valid_result.pred_sents) s = ' | '.join(['epoch: {0:4d}'.format(epoch), 'training loss: {:.2f}'.format(train_result.loss), 'training BLEU: {:.4f}'.format(train_bleu), 'validation loss: {:.2f}'.format(valid_result.loss), 'validation BLEU: {:.4f}'.format(valid_bleu)]) logger.info(s) if max_bleu < valid_bleu: torch.save(model.state_dict(), str(dest_model)) max_bleu = valid_bleu best_epoch = epoch early_stop_counter = early_stop_counter + 1 \ if prev_valid_bleu > valid_bleu \ else 0 if early_stop_counter == config.patience: logger.info('EARLY STOPPING') break prev_valid_bleu = valid_bleu # === Test === with dest_model.open(mode='rb') as f: model.load_state_dict(torch.load(f)) test_result = run(test, vocab, model, optimizer, criterion, Phase.Test, logger) test_bleu = calc_bleu(test_result.gold_sents, test_result.pred_sents) s = ' | '.join(['epoch: {:04d}'.format(best_epoch), 'Test Loss: {:.2f}'.format(test_result.loss), 'Test BLEU: {:.10f}'.format(test_bleu)]) logger.info(s) export_results_to_csv(dest_dir, test_result)