def run_seq2seq_model(args, model_params=None):
    """Train a seq2seq model from scratch and periodically checkpoint it.

    Builds source/target vocabularies from the corpora named in ``args``,
    batches both the training and eval corpora, then runs ``args.epochs``
    epochs of training, logging total train/eval loss per epoch and saving
    a minidb checkpoint per epoch when ``args.checkpoint`` is set.

    Args:
        args: parsed CLI namespace; must provide ``source_corpus``,
            ``target_corpus``, ``source_corpus_eval``, ``target_corpus_eval``,
            ``unk_threshold``, ``max_length``, ``num_gpus``, ``epochs``,
            and ``checkpoint`` (path prefix or None).
        model_params: dict of model hyperparameters; ``batch_size`` is read
            here, the rest are forwarded to ``Seq2SeqModelCaffe2``.

    Raises:
        RuntimeError: if the Save operator reports failure for a checkpoint.
    """
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    batches = gen_batches(args.source_corpus, args.target_corpus,
                          source_vocab, target_vocab,
                          model_params['batch_size'], args.max_length)
    logger.info('Number of training batches {}'.format(len(batches)))

    batches_eval = gen_batches(args.source_corpus_eval,
                               args.target_corpus_eval,
                               source_vocab, target_vocab,
                               model_params['batch_size'], args.max_length)
    logger.info('Number of eval batches {}'.format(len(batches_eval)))

    with Seq2SeqModelCaffe2(
        model_params=model_params,
        source_vocab_size=len(source_vocab),
        target_vocab_size=len(target_vocab),
        num_gpus=args.num_gpus,
        num_cpus=20,
    ) as model_obj:
        model_obj.initialize_from_scratch()
        for i in range(args.epochs):
            logger.info('Epoch {}'.format(i))

            total_loss = 0
            for batch in batches:
                total_loss += model_obj.step(
                    batch=batch,
                    forward_only=False,
                )
            logger.info('\ttraining loss {}'.format(total_loss))

            total_loss = 0
            for batch in batches_eval:
                # BUG FIX: evaluation must be forward-only; the original
                # passed forward_only=False here, which trained on the
                # eval set during "evaluation".
                total_loss += model_obj.step(
                    batch=batch,
                    forward_only=True,
                )
            logger.info('\teval loss {}'.format(total_loss))

            if args.checkpoint is not None:
                checkpoint_path = '{0}-{1}'.format(args.checkpoint, i)
                # Explicit check instead of `assert`: asserts are stripped
                # under `python -O`, which would silently ignore a failed
                # checkpoint save.
                if not workspace.RunOperatorOnce(core.CreateOperator(
                    'Save',
                    model_obj.model.GetAllParams(),
                    [],
                    absolute_path=True,
                    db=checkpoint_path,
                    db_type='minidb',
                )):
                    raise RuntimeError(
                        'Failed to save checkpoint to {}'.format(
                            checkpoint_path,
                        )
                    )
                logger.info('Model saved to ' + checkpoint_path)
def run_seq2seq_model(args, model_params=None):
    """Train a seq2seq model end to end.

    Generates vocabularies and batches from the corpora referenced by
    ``args``, trains for ``args.epochs`` epochs inside a
    ``Seq2SeqModelCaffe2`` context, logs per-epoch train/eval loss, and
    delegates per-epoch checkpointing to ``model_obj.save`` when
    ``args.checkpoint`` is provided.
    """
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    # Batch both corpora with the same vocabularies and batch size.
    batch_size = model_params['batch_size']
    train_batches = gen_batches(
        args.source_corpus,
        args.target_corpus,
        source_vocab,
        target_vocab,
        batch_size,
        args.max_length,
    )
    logger.info('Number of training batches {}'.format(len(train_batches)))
    eval_batches = gen_batches(
        args.source_corpus_eval,
        args.target_corpus_eval,
        source_vocab,
        target_vocab,
        batch_size,
        args.max_length,
    )
    logger.info('Number of eval batches {}'.format(len(eval_batches)))

    with Seq2SeqModelCaffe2(
        model_params=model_params,
        source_vocab_size=len(source_vocab),
        target_vocab_size=len(target_vocab),
        num_gpus=args.num_gpus,
        num_cpus=20,
    ) as model_obj:
        model_obj.initialize_from_scratch()
        for epoch in range(args.epochs):
            logger.info('Epoch {}'.format(epoch))

            # Training pass: gradients enabled.
            train_loss = sum(
                model_obj.step(batch=batch, forward_only=False)
                for batch in train_batches
            )
            logger.info('\ttraining loss {}'.format(train_loss))

            # Evaluation pass: forward only, no parameter updates.
            eval_loss = sum(
                model_obj.step(batch=batch, forward_only=True)
                for batch in eval_batches
            )
            logger.info('\teval loss {}'.format(eval_loss))

            if args.checkpoint is not None:
                model_obj.save(args.checkpoint, epoch)
def run_seq2seq_beam_decoder(args, model_params, decoding_params):
    """Translate stdin line by line with an ensemble beam-search decoder.

    Rebuilds the source/target vocabularies from the training corpora,
    loads a single-model "ensemble" from ``args.checkpoint``, and for each
    input line on stdin prints the decoded target tokens (space-joined)
    to stdout.
    """
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    # Map token id -> token string for printing decoder output.
    id_to_target_token = {v: k for (k, v) in viewitems(target_vocab)}

    decoder = Seq2SeqModelCaffe2EnsembleDecoder(
        translate_params=dict(
            ensemble_models=[dict(
                source_vocab=source_vocab,
                target_vocab=target_vocab,
                model_params=model_params,
                model_file=args.checkpoint,
            )],
            decoding_params=decoding_params,
        ),
    )
    decoder.load_models()

    for line in sys.stdin:
        source_ids = seq2seq_util.get_numberized_sentence(
            line,
            source_vocab,
        )
        # Cap output length relative to input length (2x + 5 tokens).
        max_output_length = 2 * len(source_ids) + 5
        translation, alignment, _ = decoder.decode(
            source_ids,
            max_output_length,
        )
        print(' '.join(id_to_target_token[tid] for tid in translation))
def run_seq2seq_beam_decoder(args, model_params, decoding_params):
    """Run interactive beam-search decoding over stdin.

    Regenerates both vocabularies from the corpora in ``args``, wraps the
    checkpointed model in a one-member ensemble decoder, then reads source
    sentences from stdin and writes the translated token sequence for each
    one to stdout.
    """
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    # Inverse vocabulary: decoder emits ids, output needs token strings.
    inversed_target_vocab = {
        token_id: token
        for (token, token_id) in viewitems(target_vocab)
    }

    translate_params = dict(
        ensemble_models=[dict(
            source_vocab=source_vocab,
            target_vocab=target_vocab,
            model_params=model_params,
            model_file=args.checkpoint,
        )],
        decoding_params=decoding_params,
    )
    decoder = Seq2SeqModelCaffe2EnsembleDecoder(
        translate_params=translate_params,
    )
    decoder.load_models()

    for line in sys.stdin:
        numerized = seq2seq_util.get_numberized_sentence(line, source_vocab)
        # Allow the translation to be at most 2x input length plus slack.
        translation, alignment, _ = decoder.decode(
            numerized,
            2 * len(numerized) + 5,
        )
        tokens = [inversed_target_vocab[tid] for tid in translation]
        print(' '.join(tokens))