def test_train_dev_loss_equal(self):
  layer_dim = 512
  batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
  train_args = {}
  train_args['src_file'] = "examples/data/head.ja"
  train_args['trg_file'] = "examples/data/head.en"
  train_args['loss_calculator'] = MLELoss()
  train_args['model'] = DefaultTranslator(
    src_reader=PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.vocab")),
    trg_reader=PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab")),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
      transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
      scorer=Softmax(input_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                          src_file="examples/data/head.ja",
                                          ref_file="examples/data/head.en",
                                          batcher=batcher)]
  train_args['trainer'] = DummyTrainer()
  train_args['batcher'] = batcher
  train_args['run_for_epochs'] = 1
  training_regimen = regimens.SimpleTrainingRegimen(**train_args)
  training_regimen.run_training(save_fct=lambda: None)
  self.assertAlmostEqual(training_regimen.train_loss_tracker.epoch_loss.sum_factors() /
                         training_regimen.train_loss_tracker.epoch_words,
                         training_regimen.dev_loss_tracker.dev_score.loss,
                         places=5)
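# Editor's note (sketch, not part of the original test): the assertion above holds
# because train and dev read the same files with the same batcher and DummyTrainer
# performs no parameter updates, so both sides reduce to the same per-word loss.
# Hypothetical numbers for illustration:
def per_word_loss(summed_loss, num_words):
  # both loss trackers normalize the total summed loss by the total reference words
  return summed_loss / num_words

assert abs(per_word_loss(150.0, 50) - 3.0) < 1e-9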
def setUp(self):
  layer_dim = 512
  events.clear()
  ParamManager.init_param_col()
  src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
  trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
  self.model = DefaultTranslator(
    src_reader=PlainTextReader(vocab=src_vocab),
    trg_reader=PlainTextReader(vocab=trg_vocab),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
      transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
      scorer=Softmax(input_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  event_trigger.set_train(False)
  self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
class TestSimultaneousTranslation(unittest.TestCase):

  def setUp(self):
    # Seeding
    numpy.random.seed(2)
    random.seed(2)
    layer_dim = 32
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.src_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.vocab"))
    self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
    self.layer_dim = layer_dim
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))
    self.input_vocab_size = len(self.src_reader.vocab.i2w)
    self.output_vocab_size = len(self.trg_reader.vocab.i2w)
    self.loss_calculator = loss_calculators.MLELoss()
    self.model = SimultaneousTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=self.input_vocab_size),
      encoder=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="decoder"),
        transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
        scorer=Softmax(vocab_size=self.output_vocab_size, input_dim=layer_dim),
        embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=self.output_vocab_size),
        bridge=NoBridge(dec_dim=layer_dim, dec_layers=1)),
      policy_train_oracle=False,
      policy_test_oracle=False,
      read_before_write=True,
    )
    event_trigger.set_train(True)
    my_batcher = batchers.TrgBatcher(batch_size=3)
    self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
    dy.renew_cg(immediate_compute=True, check_validity=True)

  def test_train_nll(self):
    event_trigger.set_train(True)
    mle_loss = loss_calculators.MLELoss()
    mle_loss.calc_loss(self.model, self.src[0], self.trg[0])
    event_trigger.set_train(False)
    self.model.generate(batchers.mark_as_batch([self.src_data[0]]), GreedySearch())

  def test_simult_beam(self):
    event_trigger.set_train(False)
    mle_loss = loss_calculators.MLELoss()
    mle_loss.calc_loss(self.model, self.src[0], self.trg[0])
    self.model.generate(batchers.mark_as_batch([self.src_data[0]]), BeamSearch(beam_size=2))
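# Editor's sketch: with read_before_write=True above, the simultaneous policy reads
# the entire source before emitting any target words. The implied action sequence
# (illustration only; actual action handling lives inside SimultaneousTranslator):
def read_before_write_actions(src_len, trg_len):
  return ["READ"] * src_len + ["WRITE"] * trg_len

assert read_before_write_actions(2, 3) == ["READ", "READ", "WRITE", "WRITE", "WRITE"]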
def setUp(self):
  events.clear()
  ParamManager.init_param_col()
  src_vocab = Vocab(vocab_file="test/data/head.ja.vocab")
  trg_vocab = Vocab(vocab_file="test/data/head.en.vocab")
  self.src_reader = PlainTextReader(vocab=src_vocab)
  self.trg_reader = PlainTextReader(vocab=trg_vocab)
  self.src_data = list(self.src_reader.read_sents("test/data/head.ja"))
  self.trg_data = list(self.trg_reader.read_sents("test/data/head.en"))
def setUp(self):
  # Seeding
  numpy.random.seed(2)
  random.seed(2)
  layer_dim = 32
  xnmt.events.clear()
  ParamManager.init_param_col()
  self.src_reader = PlainTextReader(vocab=Vocab(vocab_file="test/data/head.ja.vocab"))
  self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="test/data/head.en.vocab"))
  self.layer_dim = layer_dim
  self.src_data = list(self.src_reader.read_sents("test/data/head.ja"))
  self.trg_data = list(self.trg_reader.read_sents("test/data/head.en"))
  self.input_vocab_size = len(self.src_reader.vocab.i2w)
  self.output_vocab_size = len(self.trg_reader.vocab.i2w)
  self.loss_calculator = MLELoss()
  self.model = SimultaneousTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=self.input_vocab_size),
    encoder=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="decoder"),
      transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
      scorer=Softmax(vocab_size=self.output_vocab_size, input_dim=layer_dim),
      embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=self.output_vocab_size),
      bridge=NoBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  event_trigger.set_train(True)
  my_batcher = batchers.TrgBatcher(batch_size=3)
  self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
  dy.renew_cg(immediate_compute=True, check_validity=True)
def setUp(self):
  # Seeding
  numpy.random.seed(2)
  random.seed(2)
  layer_dim = 32
  xnmt.events.clear()
  ParamManager.init_param_col()
  src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
  self.src_reader = CompoundReader(readers=[PlainTextReader(vocab=src_vocab),
                                            SimultActionTextReader()],
                                   vocab=src_vocab)
  self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
  self.layer_dim = layer_dim
  self.src_data = list(self.src_reader.read_sents(["examples/data/head.ja",
                                                   "examples/data/simult/head.jaen.actions"]))
  self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))
  self.input_vocab_size = len(self.src_reader.vocab.i2w)
  self.output_vocab_size = len(self.trg_reader.vocab.i2w)
  self.loss_calculator = loss_calculators.MLELoss()
  self.model = SimultaneousTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=self.input_vocab_size),
    encoder=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="decoder"),
      transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
      scorer=Softmax(vocab_size=self.output_vocab_size, input_dim=layer_dim),
      embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=self.output_vocab_size),
      bridge=NoBridge(dec_dim=layer_dim, dec_layers=1)),
    policy_network=network.PolicyNetwork(transforms.MLP(2 * self.layer_dim, self.layer_dim, 2)),
    policy_train_oracle=True,
    policy_test_oracle=True,
  )
  event_trigger.set_train(True)
  my_batcher = batchers.TrgBatcher(batch_size=3)
  self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
  dy.renew_cg(immediate_compute=True, check_validity=True)
def setUp(self):
  # Seeding
  numpy.random.seed(2)
  random.seed(2)
  layer_dim = 64
  xnmt.events.clear()
  ParamManager.init_param_col()
  self.segment_encoder_bilstm = BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim)
  self.segment_composer = SumComposer()
  self.src_reader = CharFromWordTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.charvocab"))
  self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
  self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5)
  baseline = Linear(input_dim=layer_dim, output_dim=1)
  policy_network = Linear(input_dim=layer_dim, output_dim=2)
  self.poisson_prior = PoissonPrior(mu=3.3)
  self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior)
  self.conf_penalty = ConfidencePenalty()
  self.policy_gradient = PolicyGradient(input_dim=layer_dim,
                                        output_dim=2,
                                        baseline=baseline,
                                        policy_network=policy_network,
                                        z_normalization=True,
                                        conf_penalty=self.conf_penalty)
  self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1)
  self.segmenting_encoder = SegmentingSeqTransducer(
    embed_encoder=self.segment_encoder_bilstm,
    segment_composer=self.segment_composer,
    final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    policy_learning=self.policy_gradient,
    eps_greedy=self.eps_greedy,
    length_prior=self.length_prior,
  )
  self.model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=self.segmenting_encoder,
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="decoder"),
      transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
      scorer=Softmax(vocab_size=100, input_dim=layer_dim),
      trg_embed_dim=layer_dim,
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  event_trigger.set_train(True)
  self.layer_dim = layer_dim
  self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
  self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
  my_batcher = batchers.TrgBatcher(batch_size=3)
  self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
  dy.renew_cg(immediate_compute=True, check_validity=True)
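# Note on EpsilonGreedy(eps_prob=0.0, prior=PoissonPrior(mu=3.3)) above: with
# eps_prob=0.0 the prior is never sampled, so the learned segmentation policy is
# always followed. A generic epsilon-greedy sketch (illustration, not xnmt's code):
import random

def epsilon_greedy(policy_action, prior_sample, eps_prob, rng=random):
  # with probability eps_prob take the prior's sample, otherwise the policy's action
  return prior_sample if rng.random() < eps_prob else policy_action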
def test_overfitting(self):
  layer_dim = 16
  batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
  train_args = {}
  train_args['src_file'] = "examples/data/head.ja"
  train_args['trg_file'] = "examples/data/head.en"
  train_args['loss_calculator'] = MLELoss()
  train_args['model'] = DefaultTranslator(
    src_reader=PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.vocab")),
    trg_reader=PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab")),
    src_embedder=LookupEmbedder(vocab_size=100, emb_dim=layer_dim),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
      transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
      scorer=Softmax(input_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                          src_file="examples/data/head.ja",
                                          ref_file="examples/data/head.en",
                                          batcher=batcher)]
  train_args['run_for_epochs'] = 1
  train_args['trainer'] = AdamTrainer(alpha=0.1)
  train_args['batcher'] = batcher
  training_regimen = regimens.SimpleTrainingRegimen(**train_args)
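# Hedged follow-up (not in the original snippet): a typical overfitting check would
# run the regimen and assert that the per-word training loss approaches zero. The
# call pattern mirrors test_train_dev_loss_equal above; the threshold is hypothetical.
# training_regimen.run_training(save_fct=lambda: None)
# self.assertAlmostEqual(
#   0.0,
#   training_regimen.train_loss_tracker.epoch_loss.sum_factors() /
#   training_regimen.train_loss_tracker.epoch_words,
#   places=2)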
def setUp(self):
  # Seeding
  numpy.random.seed(2)
  random.seed(2)
  layer_dim = 4
  xnmt.events.clear()
  ParamManager.init_param_col()
  self.segment_composer = SumComposer()
  self.src_reader = CharFromWordTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.charvocab"))
  self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
  self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5)
  self.segmenting_encoder = SegmentingSeqTransducer(
    segment_composer=self.segment_composer,
    final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
  )
  self.model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=self.segmenting_encoder,
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim, yaml_path="decoder"),
      transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
      scorer=Softmax(vocab_size=100, input_dim=layer_dim),
      embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  event_trigger.set_train(True)
  self.layer_dim = layer_dim
  self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
  self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
  my_batcher = batchers.TrgBatcher(batch_size=3)
  self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
  dy.renew_cg(immediate_compute=True, check_validity=True)
class PretrainedSimpleWordEmbedderSanityTest(unittest.TestCase):

  def setUp(self):
    events.clear()
    self.input_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.vocab"))
    list(self.input_reader.read_sents('examples/data/head.ja'))
    ParamManager.init_param_col()

  def test_load(self):
    """
    Checks that the embeddings can be loaded, have the right dimension, and that one line matches.
    """
    embedder = LookupEmbedder(init_fastext='examples/data/wiki.ja.vec.small',
                              emb_dim=300,
                              vocab=self.input_reader.vocab)
    # self.assertEqual(embedder.embeddings.shape()[::-1], (self.input_reader.vocab_size(), 300))
    with open('examples/data/wiki.ja.vec.small', encoding='utf-8') as vecfile:
      test_line = next(islice(vecfile, 9, None)).split()  # Select the vector for '日'
    test_word = test_line[0]
    test_id = self.input_reader.vocab.w2i[test_word]
    test_emb = test_line[1:]
    self.assertTrue(np.allclose(embedder.embeddings.batch([test_id]).npvalue().tolist(),
                                np.array(test_emb, dtype=float).tolist(),
                                rtol=1e-5))
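# For reference, the fastText .vec text format assumed by test_load above: a header
# line "<vocab_size> <dim>" followed by one "<word> <v1> ... <vN>" line per word,
# so islice(vecfile, 9, None) selects the 10th file line, i.e. the 9th word entry
# after the header. Minimal stand-alone parser sketch:
def read_fasttext_line(line):
  # split one .vec line into (word, vector-of-floats)
  fields = line.rstrip().split(" ")
  return fields[0], [float(x) for x in fields[1:]]

word, vec = read_fasttext_line("日 0.1 -0.2 0.3")
assert word == "日" and len(vec) == 3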
def run(self):
  seed = 13
  random.seed(seed)
  np.random.seed(seed)
  EXP_DIR = os.path.dirname(__file__)
  EXP = "annot"
  model_file = f"{EXP_DIR}/results/{EXP}.mod"
  log_file = f"{EXP_DIR}/results/{EXP}.log"
  xnmt.tee.utils.dy.DynetParams().set_mem(1024)  # Doesn't work; figure out how to set memory
  xnmt.tee.set_out_file(log_file, exp_name=EXP)
  ParamManager.init_param_col()
  ParamManager.param_col.model_file = model_file
  pre_runner = PreprocRunner(
    tasks=[
      PreprocTokenize(
        in_files=[
          #f'{EXP_DIR}/conala-corpus/conala-trainnodev.snippet',
          #f'{EXP_DIR}/conala-corpus/conala-trainnodev.intent',
          #f'{EXP_DIR}/conala-corpus/conala-dev.intent',
          #f'{EXP_DIR}/conala-corpus/conala-dev.snippet',
          #f'{EXP_DIR}/conala-corpus/conala-test.intent',
          #f'{EXP_DIR}/conala-corpus/conala-test.snippet',
          f'{EXP_DIR}/conala-corpus/attack_code_train.txt',
          f'{EXP_DIR}/conala-corpus/attack_text_train.txt',
          f'{EXP_DIR}/conala-corpus/attack_code_test.txt',
          f'{EXP_DIR}/conala-corpus/attack_text_test.txt'
          #f'{EXP_DIR}/conala-corpus/all.code',
          #f'{EXP_DIR}/conala-corpus/all.anno'
        ],
        out_files=[
          #f'{EXP_DIR}/conala-corpus/conala-trainnodev.tmspm4000.snippet',
          #f'{EXP_DIR}/conala-corpus/conala-trainnodev.tmspm4000.intent',
          #f'{EXP_DIR}/conala-corpus/conala-dev.tmspm4000.intent',
          #f'{EXP_DIR}/conala-corpus/conala-dev.tmspm4000.snippet',
          #f'{EXP_DIR}/conala-corpus/conala-test.tmspm4000.intent',
          #f'{EXP_DIR}/conala-corpus/conala-test.tmspm4000.snippet',
          f'{EXP_DIR}/conala-corpus/attack-train.tmspm4000.snippet',
          f'{EXP_DIR}/conala-corpus/attack-train.tmspm4000.intent',
          f'{EXP_DIR}/conala-corpus/attack-test.tmspm4000.snippet',
          f'{EXP_DIR}/conala-corpus/attack-test.tmspm4000.intent'
          #f'{EXP_DIR}/conala-corpus/django.tmspm4000.snippet',
          #f'{EXP_DIR}/conala-corpus/django.tmspm4000.intent'
        ],
        specs=[{
          'filenum': 'all',
          'tokenizers': [
            SentencepieceTokenizer(hard_vocab_limit=False,
                                   train_files=[f'{EXP_DIR}/conala-corpus/attack_text_train.txt',
                                                f'{EXP_DIR}/conala-corpus/attack_code_train.txt'],
                                   vocab_size=self.vocab_size,
                                   model_type=self.model_type,
                                   model_prefix='conala-corpus/attack-train.tmspm4000.spm')
          ]
        }]),
      PreprocVocab(
        in_files=[
          f'{EXP_DIR}/conala-corpus/attack-train.tmspm4000.intent',
          f'{EXP_DIR}/conala-corpus/attack-train.tmspm4000.snippet'
        ],
        out_files=[
          f'{EXP_DIR}/conala-corpus/attack-train.tmspm4000.intent.vocab',
          f'{EXP_DIR}/conala-corpus/attack-train.tmspm4000.snippet.vocab'
        ],
        specs=[{
          'filenum': 'all',
          'filters': [VocabFiltererFreq(min_freq=self.min_freq)]
        }])
    ],
    overwrite=False)
  src_vocab = Vocab(vocab_file=f"{EXP_DIR}/conala-corpus/attack-train.tmspm4000.intent.vocab")
  trg_vocab = Vocab(vocab_file=f"{EXP_DIR}/conala-corpus/attack-train.tmspm4000.snippet.vocab")
  batcher = Batcher(batch_size=64)
  inference = AutoRegressiveInference(
    search_strategy=BeamSearch(len_norm=PolynomialNormalization(apply_during_search=True),
                               beam_size=5),
    post_process='join-piece')
  layer_dim = self.layer_dim
  model = DefaultTranslator(
    src_reader=PlainTextReader(vocab=src_vocab),
    trg_reader=PlainTextReader(vocab=trg_vocab),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab=src_vocab),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=self.layers),
    attender=MlpAttender(hidden_dim=layer_dim, state_dim=layer_dim, input_dim=layer_dim),
    trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab=trg_vocab),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
      scorer=Softmax(vocab_size=len(trg_vocab), input_dim=layer_dim),
      trg_embed_dim=layer_dim,
      input_feeding=False,
      bridge=CopyBridge(dec_dim=layer_dim)),
    inference=inference)
    #decoder = AutoRegressiveDecoder(bridge=CopyBridge(),inference=inference))
  train = SimpleTrainingRegimen(
    name=f"{EXP}",
    model=model,
    batcher=WordSrcBatcher(avg_batch_size=64),
    trainer=AdamTrainer(alpha=self.alpha),
    patience=3,
    lr_decay=0.5,
    restart_trainer=True,
    run_for_epochs=self.epochs,
    src_file=f"{EXP_DIR}/conala-corpus/attack-train.tmspm4000.intent",
    trg_file=f"{EXP_DIR}/conala-corpus/attack-train.tmspm4000.snippet",
    dev_tasks=[
      LossEvalTask(src_file=f"{EXP_DIR}/conala-corpus/attack-test.tmspm4000.intent",
                   ref_file=f'{EXP_DIR}/conala-corpus/attack-test.tmspm4000.snippet',
                   model=model,
                   batcher=WordSrcBatcher(avg_batch_size=64)),
      AccuracyEvalTask(eval_metrics='bleu',
                       src_file=f'{EXP_DIR}/conala-corpus/attack-test.tmspm4000.intent',
                       ref_file=f'{EXP_DIR}/conala-corpus/attack_text_test.txt',
                       hyp_file=f'results/{EXP}.dev.hyp',
                       model=model)
    ])
  evaluate = [
    AccuracyEvalTask(eval_metrics="bleu",
                     #src_file=f"{EXP_DIR}/conala-corpus/conala-test.tmspm4000.intent",
                     src_file=f"{EXP_DIR}/conala-corpus/attack-test.tmspm4000.intent",
                     #ref_file=f"{EXP_DIR}/conala-corpus/all.code",
                     #ref_file=f"{EXP_DIR}/conala-corpus/conala-test.snippet",
                     ref_file=f"{EXP_DIR}/conala-corpus/attack_text_test.txt",
                     hyp_file=f"results/{EXP}.test.hyp",
                     inference=inference,
                     model=model)
  ]
  standard_experiment = Experiment(
    exp_global=ExpGlobal(default_layer_dim=512,
                         dropout=0.3,
                         log_file=log_file,
                         model_file=model_file),
    name="annot",
    model=model,
    train=train,
    evaluate=evaluate)
  # run experiment
  standard_experiment(save_fct=lambda: save_to_file(model_file, standard_experiment))
  exit()
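# Note on the beam search above: PolynomialNormalization(apply_during_search=True)
# divides a hypothesis's summed log-probability by a power of its length so longer
# outputs are not unduly penalized during search. Sketch with an assumed exponent
# of 1.0 (the exponent is a configurable parameter):
def length_normalized_score(log_prob, length, m=1.0):
  return log_prob / (length ** m)

assert length_normalized_score(-6.0, 3) == -2.0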
class TestTruncatedBatchTraining(unittest.TestCase):

  def setUp(self):
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.src_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.vocab"))
    self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))

  def assert_single_loss_equals_batch_loss(self, model, pad_src_to_multiple=1):
    """
    Tests whether single loss equals batch loss.
    Truncating src / trg sents to the same length so no masking is necessary.
    """
    batch_size = 5
    src_sents = self.src_data[:batch_size]
    src_min = min([x.sent_len() for x in src_sents])
    src_sents_trunc = [s.words[:src_min] for s in src_sents]
    for single_sent in src_sents_trunc:
      single_sent[src_min - 1] = Vocab.ES
      while len(single_sent) % pad_src_to_multiple != 0:
        single_sent.append(Vocab.ES)
    trg_sents = self.trg_data[:batch_size]
    trg_min = min([x.sent_len() for x in trg_sents])
    trg_sents_trunc = [s.words[:trg_min] for s in trg_sents]
    for single_sent in trg_sents_trunc:
      single_sent[trg_min - 1] = Vocab.ES

    src_sents_trunc = [sent.SimpleSentence(words=s) for s in src_sents_trunc]
    trg_sents_trunc = [sent.SimpleSentence(words=s) for s in trg_sents_trunc]

    single_loss = 0.0
    for sent_id in range(batch_size):
      dy.renew_cg()
      train_loss, _ = MLELoss().calc_loss(model=model,
                                          src=src_sents_trunc[sent_id],
                                          trg=trg_sents_trunc[sent_id]).compute()
      single_loss += train_loss.value()

    dy.renew_cg()
    batched_loss, _ = MLELoss().calc_loss(model=model,
                                          src=mark_as_batch(src_sents_trunc),
                                          trg=mark_as_batch(trg_sents_trunc)).compute()
    self.assertAlmostEqual(single_loss, np.sum(batched_loss.value()), places=4)

  def test_loss_model1(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)

  def test_loss_model2(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(False)
    self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)

  def test_loss_model3(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)

  def test_loss_model4(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=DotAttender(),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)
class TestBatchTraining(unittest.TestCase):

  def setUp(self):
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.src_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.vocab"))
    self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))

  def assert_single_loss_equals_batch_loss(self, model, pad_src_to_multiple=1):
    """
    Tests whether single loss equals batch loss.
    Here we don't truncate the target side and use masking.
    """
    batch_size = 5
    src_sents = self.src_data[:batch_size]
    src_min = min([x.sent_len() for x in src_sents])
    src_sents_trunc = [s.words[:src_min] for s in src_sents]
    for single_sent in src_sents_trunc:
      single_sent[src_min - 1] = Vocab.ES
      while len(single_sent) % pad_src_to_multiple != 0:
        single_sent.append(Vocab.ES)
    trg_sents = sorted(self.trg_data[:batch_size], key=lambda x: x.sent_len(), reverse=True)
    trg_max = max([x.sent_len() for x in trg_sents])
    np_arr = np.zeros([batch_size, trg_max])
    for i in range(batch_size):
      for j in range(trg_sents[i].sent_len(), trg_max):
        np_arr[i, j] = 1.0
    trg_masks = Mask(np_arr)
    trg_sents_padded = [[w for w in s] + [Vocab.ES] * (trg_max - s.sent_len()) for s in trg_sents]

    src_sents_trunc = [sent.SimpleSentence(words=s) for s in src_sents_trunc]
    trg_sents_padded = [sent.SimpleSentence(words=s) for s in trg_sents_padded]

    single_loss = 0.0
    for sent_id in range(batch_size):
      dy.renew_cg()
      train_loss, _ = MLELoss().calc_loss(model=model,
                                          src=src_sents_trunc[sent_id],
                                          trg=trg_sents[sent_id]).compute()
      single_loss += train_loss.value()

    dy.renew_cg()
    batched_loss, _ = MLELoss().calc_loss(model=model,
                                          src=mark_as_batch(src_sents_trunc),
                                          trg=mark_as_batch(trg_sents_padded, trg_masks)).compute()
    self.assertAlmostEqual(single_loss, np.sum(batched_loss.value()), places=4)

  def test_loss_model1(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)

  def test_loss_model2(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(layers=3, input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(False)
    self.assert_single_loss_equals_batch_loss(model, pad_src_to_multiple=4)

  def test_loss_model3(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(layers=3, input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(False)
    self.assert_single_loss_equals_batch_loss(model)
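# Worked illustration (editor's sketch) of the target-mask construction used in
# assert_single_loss_equals_batch_loss above: for target lengths [4, 2] padded to
# length 4, positions past each sentence's end get a 1 and are excluded from the loss.
import numpy as np

def build_trg_mask(lengths):
  np_arr = np.zeros([len(lengths), max(lengths)])
  for i, length in enumerate(lengths):
    np_arr[i, length:] = 1.0
  return np_arr

assert build_trg_mask([4, 2]).tolist() == [[0.0, 0.0, 0.0, 0.0],
                                           [0.0, 0.0, 1.0, 1.0]]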
class TestEmbedder(unittest.TestCase):

  def setUp(self):
    # Seeding
    np.random.seed(2)
    random.seed(2)
    layer_dim = 4
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
    self.src_char_vocab = CharVocab(vocab_file="examples/data/head.ja.vocab")
    self.ngram_vocab = Vocab(vocab_file="examples/data/head.ngramcount.ja")
    self.trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
    self.src_reader = CharFromWordTextReader(vocab=self.src_vocab, char_vocab=self.src_char_vocab)
    self.trg_reader = PlainTextReader(vocab=self.trg_vocab)
    self.layer_dim = layer_dim
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))
    self.src, self.trg = batchers.TrgBatcher(batch_size=3).pack(self.src_data, self.trg_data)
    dy.renew_cg(immediate_compute=True, check_validity=True)

  def test_lookup_composer(self):
    embedder = LookupEmbedder(emb_dim=self.layer_dim, vocab_size=100)
    embedder.embed_sent(self.src[1])
    embedder.embed(self.src[1][1][1])

  def test_sum_composer(self):
    embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                       composer=SumComposer(),
                                       char_vocab=self.src_char_vocab)
    embedder.embed_sent(self.src[1])

  def test_avg_composer(self):
    embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                       composer=AverageComposer(),
                                       char_vocab=self.src_char_vocab)
    embedder.embed_sent(self.src[1])

  def test_max_composer(self):
    embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                       composer=MaxComposer(),
                                       char_vocab=self.src_char_vocab)
    embedder.embed_sent(self.src[1])

  def test_conv_composer(self):
    composer = ConvolutionComposer(ngram_size=2,
                                   transform=NonLinear(self.layer_dim, self.layer_dim, activation="relu"),
                                   embed_dim=self.layer_dim,
                                   hidden_dim=self.layer_dim)
    embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                       composer=composer,
                                       char_vocab=self.src_char_vocab)
    embedder.embed_sent(self.src[1])

  def test_transducer_composer(self):
    composer = SeqTransducerComposer(seq_transducer=BiLSTMSeqTransducer(input_dim=self.layer_dim,
                                                                        hidden_dim=self.layer_dim))
    embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                       composer=composer,
                                       char_vocab=self.src_char_vocab)
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    embedder.embed_sent(self.src[1])

  def test_bagofwords_embedder(self):
    embedder = BagOfWordsEmbedder(self.layer_dim,
                                  char_vocab=self.src_char_vocab,
                                  ngram_vocab=self.ngram_vocab,
                                  ngram_size=3)
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    embedder.embed_sent(self.src[1])

  def test_bagofwords_embedder_with_word_vocab(self):
    embedder = BagOfWordsEmbedder(self.layer_dim,
                                  word_vocab=self.src_vocab,
                                  ngram_vocab=self.ngram_vocab,
                                  ngram_size=3)
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    embedder.embed_sent(self.src[1])

  def test_dyer_composer(self):
    composer = DyerHeadComposer(fwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim,
                                                                    hidden_dim=self.layer_dim),
                                bwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim,
                                                                    hidden_dim=self.layer_dim),
                                transform=AuxNonLinear(input_dim=self.layer_dim,
                                                       output_dim=self.layer_dim,
                                                       aux_input_dim=self.layer_dim))
    embedder = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                       composer=composer,
                                       char_vocab=self.src_char_vocab)
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    embedder.embed_sent(self.src[1])

  def test_composite_composer(self):
    composer = DyerHeadComposer(fwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim,
                                                                    hidden_dim=self.layer_dim),
                                bwd_combinator=UniLSTMSeqTransducer(input_dim=self.layer_dim,
                                                                    hidden_dim=self.layer_dim),
                                transform=AuxNonLinear(input_dim=self.layer_dim,
                                                       output_dim=self.layer_dim,
                                                       aux_input_dim=self.layer_dim))
    embedder_1 = CharCompositionEmbedder(emb_dim=self.layer_dim,
                                         composer=composer,
                                         char_vocab=self.src_char_vocab)
    embedder_2 = LookupEmbedder(emb_dim=self.layer_dim, vocab_size=100)
    embedder = CompositeEmbedder(embedders=[embedder_1, embedder_2])
    event_trigger.set_train(True)
    event_trigger.start_sent(self.src[1])
    embedder.embed_sent(self.src[1])
    embedder.embed(self.src[1][0].words[0])

  def test_segmented_word(self):
    a = SegmentedWord([1, 2, 3], 10)
    b = SegmentedWord([1, 2, 3], 10)
    c = SegmentedWord([2, 3, 4], 10)
    d = SegmentedWord([1, 2, 3], 9)
    self.assertEqual(a, b)
    self.assertEqual(a, [1, 2, 3])
    self.assertEqual(a, 10)
    self.assertNotEqual(a, c)
    self.assertNotEqual(a, d)
    self.assertNotEqual(type(self.src[0][0][0]), SegmentedWord)
    self.assertEqual(type(self.src[0][0].words[0]), SegmentedWord)
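# The equality checks in test_segmented_word above imply that SegmentedWord compares
# equal both to its character-id list and to its word id. A minimal sketch of such
# semantics (illustration only; the real class lives in xnmt's sent module):
class SegmentedWordSketch:
  def __init__(self, chars, word_id):
    self.chars, self.word_id = list(chars), word_id

  def __eq__(self, other):
    if isinstance(other, SegmentedWordSketch):
      return self.chars == other.chars and self.word_id == other.word_id
    if isinstance(other, list):
      return self.chars == other
    if isinstance(other, int):
      return self.word_id == other
    return NotImplemented

assert SegmentedWordSketch([1, 2, 3], 10) == [1, 2, 3]
assert SegmentedWordSketch([1, 2, 3], 10) == 10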
xnmt.tee.set_out_file(log_file, EXP)
ParamManager.init_param_col()
ParamManager.param_col.model_file = model_file
src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
batcher = SrcBatcher(batch_size=64)
inference = AutoRegressiveInference(batcher=InOrderBatcher(batch_size=1))
layer_dim = 512
model = DefaultTranslator(
  src_reader=PlainTextReader(vocab=src_vocab),
  trg_reader=PlainTextReader(vocab=trg_vocab),
  src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(src_vocab)),
  encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
  attender=MlpAttender(hidden_dim=layer_dim, state_dim=layer_dim, input_dim=layer_dim),
  decoder=AutoRegressiveDecoder(
    input_dim=layer_dim,
    embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(trg_vocab)),
    rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
class TestEncoder(unittest.TestCase):

  def setUp(self):
    events.clear()
    ParamManager.init_param_col()
    src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
    trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
    self.src_reader = PlainTextReader(vocab=src_vocab)
    self.trg_reader = PlainTextReader(vocab=trg_vocab)
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))

  def assert_in_out_len_equal(self, model):
    dy.renew_cg()
    event_trigger.set_train(True)
    src = self.src_data[0]
    event_trigger.start_sent(src)
    embeddings = model.src_embedder.embed_sent(src)
    encodings = model.encoder.transduce(embeddings)
    self.assertEqual(len(embeddings), len(encodings))

  def test_bi_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.assert_in_out_len_equal(model)

  def test_uni_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.assert_in_out_len_equal(model)

  # TODO: Update this to the new residual LSTM transducer framework
  # def test_res_lstm_encoder_len(self):
  #   layer_dim = 512
  #   model = DefaultTranslator(
  #     src_reader=self.src_reader,
  #     trg_reader=self.trg_reader,
  #     src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
  #     encoder=ResidualLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
  #     attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
  #     decoder=AutoRegressiveDecoder(
  #       input_dim=layer_dim,
  #       embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
  #       rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
  #                                decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
  #       transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
  #       scorer=Softmax(input_dim=layer_dim, vocab_size=100),
  #       bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  #   )
  #   self.assert_in_out_len_equal(model)

  def test_py_lstm_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=3),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(True)
    for sent_i in range(10):
      dy.renew_cg()
      src = self.src_data[sent_i].create_padded_sent(4 - (self.src_data[sent_i].sent_len() % 4))
      event_trigger.start_sent(src)
      embeddings = model.src_embedder.embed_sent(src)
      encodings = model.encoder.transduce(embeddings)
      self.assertEqual(int(math.ceil(len(embeddings) / float(4))), len(encodings))

  def test_py_lstm_mask(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=PyramidalLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    batcher = batchers.TrgBatcher(batch_size=3)
    train_src, _ = batcher.pack(self.src_data, self.trg_data)
    event_trigger.set_train(True)
    for sent_i in range(3):
      dy.renew_cg()
      src = train_src[sent_i]
      event_trigger.start_sent(src)
      embeddings = model.src_embedder.embed_sent(src)
      encodings = model.encoder.transduce(embeddings)
      if train_src[sent_i].mask is None:
        assert encodings.mask is None
      else:
        np.testing.assert_array_almost_equal(train_src[sent_i].mask.np_arr, encodings.mask.np_arr)

  def test_multihead_attention_encoder_len(self):
    layer_dim = 512
    model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=MultiHeadAttentionSeqTransducer(input_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                 decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"),
        transform=NonLinear(input_dim=layer_dim * 2, output_dim=layer_dim),
        scorer=Softmax(input_dim=layer_dim, vocab_size=100),
        bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    self.assert_in_out_len_equal(model)
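# Worked check of the pyramidal length arithmetic tested in test_py_lstm_encoder_len
# above: with 3 layers, each layer after the first halves the sequence length, so the
# overall downsampling factor is 2 ** (layers - 1) = 4 and the expected output length
# is ceil(input_len / 4), matching the assertion in the test.
import math

def pyramidal_out_len(in_len, layers=3):
  return int(math.ceil(in_len / float(2 ** (layers - 1))))

assert pyramidal_out_len(8) == 2
assert pyramidal_out_len(9) == 3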