def setUp(self):
  # Module-level imports (random, numpy, dynet as dy, and the xnmt classes
  # used below) are assumed; they are omitted in this excerpt.
  # Seeding
  numpy.random.seed(2)
  random.seed(2)
  layer_dim = 4
  xnmt.events.clear()
  ParamManager.init_param_col()
  # Components of the segmenting encoder: a char-level BiLSTM plus a
  # composer that sums character states into a segment embedding.
  self.segment_encoder_bilstm = BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim)
  self.segment_composer = SumComposer()
  self.src_reader = CharFromWordTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.charvocab"))
  self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
  self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5)
  # Policy-gradient machinery for learning the segmentation decisions.
  baseline = Linear(input_dim=layer_dim, output_dim=1)
  policy_network = Linear(input_dim=layer_dim, output_dim=2)
  self.poisson_prior = PoissonPrior(mu=3.3)
  self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior)
  self.conf_penalty = ConfidencePenalty()
  self.policy_gradient = PolicyGradient(input_dim=layer_dim,
                                        output_dim=2,
                                        baseline=baseline,
                                        policy_network=policy_network,
                                        z_normalization=True,
                                        conf_penalty=self.conf_penalty)
  self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1)
  self.segmenting_encoder = SegmentingSeqTransducer(
    embed_encoder=self.segment_encoder_bilstm,
    segment_composer=self.segment_composer,
    final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
    policy_learning=self.policy_gradient,
    eps_greedy=self.eps_greedy,
    length_prior=self.length_prior,
  )
  self.model = DefaultTranslator(
    src_reader=self.src_reader,
    trg_reader=self.trg_reader,
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
    encoder=self.segmenting_encoder,
    attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
    decoder=AutoRegressiveDecoder(
      input_dim=layer_dim,
      rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                               hidden_dim=layer_dim,
                               decoder_input_dim=layer_dim,
                               yaml_path="decoder"),
      transform=AuxNonLinear(input_dim=layer_dim,
                             output_dim=layer_dim,
                             aux_input_dim=layer_dim),
      scorer=Softmax(vocab_size=100, input_dim=layer_dim),
      embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  )
  event_trigger.set_train(True)
  self.layer_dim = layer_dim
  self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
  self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
  # Batch by target length so each minibatch holds similarly-sized targets.
  my_batcher = batchers.TrgBatcher(batch_size=3)
  self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
  dy.renew_cg(immediate_compute=True, check_validity=True)
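# A minimal usage sketch, not part of the original fixture: it assumes the
# xnmt convention of invoking a loss calculator as
# calc_loss(model, src_batch, trg_batch); the test name is hypothetical.
def test_calc_loss_runs(self):
  # setUp renewed the graph with immediate_compute=True, so a malformed
  # expression would raise during construction here rather than at forward().
  loss = self.loss_calculator.calc_loss(self.model, self.src[0], self.trg[0])
  self.assertIsNotNone(loss)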
model = DefaultTranslator(
  src_reader=PlainTextReader(vocab=src_vocab),
  trg_reader=PlainTextReader(vocab=trg_vocab),
  src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(src_vocab)),
  encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=1),
  attender=MlpAttender(hidden_dim=layer_dim, state_dim=layer_dim, input_dim=layer_dim),
  decoder=AutoRegressiveDecoder(
    input_dim=layer_dim,
    embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(trg_vocab)),
    rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                             hidden_dim=layer_dim,
                             decoder_input_dim=layer_dim,
                             yaml_path="decoder"),
    transform=AuxNonLinear(input_dim=layer_dim,
                           output_dim=layer_dim,
                           aux_input_dim=layer_dim),
    scorer=Softmax(vocab_size=len(trg_vocab), input_dim=layer_dim),
    bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
  inference=inference)
train = SimpleTrainingRegimen(
  name=f"{EXP}",
  model=model,
  batcher=batcher,
  trainer=AdamTrainer(alpha=0.001),
  run_for_epochs=2,
  src_file="examples/data/head.ja",
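# Hedged usage sketch, not in the original excerpt: once the remaining
# regimen arguments are supplied, an xnmt training regimen is launched with a
# checkpoint callback. `model_file` is an assumed path variable, and the
# save_fct mirrors the save_to_file pattern used in the experiment below.
train.run_training(save_fct=lambda: save_to_file(model_file, train))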
def run(self):
  # Module-level imports (os, random, numpy as np, and the xnmt classes used
  # below) are assumed; they are omitted in this excerpt.
  seed = 13
  random.seed(seed)
  np.random.seed(seed)
  EXP_DIR = os.path.dirname(__file__)
  EXP = "annotmined"
  model_file = f"{EXP_DIR}/results/{EXP}.mod"
  log_file = f"{EXP_DIR}/results/{EXP}.log"
  xnmt.tee.set_out_file(log_file, exp_name=EXP)
  # FIXME: this has no effect here; DyNet memory must be configured before
  # DyNet initializes (e.g. via the --dynet-mem command-line flag).
  xnmt.tee.utils.dy.DynetParams().set_mem(1024)
  ParamManager.init_param_col()
  ParamManager.param_col.model_file = model_file
  # Tokenize all splits with a sentencepiece model trained on the mined data,
  # then build frequency-filtered vocabularies from the tokenized outputs.
  pre_runner = PreprocRunner(
    tasks=[
      PreprocTokenize(
        in_files=[
          f'{EXP_DIR}/conala-corpus/{self.mined_data}.snippet',
          f'{EXP_DIR}/conala-corpus/{self.mined_data}.intent',
          f'{EXP_DIR}/conala-corpus/conala-dev.intent',
          f'{EXP_DIR}/conala-corpus/conala-dev.snippet',
          f'{EXP_DIR}/conala-corpus/conala-test.intent',
          f'{EXP_DIR}/conala-corpus/conala-test.snippet'
        ],
        out_files=[
          f'{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.snippet',
          f'{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.intent',
          f'{EXP_DIR}/conala-corpus/conala-dev.tmspm16000.intent',
          f'{EXP_DIR}/conala-corpus/conala-dev.tmspm16000.snippet',
          f'{EXP_DIR}/conala-corpus/conala-test.tmspm16000.intent',
          f'{EXP_DIR}/conala-corpus/conala-test.tmspm16000.snippet'
        ],
        specs=[{
          'filenum': 'all',
          'tokenizers': [
            SentencepieceTokenizer(
              hard_vocab_limit=False,
              train_files=[
                f'{EXP_DIR}/conala-corpus/{self.mined_data}.intent',
                f'{EXP_DIR}/conala-corpus/{self.mined_data}.snippet'
              ],
              vocab_size=self.vocab_size,
              model_type=self.model_type,
              # note: relative path, unlike the EXP_DIR-prefixed files above
              model_prefix=f'conala-corpus/{self.mined_data}.tmspm16000.spm')
          ]
        }]),
      PreprocVocab(
        in_files=[
          f'{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.intent',
          f'{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.snippet'
        ],
        out_files=[
          f'{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.intent.vocab',
          f'{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.snippet.vocab'
        ],
        specs=[{
          'filenum': 'all',
          'filters': [VocabFiltererFreq(min_freq=self.min_freq)]
        }])
    ],
    overwrite=False)
  src_vocab = Vocab(vocab_file=f'{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.intent.vocab')
  trg_vocab = Vocab(vocab_file=f'{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.snippet.vocab')
  batcher = Batcher(batch_size=64)
  # Beam search with length normalization; 'join-piece' undoes the
  # sentencepiece segmentation in the final hypotheses.
  inference = AutoRegressiveInference(
    search_strategy=BeamSearch(
      len_norm=PolynomialNormalization(apply_during_search=True),
      beam_size=5),
    post_process='join-piece')
  layer_dim = self.layer_dim
  if self.embedding == 'SimpleWordEmbedding':
    model = DefaultTranslator(
      src_reader=PlainTextReader(vocab=src_vocab),
      trg_reader=PlainTextReader(vocab=trg_vocab),
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(src_vocab)),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=self.layers),
      attender=MlpAttender(hidden_dim=layer_dim, state_dim=layer_dim, input_dim=layer_dim),
      trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=len(trg_vocab)),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
        transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
        scorer=Softmax(vocab_size=len(trg_vocab), input_dim=layer_dim),
        trg_embed_dim=layer_dim,
        input_feeding=False,
        bridge=CopyBridge(dec_dim=layer_dim)),
      inference=inference)
  else:
    # Same architecture, but with pretrained embeddings on both sides.
    model = DefaultTranslator(
      src_reader=PlainTextReader(vocab=src_vocab),
      trg_reader=PlainTextReader(vocab=trg_vocab),
      src_embedder=PretrainedSimpleWordEmbedder(filename=self.embedding, emb_dim=100, vocab=src_vocab),
      encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, layers=self.layers),
      attender=MlpAttender(hidden_dim=layer_dim, state_dim=layer_dim, input_dim=layer_dim),
      trg_embedder=PretrainedSimpleWordEmbedder(filename=self.trg_embedding, emb_dim=100, vocab=trg_vocab),
      decoder=AutoRegressiveDecoder(
        input_dim=layer_dim,
        rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
        transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim),
        scorer=Softmax(vocab_size=len(trg_vocab), input_dim=layer_dim),
        trg_embed_dim=layer_dim,
        input_feeding=False,
        bridge=CopyBridge(dec_dim=layer_dim)),
      inference=inference)
    # decoder = AutoRegressiveDecoder(bridge=CopyBridge(), inference=inference))
  train = SimpleTrainingRegimen(
    name=f"{EXP}",
    model=model,
    batcher=WordSrcBatcher(avg_batch_size=64),
    trainer=AdamTrainer(alpha=self.alpha),
    patience=3,
    lr_decay=0.5,
    restart_trainer=True,
    run_for_epochs=self.epochs,
    src_file=f"{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.intent",
    trg_file=f"{EXP_DIR}/conala-corpus/{self.mined_data}.tmspm16000.snippet",
    dev_tasks=[
      LossEvalTask(
        src_file=f"{EXP_DIR}/conala-corpus/conala-dev.tmspm16000.intent",
        ref_file=f"{EXP_DIR}/conala-corpus/conala-dev.tmspm16000.snippet",
        model=model,
        batcher=WordSrcBatcher(avg_batch_size=64)),
      AccuracyEvalTask(
        eval_metrics='bleu',
        src_file=f'{EXP_DIR}/conala-corpus/conala-dev.tmspm16000.intent',
        ref_file=f'{EXP_DIR}/conala-corpus/conala-dev.snippet',
        hyp_file=f'results/{EXP}.dev.hyp',
        model=model)
    ])
  evaluate = [
    AccuracyEvalTask(
      eval_metrics="bleu",
      src_file=f"{EXP_DIR}/conala-corpus/conala-test.tmspm16000.intent",
      ref_file=f"{EXP_DIR}/conala-corpus/conala-test.snippet",
      hyp_file=f"results/{EXP}.test.hyp",
      inference=inference,
      model=model)
  ]
  standard_experiment = Experiment(
    exp_global=ExpGlobal(
      default_layer_dim=layer_dim,
      dropout=0.3,
      log_file=log_file,
      model_file=model_file),
    name="annotmined",
    model=model,
    train=train,
    evaluate=evaluate)
  # run experiment
  standard_experiment(save_fct=lambda: save_to_file(model_file, standard_experiment))
  exit()
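# Hedged usage sketch, not part of the original: run() above reads its
# hyperparameters from self, so the enclosing object must carry them. The
# class name and all argument values below are hypothetical illustrations.
if __name__ == "__main__":
  AnnotMinedExperiment(
    mined_data="conala-mined",       # hypothetical corpus file prefix
    vocab_size=16000,                # sentencepiece vocabulary size
    model_type="unigram",            # sentencepiece model type
    min_freq=1,                      # vocabulary frequency filter threshold
    layer_dim=512,
    layers=1,
    embedding="SimpleWordEmbedding", # selects the first model branch
    trg_embedding=None,              # only used by the pretrained branch
    alpha=0.001,                     # Adam learning rate
    epochs=20,
  ).run()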