def test(opt):
    """Translate the test corpus with a saved model and log the BLEU report.

    The function reads the dictionaries and the source test corpus, builds
    the model in test mode, translates every test sentence with beam search,
    writes one translation per line to ``opt.test_out`` and finally logs the
    details returned by ``evaluation.bleu_score``.

    Args:
        opt: Options object. The attributes read here are ``verbose``,
            ``test_src``, ``test_dst``, ``test_out`` and ``beam_size``; the
            ``helpers`` functions receive the whole object.

    Side effects:
        Sets ``opt.test_out`` to ``helpers.exp_filename(opt, 'test.out')``
        when it is empty, and (over)writes that file.
    """
    log = helpers.Logger(opt.verbose)
    timer = helpers.Timer()
    # Load data =========================================================
    log.info('Reading corpora')
    # Read vocabs
    widss, ids2ws, widst, ids2wt = helpers.get_dictionaries(opt, test=True)
    # Read test
    tests_data = np.asarray(data.read_corpus(opt.test_src, widss), dtype=list)
    # Test output
    if not opt.test_out:
        opt.test_out = helpers.exp_filename(opt, 'test.out')
    # Get target language model
    lang_model = helpers.get_language_model(opt, None, widst, test=True)
    # Create model ======================================================
    log.info('Creating model')
    s2s = helpers.build_model(opt, widss, widst, lang_model, test=True)
    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(opt,
                             src_dict_size=len(widss),
                             trg_dict_size=len(widst))
    # Start testing =====================================================
    log.info('Start running on test set, buckle up!')
    timer.restart()
    s2s.set_test_mode()
    translations = []
    for x in tests_data:
        y = s2s.translate(x, beam_size=opt.beam_size)
        # The first and last ids are dropped before mapping back to words
        # (presumably the sentence start/end markers -- confirm against
        # Seq2SeqModel.translate).
        translations.append(' '.join([ids2wt[w] for w in y[1:-1]]))
    np.savetxt(opt.test_out, translations, fmt='%s')
    # Only the textual report is used; the numeric score is discarded.
    _, details = evaluation.bleu_score(opt.test_dst, opt.test_out)
    log.info('Finished running on test set %.2f elapsed.' % timer.tick())
    log.info(details)
def test(opt):
    """Run a saved seq2seq model on the test set and print the BLEU details.

    Dictionaries are loaded from ``opt.dic_src`` / ``opt.dic_dst`` when those
    are set; otherwise they are built from the training corpora and saved
    next to the experiment (``<exp_name>_src_dic.txt`` /
    ``<exp_name>_trg_dic.txt``). Translations are written one per line to
    ``opt.test_out`` and scored against ``opt.test_dst``.

    Args:
        opt: Options object carrying the paths and model hyper-parameters
            read below.
    """
    # Load data =========================================================
    if opt.verbose:
        print('Reading corpora')
    # Source vocabulary: build and save it unless a dictionary is provided
    if not opt.dic_src:
        widss, ids2ws = data.read_dic(opt.train_src,
                                      max_size=opt.src_vocab_size)
        data.save_dic(opt.exp_name + '_src_dic.txt', widss)
    else:
        widss, ids2ws = data.load_dic(opt.dic_src)
    # Target vocabulary: same policy
    if not opt.dic_dst:
        widst, ids2wt = data.read_dic(opt.train_dst,
                                      max_size=opt.trg_vocab_size)
        data.save_dic(opt.exp_name + '_trg_dic.txt', widst)
    else:
        widst, ids2wt = data.load_dic(opt.dic_dst)
    # Test sentences (source side only)
    tests_data = data.read_corpus(opt.test_src, widss)

    # Create model ======================================================
    if opt.verbose:
        print('Creating model')
        sys.stdout.flush()
    s2s = seq2seq.Seq2SeqModel(
        opt.emb_dim,
        opt.hidden_dim,
        opt.att_dim,
        widss,
        widst,
        model_file=opt.model,
        bidir=opt.bidir,
        word_emb=opt.word_emb,
        dropout=opt.dropout_rate,
        max_len=opt.max_len,
    )
    # Restore saved parameters when a model file was given
    if s2s.model_file is not None:
        s2s.load()
    s2s.model_file = opt.exp_name + '_model'

    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(opt,
                             src_dict_size=len(widss),
                             trg_dict_size=len(widst))
        sys.stdout.flush()

    # Start testing =====================================================
    print('Start running on test set, buckle up!')
    sys.stdout.flush()
    started_at = time.time()
    with open(opt.test_out, 'w+') as out_file:
        for sentence in tests_data:
            hypothesis = s2s.translate(sentence, beam_size=opt.beam_size)
            # Map ids back to words, leaving out the first and last id
            words = [ids2wt[w] for w in hypothesis[1:-1]]
            out_file.write(' '.join(words) + '\n')
    _, details = evaluation.bleu_score(opt.test_dst, opt.test_out)
    test_elapsed = time.time() - started_at
    print('Finished running on test set', test_elapsed, 'elapsed.')
    print(details)
    sys.stdout.flush()
import data

# Retrieve command line options
opt = options.rnn_parser().parse_args()

# Resolve the output model file: an explicit --model_out is kept as is;
# otherwise fall back to the input model file, or to a name derived from
# the experiment name when neither is given.
if opt.model_out is not None:
    model_out = opt.model_out
elif opt.model_in is None:
    opt.model_out = opt.exp_name + '_model.txt'
else:
    opt.model_out = opt.model_in

# Print config
if opt.verbose:
    options.print_config(opt)

# Load data
N_train = 55000
N_dev = 5000
N_test = 10000
train_x, dev_x, test_x, train_y, dev_y, test_y = data.get_mnist(
    N_dev,
    shuffle=opt.shuffle,
    preprocessing=data.whiten,
)

# Model parameters
num_classes = len(set(train_y))  # Number of distinct labels in train_y
input_length = train_x.shape[1]  # Size of the second axis of train_x
dh = opt.hidden_dim
di = 1

# Create model
model = dy.Model()  # DyNet Model
def train(opt):
    """Train a seq2seq model with periodic validation and BLEU checkpoints.

    Steps:
      1. Load the dictionaries from ``opt.dic_src`` / ``opt.dic_dst`` or
         build them from the training corpora and save them as
         ``<exp_name>_src_dic.txt`` / ``<exp_name>_trg_dic.txt``.
      2. Read the training and validation corpora.
      3. Build ``seq2seq.Seq2SeqModel`` (loading parameters when
         ``opt.model`` is set) and the DyNet trainer named by
         ``opt.trainer``.
      4. Run ``opt.num_epochs`` epochs; every ``check_train_error_every``,
         ``check_valid_error_every`` and ``valid_bleu_every`` updates,
         report respectively the training loss, the validation loss and the
         validation BLEU. The model is saved whenever the validation BLEU
         improves on the best value seen so far.

    Args:
        opt: Options object carrying the paths and hyper-parameters read
            below.
    """
    # Load data =========================================================
    if opt.verbose:
        print('Reading corpora')
    # Read vocabs
    if opt.dic_src:
        widss, ids2ws = data.load_dic(opt.dic_src)
    else:
        widss, ids2ws = data.read_dic(opt.train_src,
                                      max_size=opt.src_vocab_size)
        data.save_dic(opt.exp_name + '_src_dic.txt', widss)

    if opt.dic_dst:
        widst, ids2wt = data.load_dic(opt.dic_dst)
    else:
        widst, ids2wt = data.read_dic(opt.train_dst,
                                      max_size=opt.trg_vocab_size)
        data.save_dic(opt.exp_name + '_trg_dic.txt', widst)

    # Read training
    trainings_data = data.read_corpus(opt.train_src, widss)
    trainingt_data = data.read_corpus(opt.train_dst, widst)
    # Read validation
    valids_data = data.read_corpus(opt.valid_src, widss)
    validt_data = data.read_corpus(opt.valid_dst, widst)

    # Create model ======================================================
    if opt.verbose:
        print('Creating model')
        sys.stdout.flush()
    s2s = seq2seq.Seq2SeqModel(opt.emb_dim,
                               opt.hidden_dim,
                               opt.att_dim,
                               widss,
                               widst,
                               model_file=opt.model,
                               bidir=opt.bidir,
                               word_emb=opt.word_emb,
                               dropout=opt.dropout_rate,
                               max_len=opt.max_len)

    if s2s.model_file is not None:
        s2s.load()
    s2s.model_file = opt.exp_name+'_model.txt'

    # Trainer ==========================================================
    # This must be ONE if/elif chain: with a separate leading `if`, the
    # 'sgd' choice fell through to the final `else`, which printed the
    # "invalid trainer" warning and built the trainer a second time.
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.model,
                                        e0_min=opt.learning_rate / 10,
                                        e0_max=opt.learning_rate,
                                        edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.model,
                                    e0=opt.learning_rate,
                                    edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.model,
                                 opt.learning_rate,
                                 edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD',
              file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    if opt.verbose:
        print('Using '+opt.trainer+' optimizer')
    trainer.set_clip_threshold(opt.gradient_clip)

    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(opt,
                             src_dict_size=len(widss),
                             trg_dict_size=len(widst))
        sys.stdout.flush()

    # Create batch loaders ==============================================
    if opt.verbose:
        print('Creating batch loaders')
        sys.stdout.flush()
    trainbatchloader = data.BatchLoader(trainings_data,
                                        trainingt_data,
                                        opt.batch_size)
    devbatchloader = data.BatchLoader(valids_data,
                                      validt_data,
                                      opt.dev_batch_size)

    # Start training ====================================================
    if opt.verbose:
        print('starting training')
        sys.stdout.flush()
    start = time.time()
    train_loss = 0
    processed = 0
    best_bleu = 0
    i = 0  # number of parameter updates performed so far
    for epoch in range(opt.num_epochs):
        for x, y in trainbatchloader:
            processed += sum(map(len, y))
            bsize = len(y)
            # Compute loss
            loss = s2s.calculate_loss(x, y)
            # Backward pass and parameter update
            loss.backward()
            trainer.update()
            train_loss += loss.scalar_value() * bsize

            if (i+1) % opt.check_train_error_every == 0:
                # Check average training error from time to time
                logloss = train_loss / processed
                ppl = np.exp(logloss)
                elapsed = time.time()-start
                trainer.status()
                print(" Training_loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (logloss, ppl, elapsed, processed))
                # Restart the running statistics for the next report
                start = time.time()
                train_loss = 0
                processed = 0
                sys.stdout.flush()

            if (i+1) % opt.check_valid_error_every == 0:
                # Check generalization error on the validation set from
                # time to time
                dev_loss = 0
                dev_processed = 0
                dev_start = time.time()
                for dev_x, dev_y in devbatchloader:
                    dev_processed += sum(map(len, dev_y))
                    dev_bsize = len(dev_y)
                    dev_batch_loss = s2s.calculate_loss(dev_x, dev_y,
                                                        test=True)
                    dev_loss += dev_batch_loss.scalar_value() * dev_bsize
                dev_logloss = dev_loss/dev_processed
                dev_ppl = np.exp(dev_logloss)
                dev_elapsed = time.time()-dev_start
                print("[epoch %d] Dev loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (epoch, dev_logloss, dev_ppl, dev_elapsed, dev_processed))
                sys.stdout.flush()
                # Do not count validation time as training time
                start = time.time()

            if (i+1) % opt.valid_bleu_every == 0:
                # Check BLEU score on the validation set from time to time
                print('Start translating validation set, buckle up!')
                sys.stdout.flush()
                bleu_start = time.time()
                with open(opt.valid_out, 'w+') as f:
                    for src in valids_data:
                        y_hat = s2s.translate(src, beam_size=opt.beam_size)
                        translation = [ids2wt[w] for w in y_hat[1:-1]]
                        print(' '.join(translation), file=f)
                bleu, details = evaluation.bleu_score(opt.valid_dst,
                                                      opt.valid_out)
                bleu_elapsed = time.time()-bleu_start
                print('Finished translating validation set',
                      bleu_elapsed, 'elapsed.')
                print(details)
                # Early stopping : save the latest best model
                if bleu > best_bleu:
                    best_bleu = bleu
                    print('Best BLEU score up to date, saving model to',
                          s2s.model_file)
                    s2s.save()
                sys.stdout.flush()
                # Do not count translation time as training time
                start = time.time()
            i = i+1
        trainer.update_epoch()
def eval_user_adaptation(opt):
    """Compare baseline and user-adapted translations over a list of users.

    For every (source, target) file pair listed in ``opt.usr_file_list`` the
    data is split into an adaptation part and a test part. The model is
    reloaded and its user vector reset, the test part is translated once
    with that baseline model and once after ``adapt_user`` has run on at
    most ``opt.max_n_train`` adaptation pairs. The pooled outputs are scored
    with BLEU and compared with paired bootstrap resampling.

    Args:
        opt: Options object carrying the paths and settings read below.
    """
    log = utils.Logger(opt.verbose)
    timer = utils.Timer()
    # Read vocabs
    lexicon = helpers.get_lexicon(opt)
    # Read data
    filepairs = load_user_filepairs(opt.usr_file_list)
    # No target language model is used here
    lang_model = None
    # Load model
    s2s = helpers.build_model(opt, lexicon, lang_model, test=True)
    # 'mixture_weights' is rejected unless the recognizer is 'fact_voc'
    mixture_only = opt.update_mode == 'mixture_weights'
    if mixture_only and opt.user_recognizer != 'fact_voc':
        log.info('Updating only the mixture weights doesn\'t make sense here')
        exit()
    s2s.lm = lexicon.trg_unigrams
    # Trainer
    trainer = helpers.get_trainer(opt, s2s)
    # Print config
    if opt.verbose:
        options.print_config(
            opt,
            src_dict_size=len(lexicon.w2ids),
            trg_dict_size=len(lexicon.w2idt),
        )

    # Pooled outputs over all users, aligned line by line
    gold = []
    base_translations = []
    adapt_translations = []

    for usr_id, (src_file, trg_file) in enumerate(filepairs):
        file_label = os.path.basename(src_file).split()[0]
        log.info('Evaluating on files %s' % file_label)
        # Load file pair
        src_data = data.read_corpus(src_file, lexicon.w2ids, raw=True)
        trg_data = data.read_corpus(trg_file, lexicon.w2idt, raw=True)
        # Split train/test
        train_src, test_src, train_trg, test_trg, order = split_user_data(
            src_data, trg_data, n_test=opt.n_test)
        # Convert train data to indices
        train_src = lexicon.sents_to_ids(train_src)
        train_trg = lexicon.sents_to_ids(train_trg, trg=True)
        # Keep the references as space-joined strings
        gold.extend(' '.join(s) for s in test_trg)
        # Start every user from the saved model with a fresh user vector
        s2s.load()
        s2s.reset_usr_vec()
        # Translate with baseline model
        baseline_out = evaluate_model(s2s, test_src, opt.beam_size)
        base_translations.extend(baseline_out)
        # Adapt on the first n_train pairs, then translate again
        n_train = opt.max_n_train
        adapted_out = adapt_user(s2s,
                                 trainer,
                                 train_src[:n_train],
                                 train_trg[:n_train],
                                 test_src,
                                 opt)
        adapt_translations.extend(adapted_out)

    # Temp files
    temp_gold = utils.exp_temp_filename(opt, 'gold.txt')
    temp_base = utils.exp_temp_filename(opt, '%s_base.txt' % opt.update_mode)
    temp_adapt = utils.exp_temp_filename(opt, '%s_adapt.txt' % opt.update_mode)
    utils.savetxt(temp_gold, gold)
    utils.savetxt(temp_base, base_translations)
    utils.savetxt(temp_adapt, adapt_translations)

    # Evaluate base translations
    bleu, details = evaluation.bleu_score(temp_gold, temp_base)
    log.info('Base BLEU score: %.2f' % bleu)
    # Evaluate adapted translations
    bleu, details = evaluation.bleu_score(temp_gold, temp_adapt)
    log.info('Adaptation BLEU score: %.2f' % bleu)

    # Compare both
    temp_bootstrap_gold = utils.exp_temp_filename(opt, 'bootstrap_gold.txt')
    temp_bootstrap_base = utils.exp_temp_filename(opt, 'bootstrap_base.txt')
    temp_bootstrap_adapt = utils.exp_temp_filename(opt, 'bootstrap_adapt.txt')
    bleus = evaluation.paired_bootstrap_resampling(
        temp_gold,
        temp_base,
        temp_adapt,
        opt.bootstrap_num_samples,
        opt.bootstrap_sample_size,
        temp_bootstrap_gold,
        temp_bootstrap_base,
        temp_bootstrap_adapt,
    )
    evaluation.print_paired_stats(bleus)
    # The bootstrap files are scratch space only
    for scratch in (temp_bootstrap_gold,
                    temp_bootstrap_base,
                    temp_bootstrap_adapt):
        os.remove(scratch)
def train(opt):
    """Train a seq2seq model, with periodic validation and early stopping.

    Dictionaries, language model, model and trainer all come from the
    ``helpers`` module. During training the function periodically logs the
    running training loss, the validation loss and the validation BLEU
    (intervals: ``opt.check_train_error_every``,
    ``opt.check_valid_error_every``, ``opt.valid_bleu_every`` updates).
    The model is saved each time the validation BLEU improves. When
    ``opt.patience`` is positive and more than ``opt.patience`` consecutive
    BLEU evaluations bring no improvement, the process exits.

    Args:
        opt: Options object carrying the paths and settings read below.

    Side effects:
        Sets ``opt.valid_out`` when it is empty and (over)writes that file
        at every BLEU evaluation.
    """
    log = helpers.Logger(opt.verbose)
    timer = helpers.Timer()

    # Load data =========================================================
    log.info('Reading corpora')
    # Read vocabs
    widss, ids2ws, widst, ids2wt = helpers.get_dictionaries(opt)
    # Read training
    trainings_data = data.read_corpus(opt.train_src, widss)
    trainingt_data = data.read_corpus(opt.train_dst, widst)
    # Read validation
    valids_data = data.read_corpus(opt.valid_src, widss)
    validt_data = data.read_corpus(opt.valid_dst, widst)
    # Validation output
    if not opt.valid_out:
        opt.valid_out = helpers.exp_filename(opt, 'valid.out')
    # Get target language model
    lang_model = helpers.get_language_model(opt, trainingt_data, widst)

    # Create model ======================================================
    log.info('Creating model')
    s2s = helpers.build_model(opt, widss, widst, lang_model)

    # Trainer ==========================================================
    trainer = helpers.get_trainer(opt, s2s)
    log.info('Using ' + opt.trainer + ' optimizer')

    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(
            opt,
            src_dict_size=len(widss),
            trg_dict_size=len(widst),
        )

    # Create batch loaders ==============================================
    log.info('Creating batch loaders')
    trainbatchloader = data.BatchLoader(
        trainings_data, trainingt_data, opt.batch_size)
    devbatchloader = data.BatchLoader(
        valids_data, validt_data, opt.dev_batch_size)

    # Start training ====================================================
    log.info('starting training')
    timer.restart()
    train_loss = 0
    processed = 0
    best_bleu = -1
    deadline = 0  # BLEU evaluations since the last improvement
    step = 0      # parameter updates performed so far
    for epoch in range(opt.num_epochs):
        for x, y in trainbatchloader:
            s2s.set_train_mode()
            batch_size = len(y)
            processed += sum(len(sent) for sent in y)
            # Forward pass, backward pass and parameter update
            loss = s2s.calculate_loss(x, y)
            loss.backward()
            trainer.update()
            train_loss += loss.scalar_value() * batch_size

            updates_done = step + 1

            # Running training error ------------------------------------
            if updates_done % opt.check_train_error_every == 0:
                logloss = train_loss / processed
                ppl = np.exp(logloss)
                trainer.status()
                log.info(
                    " Training_loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                    (logloss, ppl, timer.tick(), processed))
                train_loss = 0
                processed = 0

            # Validation loss -------------------------------------------
            if updates_done % opt.check_valid_error_every == 0:
                s2s.set_test_mode()
                dev_loss = 0
                dev_processed = 0
                timer.restart()
                for dev_x, dev_y in devbatchloader:
                    dev_processed += sum(len(sent) for sent in dev_y)
                    dev_batch_loss = s2s.calculate_loss(dev_x, dev_y,
                                                        test=True)
                    dev_loss += dev_batch_loss.scalar_value() * len(dev_y)
                dev_logloss = dev_loss / dev_processed
                dev_ppl = np.exp(dev_logloss)
                log.info(
                    "[epoch %d] Dev loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                    (epoch, dev_logloss, dev_ppl, timer.tick(), dev_processed))

            # Validation BLEU -------------------------------------------
            if updates_done % opt.valid_bleu_every == 0:
                s2s.set_test_mode()
                log.info('Start translating validation set, buckle up!')
                timer.restart()
                with open(opt.valid_out, 'w+') as f:
                    for sentence in valids_data:
                        y_hat = s2s.translate(sentence,
                                              beam_size=opt.beam_size)
                        # First and last ids are left out of the output
                        words = [ids2wt[w] for w in y_hat[1:-1]]
                        f.write(' '.join(words) + '\n')
                bleu, details = evaluation.bleu_score(opt.valid_dst,
                                                      opt.valid_out)
                log.info('Finished translating validation set %.2f elapsed.' %
                         timer.tick())
                log.info(details)
                # Early stopping : save the latest best model
                if bleu <= best_bleu:
                    deadline += 1
                else:
                    best_bleu = bleu
                    log.info('Best BLEU score up to date, saving model to %s' %
                             s2s.model_file)
                    s2s.save()
                    deadline = 0
                if opt.patience > 0 and deadline > opt.patience:
                    log.info('No improvement since %d epochs, early stopping '
                             'with best validation BLEU score: %.3f' %
                             (deadline, best_bleu))
                    exit()
            step += 1
        trainer.update_epoch()
def eval_user_adaptation(opt):
    """Measure BLEU as a function of the number of adaptation sentence pairs.

    For every user file pair and every ``n_train`` in
    ``range(opt.min_n_train, opt.max_n_train)``, the model is adapted on the
    first ``n_train`` pairs and the user's test split is translated. The
    adaptation either resets the user vector from log-unigram statistics
    (when ``opt.log_unigram_bias`` is set) or calls ``adapt``.
    Translations are pooled per ``n_train`` over all users, scored with
    BLEU and bootstrap resampling, and the BLEU scores are written to
    ``bleu_scores.txt`` in the experiment directory.

    Args:
        opt: Options object carrying the paths and settings read below.
    """
    log = utils.Logger(opt.verbose)
    timer = utils.Timer()
    # Read vocabs
    lexicon = helpers.get_lexicon(opt)
    # Read data
    filepairs = load_user_filepairs(opt.usr_file_list)
    # No target language model is used here
    lang_model = None
    # Load model
    s2s = helpers.build_model(opt, lexicon, lang_model, test=True)
    # Trainer
    trainer = helpers.get_trainer(opt, s2s)
    # Print config
    if opt.verbose:
        options.print_config(
            opt,
            src_dict_size=len(lexicon.w2ids),
            trg_dict_size=len(lexicon.w2idt),
        )

    train_sizes = range(opt.min_n_train, opt.max_n_train)
    # One pooled list of translations per adaptation-set size
    translations = {size: [] for size in train_sizes}
    gold = []

    # Run training
    for usr_id, (src_file, trg_file) in enumerate(filepairs):
        file_label = os.path.basename(src_file).split()[0]
        log.info('Evaluating on files %s' % file_label)
        # Load file pair
        src_data = data.read_corpus(src_file, lexicon.w2ids, raw=True)
        trg_data = data.read_corpus(trg_file, lexicon.w2idt, raw=True)
        # Split train/test
        train_src, test_src, train_trg, test_trg, order = split_user_data(
            src_data, trg_data, n_test=opt.n_test)
        # Convert train data to indices
        train_src = lexicon.sents_to_ids(train_src)
        train_trg = lexicon.sents_to_ids(train_trg, trg=True)
        # Keep the references as space-joined strings
        gold.extend(' '.join(s) for s in test_trg)

        for n_train in train_sizes:
            log.info('Training on %d sentence pairs' % n_train)
            # Train on n_train first sentences
            X = train_src[:n_train]
            Y = train_trg[:n_train]
            # NOTE(review): this filename is not used inside this loop.
            temp_out = utils.exp_temp_filename(opt, str(n_train) + 'out.txt')
            if opt.full_training:
                s2s.load()
            if opt.log_unigram_bias:
                # Reset the user vector from smoothed log-unigram statistics
                if opt.use_trg_unigrams:
                    unigrams = lexicon.compute_unigrams(Y, lang='trg')
                else:
                    unigrams = lexicon.estimate_unigrams(X)
                log_unigrams = np.log(unigrams + opt.log_unigrams_eps)
                s2s.reset_usr_vec(log_unigrams)
            elif n_train > 0:
                adapt(s2s, trainer, X, Y,
                      opt.num_epochs, opt.check_train_error_every)
            log.info('Translating test file')
            s2s.set_test_mode()
            # Test on test split
            pooled = translations[n_train]
            for x in test_src:
                pooled.append(s2s.translate(x, 0, beam_size=opt.beam_size))

    # Temp files
    temp_gold = utils.exp_temp_filename(opt, 'gold.txt')
    np.savetxt(temp_gold, gold, fmt='%s')

    # Results
    test_bleus = np.zeros(opt.max_n_train - opt.min_n_train)
    for n_train in train_sizes:
        log.info('Evaluation for %d sentence pairs' % n_train)
        prefix = str(n_train)
        temp_out = utils.exp_temp_filename(opt, prefix + 'out.txt')
        temp_bootstrap_out = utils.exp_temp_filename(
            opt, prefix + '_bootstrap_out.txt')
        temp_bootstrap_ref = utils.exp_temp_filename(
            opt, prefix + '_bootstrap_ref.txt')
        np.savetxt(temp_out, translations[n_train], fmt='%s')
        bleu, details = evaluation.bleu_score(temp_gold, temp_out)
        log.info('BLEU score: %.2f' % bleu)
        bleus = evaluation.bootstrap_resampling(
            temp_gold,
            temp_out,
            opt.bootstrap_num_samples,
            opt.bootstrap_sample_size,
            temp_bootstrap_ref,
            temp_bootstrap_out,
        )
        evaluation.print_stats(bleus)
        test_bleus[n_train - opt.min_n_train] = bleu
    np.savetxt(utils.exp_filename(opt, 'bleu_scores.txt'),
               test_bleus, fmt='%.3f')