def __init__(self, pc, model_hyperparams, best_model_path=None):
    """Load both vocabularies, cache special-symbol ids and build the model.

    Args:
        pc: Forwarded unchanged to ``self.build_model``.
        model_hyperparams: Dict-like hyperparameter store. It is indexed
            with ``'VOCAB_PATH'`` and ``'FEAT_VOCAB_PATH'``, kept by
            reference as ``self.hyperparams`` and extended in place with
            ``'VOCAB_SIZE'`` and ``'FEAT_VOCAB_SIZE'``.
        best_model_path: Forwarded unchanged to ``self.build_model``
            (defaults to ``None``).
    """
    self.hyperparams = model_hyperparams

    # Token vocabulary.
    vocab_path = self.hyperparams['VOCAB_PATH']
    print('Loading vocabulary from {}:'.format(vocab_path))
    self.vocab = Vocab.from_file(vocab_path)

    # Feature vocabulary.
    feat_vocab_path = self.hyperparams['FEAT_VOCAB_PATH']
    print('Loading feature vocabulary from {}:'.format(feat_vocab_path))
    self.feat_vocab = Vocab.from_file(feat_vocab_path)

    # Ids of the special symbols, looked up in the token vocabulary.
    self.BEGIN = self.vocab.w2i[BEGIN_CHAR]
    self.STOP = self.vocab.w2i[STOP_CHAR]
    self.UNK = self.vocab.w2i[UNK_CHAR]

    # Record the vocabulary sizes before build_model is called, so they are
    # part of the hyperparameters it can read.
    self.hyperparams['VOCAB_SIZE'] = self.vocab.size()
    self.hyperparams['FEAT_VOCAB_SIZE'] = self.feat_vocab.size()

    self.build_model(pc, best_model_path)

    # Dump the final hyperparameter set, one "name = value" row each,
    # followed by an empty line.
    print('Model Hypoparameters:')
    for name, value in self.hyperparams.items():
        print('{:20} = {}'.format(name, value))
    print('')
lm_model_params = []

# Load the language models. The --lm_predictors, --lm_paths and --lm_orders
# options are comma-separated lists paired positionally; zip() stops at the
# shortest of the three.
# NOTE(review): `lm_models` is not initialised in this span -- presumably it
# is created just above; confirm.
for i, (lm_type, path, order) in enumerate(
        zip(arguments['--lm_predictors'].split(','),
            arguments["--lm_paths"].split(','),
            [int(o) for o in arguments["--lm_orders"].split(',')])):
    lm_model_folder = check_path(path, 'LM_MODEL_FOLDER_{}'.format(i),
                                 is_data_path=False)
    if lm_type == "srilm_char":
        print('...Loading lm model {} from path {}'.format(i, lm_model_folder))
        lm_model = SRILM_char_lm_loader(lm_model_folder, order)
    elif lm_type == "srilm_morph":
        lm_model = SRILM_morpheme_lm_loader(lm_model_folder, order)
        # The morpheme LM needs its own vocabulary file.
        assert arguments['--morph_vocab'] is not None, \
            "--morph_vocab is required for 'srilm_morph' language models"
        lm_model.vocab = Vocab.from_file(
            check_path(arguments['--morph_vocab'], 'morph_vocab',
                       is_data_path=False))
    else:
        print("WARNING -- Could not load language model. Unknown type {} . "
              "Use 'srilm_char' or 'srilm_morph'".format(lm_type))
        # Nothing was loaded for this entry. Falling through to the append
        # below would raise NameError on the first iteration, or re-append
        # the previous iteration's model on later ones, so skip the entry.
        # NOTE(review): if `weights` holds one entry per model, a skipped LM
        # may leave it misaligned -- confirm against where weights is built.
        continue
    lm_models.append(lm_model)
lm_number = len(lm_models)

# Predictions file: --pred_path resolved inside the model folder.
output_file_path = os.path.join(model_folder, arguments['--pred_path'])

print('Evaluating on test..')
# NOTE(review): time.clock() exists on Python 2 (which this file targets) but
# was removed in Python 3.8.
t = time.clock()

# Evaluate either on the sample indices given via --indices or on the whole
# test set; every other argument is the same in both cases.
if arguments['--indices']:
    indices = [int(ind) for ind in arguments['--indices'].split(',')]
    test_iter = test_data.iter(indices)
else:
    test_iter = test_data.iter()
accuracy, test_results = evaluate_syncbeam(
    test_iter, nmt_models, lm_models, weights,
    int(arguments['--beam']), int(arguments['--output_format']),
    verbose=arguments['--verbose'],
    exclude_eow=arguments['--exclude_eow'])