Beispiel #1
0
    def __init__(self, pc, model_hyperparams, best_model_path=None):
        """Load both vocabularies, record their sizes and build the model.

        Args:
            pc: Passed through unchanged to ``self.build_model``.
            model_hyperparams: Hyperparameter mapping. Must provide the keys
                ``'VOCAB_PATH'`` and ``'FEAT_VOCAB_PATH'``. It is stored by
                reference (not copied) and updated in place with
                ``'VOCAB_SIZE'`` and ``'FEAT_VOCAB_SIZE'``, so the caller's
                object gains both keys as well.
            best_model_path: Passed through unchanged to ``self.build_model``
                (defaults to ``None``).

        Raises:
            KeyError: If ``model_hyperparams`` is a plain dict that lacks one
                of the two path keys.
        """
        # Every print below is a single-argument call, which produces the
        # same output under Python 2's print statement and Python 3's print
        # function; the former statement syntax only parsed on Python 2.
        self.hyperparams = model_hyperparams

        vocab_path = self.hyperparams['VOCAB_PATH']
        print('Loading vocabulary from {}:'.format(vocab_path))
        self.vocab = Vocab.from_file(vocab_path)

        feat_vocab_path = self.hyperparams['FEAT_VOCAB_PATH']
        print('Loading feature vocabulary from {}:'.format(feat_vocab_path))
        self.feat_vocab = Vocab.from_file(feat_vocab_path)

        # Cache the vocabulary indices of the special symbols.
        self.BEGIN = self.vocab.w2i[BEGIN_CHAR]
        self.STOP = self.vocab.w2i[STOP_CHAR]
        self.UNK = self.vocab.w2i[UNK_CHAR]

        # The sizes are written before build_model runs, so they are part of
        # the hyperparameters it can see and of the dump printed below.
        self.hyperparams['VOCAB_SIZE'] = self.vocab.size()
        self.hyperparams['FEAT_VOCAB_SIZE'] = self.feat_vocab.size()

        self.build_model(pc, best_model_path)

        # The header text is kept exactly as it was so log output is unchanged.
        print('Model Hypoparameters:')
        for k, v in self.hyperparams.items():
            print('{:20} = {}'.format(k, v))
        # Trailing blank line; print('') emits a bare newline on Python 2 and 3.
        print('')
    # Fragment of a larger routine whose header is outside this view: it loads
    # the language models selected on the command line and then evaluates on
    # the test data with evaluate_syncbeam.
    # NOTE(review): `lm_model_params` is never read in the visible code; the
    # loop below appends to `lm_models`, which must already be bound before
    # this fragment -- confirm which name was intended.
    lm_model_params = []
    # Load one language model per (type, path, order) triple. The three
    # comma-separated options are combined with zip(), so iteration stops at
    # the shortest of the three lists; orders are converted to int.
    for i,(lm_type,path,order) in enumerate(\
		zip(\
		    arguments['--lm_predictors'].split(','),\
		    arguments["--lm_paths"].split(','),\
		    [int(o) for o in arguments["--lm_orders"].split(',')]\
		)):
        # NOTE(review): check_path is defined elsewhere; presumably it
        # validates/resolves the given path -- confirm.
        lm_model_folder =  check_path(path, 'LM_MODEL_FOLDER_{}'.format(i), is_data_path=False)
        if lm_type=="srilm_char":
            print '...Loading lm model {} from path {}'.format(i,lm_model_folder)
            lm_model =  SRILM_char_lm_loader(lm_model_folder, order)
        elif lm_type=="srilm_morph":
            lm_model = SRILM_morpheme_lm_loader(lm_model_folder,order)
            # The morpheme-level model additionally gets a vocabulary loaded
            # from the file given via --morph_vocab, which must be set.
            assert arguments['--morph_vocab'] != None
            lm_model.vocab = Vocab.from_file(check_path(arguments['--morph_vocab'], 'morph_vocab', is_data_path=False))
        else:
            # NOTE(review): only a warning is printed here; execution still
            # reaches the append below, where `lm_model` is either the model
            # from the previous iteration or (on the first iteration, unless
            # bound before this fragment) undefined -- confirm whether this
            # case should abort or skip instead.
            print "WARNING -- Could not load language model. Unknown type",lm_type,". Use 'srilm_char' or 'srilm_morph'"
        lm_models.append(lm_model)
    # Number of entries in lm_models after loading (not read again in the
    # visible code).
    lm_number  = len(lm_models)

    # Path built from the model folder and the --pred_path option; presumably
    # the predictions are written there further down (not visible here).
    output_file_path = os.path.join(model_folder,arguments['--pred_path'])

    # Evaluate on the test data.
    print 'Evaluating on test..'
    # Start timestamp; `t` is not read in the visible code.
    # NOTE(review): time.clock() was removed in Python 3.8.
    t = time.clock()
    # Both branches make the same evaluate_syncbeam call; they differ only in
    # whether test_data.iter() is given the index list parsed from --indices.
    if arguments['--indices']:
        indices = [int(ind) for ind in arguments['--indices'].split(',')]
        accuracy, test_results = evaluate_syncbeam(test_data.iter(indices), nmt_models, lm_models, weights, int(arguments['--beam']), int(arguments['--output_format']), verbose=arguments['--verbose'], exclude_eow=arguments['--exclude_eow'])
    else:
        accuracy, test_results = evaluate_syncbeam(test_data.iter(), nmt_models, lm_models, weights, int(arguments['--beam']), int(arguments['--output_format']), verbose=arguments['--verbose'], exclude_eow=arguments['--exclude_eow'])