    lm_models = []
    lm_model_params = []
    ## loading the language models
    for i, (lm_type, path, order) in enumerate(
            zip(arguments['--lm_predictors'].split(','),
                arguments['--lm_path'].split(','),
                [int(o) for o in arguments['--lm_order'].split(',')])):
        print '...Loading lm model {}'.format(i)
        lm_model_folder = check_path(path, 'LM_MODEL_FOLDER_{}'.format(i), is_data_path=False)
        if lm_type == 'srilm_char':
            lm_model = SRILM_char_lm_loader(path, order)
        elif lm_type == 'srilm_morph':
            lm_model = SRILM_morpheme_lm_loader(path, order)
        else:
            print "WARNING -- Could not load language model. Unknown type {}. Use 'srilm_char' or 'srilm_morph'".format(lm_type)
            continue
        lm_models.append(lm_model)
    lm_number = len(lm_models)
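    # Note: the three --lm_* options above are parallel comma-separated lists.
    # A hypothetical invocation (values illustrative, not from the original docs):
    #   --lm_predictors srilm_char,srilm_morph --lm_path c.lm,m.lm --lm_order 5,3
    # would yield the tuples ('srilm_char', 'c.lm', 5) and ('srilm_morph', 'm.lm', 3).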

    output_file_path = os.path.join(model_folder,arguments['--pred_path'])

    # evaluate the best dev model on test and save its predictions
    print 'Evaluating on test...'
    t = time.clock()
    accuracy, test_results = evaluate_syncbeam(test_data.iter(indices=[0]), ed_models, lm_models, lm_weights, int(arguments['--beam']))
    print 'Time: {}'.format(time.clock()-t)
    print 'accuracy: {}'.format(accuracy)
    write_pred_file(output_file_path, test_results)
    write_eval_file(output_file_path, accuracy, test_path)
Example #2
            if patience == train_hyperparams['PATIENCE']:
                print 'out of patience after {} epochs'.format(epoch)
                train_progress_bar.finish()
                break
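            # (Assumed convention, not shown in this fragment: patience is
            # reset to 0 whenever dev accuracy improves on the best so far,
            # and incremented by 1 otherwise.)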
            # finished epoch
            train_progress_bar.update(epoch)
                
        print 'finished training.'
        
        ti = SoftAttention(pc, model_hyperparams, best_model_path)
        dev_accuracy, dev_results = ti.evaluate(dev_data.iter(), int(arguments['--beam']))
        print 'Best dev accuracy: {}'.format(dev_accuracy)
        write_param_file(output_file_path, dict(model_hyperparams.items()+train_hyperparams.items()))
        write_pred_file(output_file_path, dev_results)
        write_eval_file(output_file_path, dev_accuracy, dev_path)

    elif arguments['test']:
        print '=========EVALUATION ONLY:========='
        # requires a test path, the path of a pretrained model, and a results path to write to
        assert arguments['--test_path'] is not None

        print 'Loading data...'
        test_path = check_path(arguments['--test_path'], '--test_path')
        data_set = SoftDataSet
        input_format = [int(col) for col in arguments['--input_format'].split(',')]
        test_data = data_set.from_file(test_path, input_format, arguments['--lowercase'])
        print 'Test data has {} examples'.format(test_data.length)
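        # --input_format above is assumed to be a comma-separated list of
        # integer column indices into the data file, e.g. '0,1' to read the
        # first two columns (illustrative; the exact semantics are not shown here).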

        print 'Checking if any special symbols in data...'
        data = set(test_data.inputs + test_data.outputs)

Example #3

            print ('epoch: {} average train loss: {} average dev loss: {} '
                   'train perplexity: {} dev perplexity: {} '
                   'best dev perplexity: {}'
                   ).format(epoch, avg_train_loss, avg_dev_loss, train_perplexity, dev_perplexity, best_dev_perplexity)

            log_to_file(log_file_name, epoch, train_perplexity, dev_perplexity)

#            if patience == max_patience:
#                print 'out of patience after {} epochs'.format(epoch)
#                train_progress_bar.finish()
#                break
            # finished epoch
            train_progress_bar.update(epoch)
    
        print 'finished training.'
        
        # save best dev model parameters
        write_param_file(output_file_path, dict(model_hyperparams.items()+train_hyperparams.items()))
        write_eval_file(output_file_path, best_dev_perplexity, dev_path, 'Perplexity')

    elif arguments['test']:
        print '=========EVALUATION ONLY:========='
        # requires a test path, the path of a pretrained model, and a results path to write to
        assert arguments['--test_path'] is not None
        
        print 'Loading data...'
        over_segs = arguments['--segments']
        test_path = check_path(arguments['--test_path'], '--test_path')
        # select the expected input column format
        if arguments['--segformat']:
            col_format = 3
        elif arguments['--dictformat']:
            col_format = 1
        else:
            col_format = 2