def do_eval(dmv_model, m_model, pos, languages, language_map, epoch, options):
    """Run evaluation of the (ML-)DMV model on the dev set and report scores.

    Parses every dev sentence with the Eisner DMV decoder, optionally using
    transition parameters predicted by the neural model ``m_model``, writes
    predictions to ``options.output``, and (when not evaluating on unseen
    languages) prints language-classification accuracy.

    Parameters:
        dmv_model     -- trained DMV model (provides scoring/decoding helpers).
        m_model       -- neural parameter-prediction model; used only when
                         ``options.use_neural`` / prediction flags are on.
        pos           -- POS-tag vocabulary (dict: tag -> index).
        languages     -- language vocabulary (dict: language name -> index).
        language_map  -- mapping from sentence id to language name (training map).
        epoch         -- current training epoch (0-based; used for file naming
                         and for the neural warm-up threshold).
        options       -- parsed command-line options namespace.

    Side effects: writes the prediction file ``eval_pred<epoch+1>_<sample_idx>``
    into ``options.output`` and prints progress/accuracy to stdout.
    """
    # NOTE: single-argument print(...) is behavior-identical under Python 2
    # and keeps this function forward-compatible with Python 3.
    print("====================================")
    print('Do evaluation')

    # Choose the evaluation language inventory: either the training languages
    # or a fresh list read from disk when evaluating on new languages.
    if not options.eval_new_language:
        eval_language_set = languages.keys()
        eval_languages = languages
    else:
        eval_language_set = utils.read_language_list(options.language_path)
        eval_languages = {l: i for i, l in enumerate(eval_language_set)}

    # Load the dev-set sentences for the selected languages.
    eval_file_list = os.listdir(options.dev)
    eval_file_set = utils.get_file_set(eval_file_list, eval_language_set, False)
    eval_sentences, eval_language_map = utils.read_multiple_data(
        options.dev, eval_file_set, True)

    # Switch models to evaluation mode (disables dropout etc. in torch models).
    dmv_model.eval()
    if options.use_neural:
        m_model.eval()

    devpath = os.path.join(
        options.output,
        'eval_pred' + str(epoch + 1) + '_' + str(options.sample_idx))
    eval_data_list, _, eval_sentence_map = utils.construct_ml_pos_data(
        eval_sentences, pos, eval_languages, eval_language_map)
    eval_batch_data = utils.construct_batch_data(eval_data_list,
                                                 options.batchsize)

    parse_results = {}
    # One predicted-language slot per dev sentence.
    classify_results = np.zeros(len(eval_data_list))

    # Per-sentence transition parameters are only materialized once the neural
    # model has taken over from the count-based initialization.
    if options.sentence_predict and epoch > options.non_neural_iter:
        eval_trans_param = np.zeros((len(eval_data_list), len(pos.keys()),
                                     len(pos.keys()), 2, options.c_valency))
    else:
        eval_trans_param = None

    for one_batch in eval_batch_data:
        # Each batch element is (pos_seq, language_id, (sentence_id, ...)).
        eval_batch_pos = [s[0] for s in one_batch]
        eval_batch_lan = [s[1] for s in one_batch]
        eval_batch_sen = [s[2][0] for s in one_batch]
        eval_batch_sen = np.array(eval_batch_sen)
        eval_batch_lan = np.array(eval_batch_lan)
        eval_batch_pos = np.array(eval_batch_pos)

        if (options.sentence_predict and epoch > options.non_neural_iter
                ) or options.language_predict:
            # Gather (pos, dir, valency, language, sentence) rule tuples for
            # which the neural model must predict transition parameters.
            batch_rule_samples = dmv_model.find_predict_samples(
                eval_batch_pos, eval_batch_lan, eval_batch_sen)
            batch_predict_data = utils.construct_ml_predict_data(
                batch_rule_samples)

            # Torch tensors feed the model; the parallel numpy index arrays
            # scatter its output back into eval_trans_param.
            batch_predict_pos_v = torch.LongTensor(batch_predict_data['pos'])
            batch_predict_pos_index = np.array(batch_predict_data['pos'])
            batch_predict_dir_v = torch.LongTensor(batch_predict_data['dir'])
            batch_predict_dir_index = np.array(batch_predict_data['dir'])
            batch_predict_cvalency_v = torch.LongTensor(
                batch_predict_data['cvalency'])
            batch_predict_cvalency_index = np.array(
                batch_predict_data['cvalency'])
            batch_predict_lan_v = torch.LongTensor(
                batch_predict_data['languages'])
            batch_predict_lan_index = np.array(
                batch_predict_data['languages'])

            # Map each sample's sentence id to its encoded sentence.
            batch_predict_sen_v = []
            for sentence_id in batch_predict_data['sentence']:
                batch_predict_sen_v.append(eval_sentence_map[sentence_id])
            batch_predict_sen_index = np.array(
                batch_predict_data['sentence'])
            batch_predict_sen_v = torch.LongTensor(batch_predict_sen_v)

            batch_predicted, batch_predicted_lan = m_model.forward_(
                batch_predict_pos_v, batch_predict_dir_v,
                batch_predict_cvalency_v, None, None, True, 'child',
                batch_predict_lan_v, batch_predict_sen_v, None)

            if options.sentence_predict or options.language_predict:
                # Evaluation of language prediction: record the predicted
                # language id for every sentence in this batch.
                for i in range(len(batch_predict_sen_v)):
                    sentence_idx = batch_predict_data['sentence'][i]
                    classify_results[sentence_idx] = batch_predicted_lan[i]

            if options.sentence_predict:
                # Scatter predicted child-attachment distributions into the
                # per-sentence parameter table (axis 2 spans child POS tags).
                eval_trans_param[
                    batch_predict_sen_index, batch_predict_pos_index, :,
                    batch_predict_dir_index,
                    batch_predict_cvalency_index] = batch_predicted.detach(
                    ).numpy()
            else:
                # NOTE(review): this branch indexes eval_trans_param with a
                # language axis, but the only allocation visible here is the
                # sentence-indexed one (and it is None unless sentence_predict
                # is on) — verify the language_predict allocation upstream.
                eval_trans_param[
                    batch_predict_pos_index, :, batch_predict_dir_index,
                    batch_predict_cvalency_index,
                    batch_predict_lan_index] = batch_predicted.detach(
                    ).numpy()

        # Score the batch and decode the best dependency trees.
        batch_score, batch_decision_score = dmv_model.evaluate_batch_score(
            eval_batch_pos, eval_batch_sen, eval_language_map,
            eval_languages, eval_trans_param)
        if options.function_mask:
            batch_score = dmv_model.function_to_mask(batch_score,
                                                     eval_batch_pos)
        # Insert singleton valency axes expected by the Eisner decoder.
        batch_score = np.expand_dims(batch_score, 3)
        batch_score = np.expand_dims(batch_score, 4)
        batch_decision_score = np.expand_dims(batch_decision_score, 2)
        batch_parse = eisner_for_dmv.batch_parse(batch_score,
                                                 batch_decision_score,
                                                 dmv_model.dvalency,
                                                 dmv_model.cvalency)
        for i in range(len(eval_batch_pos)):
            parse_results[eval_batch_sen[i]] = (batch_parse[0][i],
                                                batch_parse[1][i])

    utils.eval_ml(parse_results, eval_sentences, devpath,
                  options.log + '_dev' + str(options.sample_idx),
                  eval_language_map, eval_languages, epoch)
    # utils.write_distribution(dmv_model)
    print("====================================")

    # Language classification results (only meaningful when evaluating on the
    # training languages, where gold language ids are known).
    if not options.eval_new_language and (options.sentence_predict
                                          or options.language_predict):
        correct = 0
        for i in range(len(classify_results)):
            if classify_results[i] == languages[eval_language_map[i]]:
                correct += 1
        correct_rate = float(correct) / len(classify_results)
        print("Language classification accuracy " + str(correct_rate))
# ---- Training setup: read data, persist parameters, build the model. ----

# Read the multilingual training data: POS vocabulary, encoded sentences,
# language inventory, and the sentence-id -> language mapping.
pos, sentences, languages, language_map = utils.read_multiple_data(
    options.train, file_set, False)
sentence_language_map = {}

# With --concat_all every sentence is treated as one merged 'all' language.
if options.concat_all:
    languages = {'all': 0}
    for s in language_map.keys():
        language_map[s] = 'all'
print('Data read')

# Persist the POS vocabulary and options so evaluation runs can reload them.
params_path = os.path.join(
    options.output, options.params + '_' + str(options.sample_idx))
with open(params_path, 'w') as paramsfp:
    pickle.dump((pos, options), paramsfp)
print('Parameters saved')

# Build index-encoded training data and mini-batches.
data_list, data_pos, sentence_map = utils.construct_ml_pos_data(
    sentences, pos, languages, language_map)
batch_data = utils.construct_update_batch_data(data_list, options.batchsize)
print('Batch data constructed')

data_size = len(data_list)
ml_dmv_model = MLDMV(pos, sentence_map, languages, language_map, data_size,
                     options)
print('Model constructed')

ml_dmv_model.init_param(sentences)
print('Parameters initialized')

# Pin all subsequent CUDA work to the requested device, if one is available.
if options.gpu >= 0 and torch.cuda.is_available():
    torch.cuda.set_device(options.gpu)