def evaluate_model(predicted_templates, source_words, source_feature_dicts, target_words, target_feature_dicts,
                   feature_types, print_results=True):
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # TODO: other option - predict template, generate template using the correct word, check if templates are equal
    test_data = zip(source_words, source_feature_dicts, target_words, target_feature_dicts)
    c = 0
    for i, (source_word, source_feat_dict, target_word, target_feat_dict) in enumerate(test_data):
        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)
        predicted_word = instantiate_template(predicted_templates[joint_index], source_word)
        if predicted_word == target_word:
            c += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print('source word: ' + source_word + ' gold: ' + target_word + ' template: '
                  + ''.join(predicted_templates[joint_index]) + ' prediction: ' + predicted_word + ' ' + sign)

    # note: the denominator assumes a one-to-one mapping between test examples and joint
    # indices - duplicate (source word, source feats, target feats) triples would share a key
    accuracy = float(c) / len(predicted_templates)

    if print_results:
        print('finished evaluating model. accuracy: ' + str(c) + '/' + str(len(predicted_templates)) + '='
              + str(accuracy) + '\n\n')

    return len(predicted_templates), accuracy
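# The evaluation above relies on instantiate_template, defined elsewhere in the repo.
# Below is a minimal sketch of its assumed behavior, inferred from the copy-index
# markers appended to the alphabet in main() ('add indices to alphabet - used to
# indicate when copying from lemma to word'): a digit token copies the character at
# that position in the source word, any other token is emitted literally. This is a
# hypothetical illustration, not the original implementation.
def instantiate_template_sketch(template, source_word):
    output = []
    for token in template:
        if token.isdigit() and int(token) < len(source_word):
            # copy marker: take the character at this index from the source word
            output.append(source_word[int(token)])
        else:
            # literal character predicted by the decoder
            output.append(token)
    return ''.join(output)

# e.g. instantiate_template_sketch(['0', '1', '2', '3', 'e', 'd'], 'walk') == 'walked'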
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                      source_words, source_feats, target_feats, feat_index, feature_types):
    predictions = {}
    for i, (source_word, source_feat_dict, target_feat_dict) in enumerate(
            zip(source_words, source_feats, target_feats)):
        predicted_template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, source_word,
                                                         source_feat_dict, target_feat_dict, alphabet_index,
                                                         inverse_alphabet_index, feat_index, feature_types)
        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)
        predictions[joint_index] = predicted_template

    return predictions
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    if print_results:
        print 'evaluating model...'

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # for now, go with one, maybe try two later
    # TODO: two - predict template, generate template using the correct word, check if templates are equal
    test_data = zip(lemmas, feature_dicts, words)
    c = 0
    for i, (lemma, feat_dict, word) in enumerate(test_data):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_word = instantiate_template(predicted_templates[joint_index], lemma)
        if predicted_word == word:
            c += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print 'lemma: ' + lemma + ' gold: ' + words[i] + ' template: ' + ''.join(predicted_templates[joint_index]) \
                  + ' prediction: ' + predicted_word + ' ' + sign
    accuracy = float(c) / len(predicted_templates)

    if print_results:
        print 'finished evaluating model. accuracy: ' + str(c) + '/' + str(len(predicted_templates)) + '=' + \
              str(accuracy) + '\n\n'

    return len(predicted_templates), accuracy
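# Toy illustration of the accuracy statistic computed above; the words and
# predictions are made up. 3 of the 4 predictions match their gold forms.
predicted_words = ['walked', 'ran', 'sitted', 'went']
gold_words = ['walked', 'ran', 'sat', 'went']
correct = sum(1 for p, g in zip(predicted_words, gold_words) if p == g)
print(float(correct) / len(predicted_words))  # 0.75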
def predict_templates(model, params, alphabet_index, inverse_alphabet_index, source_words, source_feats, target_feats,
                      feat_index, feature_types):
    predictions = {}
    for i, (source_word, source_feat_dict, target_feat_dict) in enumerate(
            zip(source_words, source_feats, target_feats)):
        predicted_template = predict_inflection_template(model, params, source_word, source_feat_dict,
                                                         target_feat_dict, alphabet_index, inverse_alphabet_index,
                                                         feat_index, feature_types)
        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)
        predictions[joint_index] = predicted_template

    return predictions
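# The joint_index construction above is duplicated across the predict_templates
# variants, predict_nbest_templates and evaluate_model. A small helper like this
# (hypothetical, not part of the original code) would keep the key format in one
# place:
def make_joint_index(source_word, source_feat_dict, target_feat_dict, feature_types):
    # key format: source word + source feature string + target feature string
    return source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
           + ':' + common.get_morph_string(target_feat_dict, feature_types)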
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    predictions = {}
    for i, (lemma, feat_dict) in enumerate(zip(lemmas, feats)):
        predicted_template = predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
                                                     alphabet_index, inverse_alphabet_index, feat_index, feature_types)

        # index each output by its matching inputs - lemma + features
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predictions[joint_index] = predicted_template

    return predictions
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, epochs, layers,
         optimization, feat_input_dim, nbest):
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'EPOCHS': epochs, 'LAYERS': layers,
                    'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN, 'OPTIMIZATION': optimization, 'NBEST': nbest}

    print 'train path = ' + str(train_path)
    print 'test path = ' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load data
    (train_target_words, train_source_words, train_target_feat_dicts,
     train_source_feat_dicts) = prepare_sigmorphon_data.load_data(train_path, 2)
    (test_target_words, test_source_words, test_target_feat_dicts,
     test_source_feat_dicts) = prepare_sigmorphon_data.load_data(test_path, 2)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_target_words, train_source_words,
                                                                   train_target_feat_dicts, train_source_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_source_feat_dicts + train_target_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # cluster the data by POS type (features)
    # TODO: do we need to cluster on both source and target feats?
    # probably enough to cluster on source here because pos will be same
    # (no derivational morphology in this task)
    train_cluster_to_data_indices = common.cluster_data_by_pos(train_source_feat_dicts)
    test_cluster_to_data_indices = common.cluster_data_by_pos(test_source_feat_dicts)

    # cluster the data by inflection type (features)
    # train_cluster_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_cluster_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)

    accuracies = []
    final_results = {}

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_target_words = [train_target_words[i] for i in train_cluster_to_data_indices[cluster_type]]
        if len(train_cluster_target_words) < 1:
            print 'only ' + str(len(train_cluster_target_words)) + ' samples for this inflection type. skipping'
            continue
        else:
            print 'now evaluating model for cluster ' + str(cluster_index + 1) + '/' + \
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' + \
                  str(len(train_cluster_target_words)) + ' examples'

        # test best model
        test_cluster_source_words = [test_source_words[i] for i in test_cluster_to_data_indices[cluster_type]]
        test_cluster_target_words = [test_target_words[i] for i in test_cluster_to_data_indices[cluster_type]]
        test_cluster_source_feat_dicts = [test_source_feat_dicts[i]
                                          for i in test_cluster_to_data_indices[cluster_type]]
        test_cluster_target_feat_dicts = [test_target_feat_dicts[i]
                                          for i in test_cluster_to_data_indices[cluster_type]]

        # load best model
        best_model, params = load_best_model(str(cluster_index), alphabet, results_file_path, input_dim, hidden_dim,
                                             layers, feature_alphabet, feat_input_dim, feature_types)

        lang = train_path.split('/')[-1].replace('-task{0}-train'.format('1'), '')

        # handle greedy prediction
        if nbest == 1:
            is_nbest = False
            predicted_templates = task2_ms2s.predict_templates(best_model, params, alphabet_index,
                                                               inverse_alphabet_index, test_cluster_source_words,
                                                               test_cluster_source_feat_dicts,
                                                               test_cluster_target_feat_dicts, feat_index,
                                                               feature_types)
            accuracy = task2_ms2s.evaluate_model(predicted_templates, test_cluster_source_words,
                                                 test_cluster_source_feat_dicts, test_cluster_target_words,
                                                 test_cluster_target_feat_dicts, feature_types, print_results=False)
            accuracies.append(accuracy)
            print '{0} {1} accuracy: {2}'.format(lang, cluster_type, accuracy[1])

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and for each, concat morph, lemma, features in order to print later in the task format
            for i in test_cluster_to_data_indices[cluster_type]:
                joint_index = test_source_words[i] + ':' + common.get_morph_string(test_source_feat_dicts[i],
                                                                                   feature_types) \
                              + ':' + common.get_morph_string(test_target_feat_dicts[i], feature_types)
                inflection = task2_ms2s.instantiate_template(predicted_templates[joint_index], test_source_words[i])
                final_results[i] = (test_source_words[i], test_source_feat_dicts[i], inflection,
                                    test_target_feat_dicts[i])

            micro_average_accuracy = accuracy[1]

        # handle n-best prediction
        else:
            is_nbest = True
            predicted_nbest_templates = task2_ms2s.predict_nbest_templates(best_model, params, alphabet_index,
                                                                           inverse_alphabet_index,
                                                                           test_cluster_source_words,
                                                                           test_cluster_source_feat_dicts,
                                                                           test_cluster_target_feat_dicts,
                                                                           feat_index, feature_types, nbest,
                                                                           test_cluster_target_words)

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and for each, concat morph, lemma, features in order to print later in the task format
            for i in test_cluster_to_data_indices[cluster_type]:
                joint_index = test_source_words[i] + ':' + common.get_morph_string(test_source_feat_dicts[i],
                                                                                   feature_types) \
                              + ':' + common.get_morph_string(test_target_feat_dicts[i], feature_types)
                nbest_inflections = []
                templates = [t for (t, p) in predicted_nbest_templates[joint_index]]
                for template in templates:
                    nbest_inflections.append(task2_ms2s.instantiate_template(template, test_source_words[i]))
                final_results[i] = (test_source_words[i], test_source_feat_dicts[i], nbest_inflections,
                                    test_target_feat_dicts[i])

            micro_average_accuracy = -1

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'
    task2_joint_structured_inflection.write_results_file(hyper_params, micro_average_accuracy, train_path, test_path,
                                                         results_file_path + suffix, sigmorphon_root_dir,
                                                         final_results, is_nbest)
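# Hypothetical invocation of main(); the paths and hyperparameter values below are
# illustrative placeholders, not defaults from the original code:
# main(train_path='data/german-task2-train',
#      test_path='data/german-task2-dev',
#      results_file_path='results/german-task2',
#      sigmorphon_root_dir='sigmorphon2016',
#      input_dim=100, hidden_dim=100, epochs=1, layers=2,
#      optimization='ADAM', feat_input_dim=20, nbest=1)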
def predict_nbest_templates(model, params, alphabet_index, inverse_alphabet_index, source_words, source_feats,
                            target_feats, feat_index, feature_types, nbest, words):
    predictions = {}
    fix_count = 0
    for i, (source_word, source_feat_dict, target_feat_dict) in enumerate(
            zip(source_words, source_feats, target_feats)):
        predicted_template = predict_inflection_template(model, params, source_word, source_feat_dict,
                                                         target_feat_dict, alphabet_index, inverse_alphabet_index,
                                                         feat_index, feature_types)
        predicted_nbest = predict_nbest_template(model, params, source_word, source_feat_dict, target_feat_dict,
                                                 alphabet_index, inverse_alphabet_index, feat_index, feature_types,
                                                 nbest)

        # DEBUG: compare the greedy guess with each n-best hypothesis
        greedy_guess = instantiate_template(predicted_template, source_word)
        if words[i] == greedy_guess:
            gsign = 'V'
        else:
            gsign = 'X'

        for j, n in enumerate(predicted_nbest):
            s, p = n
            nbest_guess = instantiate_template(s, source_word)
            if words[i] == nbest_guess:
                nsign = 'V'
            else:
                nsign = 'X'

            # count cases where greedy decoding was wrong but beam search found the gold form
            if gsign == 'X' and nsign == 'V':
                fix_count += 1
                print(str(i) + ' out of ' + str(len(source_words)))
                print(source_word.encode('utf8') + '\n')
                encoded_template = [c.encode('utf8') for c in predicted_template]
                joined = ''.join(encoded_template)
                print('GREEDY: \n' + joined)
                print(greedy_guess.encode('utf8') + ' ' + gsign + '\n')
                print(u'{0}-BEST:'.format(j + 1))
                print(str(''.join(s).encode('utf8')) + ' ' + str(p))
                print(nbest_guess.encode('utf8') + ' ' + nsign + '\n')

        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)
        predictions[joint_index] = predicted_nbest

    print('================================================================')
    print('beam search fixed {0} out of {1}, {2}%'.format(fix_count, len(source_words),
                                                          float(fix_count) / len(source_words) * 100))
    print('================================================================')

    return predictions
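# Self-contained toy run of the fix-count statistic printed above: a "fix" is a
# case where the greedy guess misses the gold form but the n-best list contains
# it. All words and hypothesis lists are made up, and each example is counted at
# most once here (the loop above increments once per matching hypothesis).
gold = ['walked', 'ran', 'sat']
greedy = ['walkd', 'ran', 'sit']
nbest_guesses = [['walkd', 'walked'], ['ran'], ['sit', 'seat']]
fix_count = sum(1 for g, gr, nb in zip(gold, greedy, nbest_guesses)
                if gr != g and g in nb)
print('beam search fixed {0} out of {1}, {2}%'.format(
    fix_count, len(gold), float(fix_count) / len(gold) * 100))  # fixed 1 out of 3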