Example #1
0
def evaluate_model(predicted_templates, source_words, source_feature_dicts, target_words, target_feature_dicts,
                   feature_types, print_results=True):
    """Score predicted templates by instantiating them and comparing with the gold words.

    Every example is looked up in ``predicted_templates`` under the key
    ``<source word>:<source morph string>:<target morph string>``; the stored
    template is instantiated on the source word and the result is compared
    with the gold target word.

    Args:
        predicted_templates: dict mapping the joint key described above to a
            predicted template (an iterable of strings, it is ``''.join``-ed
            for printing).
        source_words: source word forms, aligned with the other sequences.
        source_feature_dicts: feature dict of every source word.
        target_words: gold target word forms.
        target_feature_dicts: feature dict of every target word.
        feature_types: passed to ``common.get_morph_string`` to build the key.
        print_results: when true, print one line per example plus a summary.

    Returns:
        ``(len(predicted_templates), accuracy)`` where accuracy is the number
        of correct predictions divided by ``len(predicted_templates)``, or
        ``0.0`` when there are no predictions at all.

    Raises:
        KeyError: if an example has no entry in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # TODO: other option - predict template, generate template using the correct word, check if templates are equal
    test_data = zip(source_words, source_feature_dicts, target_words, target_feature_dicts)
    c = 0
    for source_word, source_feat_dict, target_word, target_feat_dict in test_data:
        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, source_word)
        if predicted_word == target_word:
            c += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print('source word: ' + source_word + ' gold: ' + target_word + ' template:' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # The denominator is the number of distinct joint keys, as in the original
    # contract; guard it so an empty evaluation set does not divide by zero.
    total = len(predicted_templates)
    accuracy = float(c) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(c) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, source_words,
                      source_feats, target_feats, feat_index, feature_types):
    """Predict one inflection template per (source word, source feats, target feats) triple.

    The three input sequences are walked in lockstep. Each prediction is
    stored under the key ``<source word>:<source morph string>:<target morph
    string>``, so a later triple with the same key replaces an earlier one.

    Returns:
        dict mapping the joint key to the template returned by
        ``predict_inflection_template``.
    """
    templates_by_key = {}
    examples = zip(source_words, source_feats, target_feats)
    for word, src_feats, tgt_feats in examples:
        template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, word,
                                               src_feats, tgt_feats,
                                               alphabet_index, inverse_alphabet_index, feat_index,
                                               feature_types)

        # key each output by the inputs that produced it
        key_parts = [word,
                     common.get_morph_string(src_feats, feature_types),
                     common.get_morph_string(tgt_feats, feature_types)]
        templates_by_key[':'.join(key_parts)] = template

    return templates_by_key
Example #3
0
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    """Score predicted templates by instantiating them on the lemma and comparing with the gold word.

    Every example is looked up in ``predicted_templates`` under the key
    ``<lemma>:<morph string>``; the stored template is instantiated on the
    lemma and the result is compared with the gold word.

    Args:
        predicted_templates: dict mapping the joint key described above to a
            predicted template (an iterable of strings, it is ``''.join``-ed
            for printing).
        lemmas: lemma of every example, aligned with the other sequences.
        feature_dicts: feature dict of every example.
        words: gold inflected word forms.
        feature_types: passed to ``common.get_morph_string`` to build the key.
        print_results: when true, print one line per example plus a summary.

    Returns:
        ``(len(predicted_templates), accuracy)`` where accuracy is the number
        of correct predictions divided by ``len(predicted_templates)``, or
        ``0.0`` when there are no predictions at all.

    Raises:
        KeyError: if an example has no entry in ``predicted_templates``.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # for now, go with one, maybe try two later
    # TODO: two - predict template, generate template using the correct word, check if templates are equal
    test_data = zip(lemmas, feature_dicts, words)
    c = 0
    for lemma, feat_dict, word in test_data:
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, lemma)
        if predicted_word == word:
            c += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print('lemma: ' + lemma + ' gold: ' + word + ' template: ' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # The denominator is the number of distinct joint keys, as in the original
    # contract; guard it so an empty evaluation set does not divide by zero.
    total = len(predicted_templates)
    accuracy = float(c) / total if total else 0.0
    if print_results:
        print('finished evaluating model. accuracy: ' + str(c) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
Example #4
0
def predict_templates(model, params, alphabet_index, inverse_alphabet_index,
                      source_words,
                      source_feats, target_feats, feat_index, feature_types):
    """Run greedy template prediction over aligned source/target examples.

    ``source_words``, ``source_feats`` and ``target_feats`` are consumed in
    lockstep. The result maps ``<source word>:<source morph string>:<target
    morph string>`` to the template returned by
    ``predict_inflection_template``; a repeated key keeps the last prediction.
    """

    def joint_key(word, src_feat_dict, tgt_feat_dict):
        # same key layout the evaluation code uses to look predictions up
        return word + ':' + common.get_morph_string(src_feat_dict, feature_types) \
               + ':' + common.get_morph_string(tgt_feat_dict, feature_types)

    result = {}
    for word, src_feat_dict, tgt_feat_dict in zip(source_words, source_feats, target_feats):
        template = predict_inflection_template(
            model, params, word, src_feat_dict, tgt_feat_dict,
            alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        result[joint_key(word, src_feat_dict, tgt_feat_dict)] = template

    return result
Example #5
0
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Predict an output template for every (lemma, feature dict) pair.

    ``lemmas`` and ``feats`` are walked in lockstep. The returned dict maps
    ``<lemma>:<morph string>`` to the sequence produced by
    ``predict_output_sequence``; a repeated key keeps the last prediction.
    """
    outputs_by_key = {}
    for current_lemma, current_feats in zip(lemmas, feats):
        output = predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, current_lemma,
                                         current_feats, alphabet_index, inverse_alphabet_index, feat_index,
                                         feature_types)

        # the key is built from the inputs: lemma followed by its features
        morph_string = common.get_morph_string(current_feats, feature_types)
        outputs_by_key[':'.join([current_lemma, morph_string])] = output

    return outputs_by_key
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, epochs, layers,
         optimization, feat_input_dim, nbest):
    """Evaluate the saved best model of every POS cluster on the test data and write a results file.

    Steps, in order:
      1. load train and test data with ``prepare_sigmorphon_data.load_data(path, 2)``;
      2. build the character and feature alphabets from the train data and
         extend them with the special symbols and the copy-index markers;
      3. cluster train and test examples with ``common.cluster_data_by_pos``;
      4. for every train cluster, load its model with ``load_best_model`` and
         predict either greedily (``nbest == 1``) or with n-best decoding;
      5. write all predictions with
         ``task2_joint_structured_inflection.write_results_file``.

    Args:
        train_path: path of the training file; its last ``/`` component is
            also used to derive the language name that is printed.
        test_path: path of the file to predict on; if it contains ``'test'``
            the results file gets the suffix ``.best.test``, else ``.best``.
        results_file_path: prefix handed to ``load_best_model`` and used as the
            base name of the written results file.
        sigmorphon_root_dir: forwarded to ``write_results_file``.
        input_dim, hidden_dim, layers, feat_input_dim: forwarded to
            ``load_best_model``.
        epochs, optimization: only recorded in the printed/written
            hyper-parameters.
        nbest: ``1`` selects greedy prediction plus accuracy evaluation; any
            other value selects n-best prediction without accuracy.

    The accuracy handed to ``write_results_file`` is the example-weighted
    (micro) average over all evaluated clusters, or ``-1`` when no accuracy
    was computed (n-best mode, or no cluster could be evaluated).
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'EPOCHS': epochs, 'LAYERS': layers,
                    'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN, 'OPTIMIZATION': optimization, 'NBEST': nbest}

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for param in hyper_params:
        print(param + '=' + str(hyper_params[param]))

    # load data
    (train_target_words, train_source_words, train_target_feat_dicts,
     train_source_feat_dicts) = prepare_sigmorphon_data.load_data(
        train_path, 2)
    (test_target_words, test_source_words, test_target_feat_dicts,
     test_source_feat_dicts) = prepare_sigmorphon_data.load_data(
        test_path, 2)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_target_words, train_source_words,
                                                                   train_target_feat_dicts, train_source_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_source_feat_dicts + train_target_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for i in range(MAX_PREDICTION_LEN):
        alphabet.append(str(i))

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # cluster the data by POS type (features)
    # TODO: do we need to cluster on both source and target feats?
    #       probably enough to cluster on source here because pos will be same
    #       (no derivational morphology in this task)
    train_cluster_to_data_indices = common.cluster_data_by_pos(train_source_feat_dicts)
    test_cluster_to_data_indices = common.cluster_data_by_pos(test_source_feat_dicts)

    # cluster the data by inflection type (features)
    # train_cluster_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_cluster_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)

    accuracies = []
    final_results = {}

    # The decoding mode depends only on nbest, so decide it once; this also
    # keeps it defined when every cluster below is skipped.
    is_nbest = nbest != 1

    # NOTE(review): the stripped suffix says task 1 although this script handles
    # task-2 data - confirm against the actual file names. Only used for printing.
    lang = train_path.split('/')[-1].replace('-task{0}-train'.format('1'), '')

    def joint_index_of(i):
        # key under which the prediction of test example i is stored
        return test_source_words[i] + ':' + common.get_morph_string(test_source_feat_dicts[i], feature_types) \
               + ':' + common.get_morph_string(test_target_feat_dicts[i], feature_types)

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_target_words = [train_target_words[i] for i in train_cluster_to_data_indices[cluster_type]]
        if len(train_cluster_target_words) < 1:
            print('only ' + str(len(train_cluster_target_words)) + ' samples for this inflection type. skipping')
            continue

        print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
              str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
              str(len(train_cluster_target_words)) + ' examples')

        # A cluster seen in training may be absent from the test data; there is
        # nothing to predict then, and the downstream accuracy/percentage
        # computations would divide by zero.
        test_indices = test_cluster_to_data_indices.get(cluster_type, [])
        if not test_indices:
            print('no test samples for cluster ' + cluster_type + '. skipping')
            continue

        # test best model
        test_cluster_source_words = [test_source_words[i] for i in test_indices]
        test_cluster_target_words = [test_target_words[i] for i in test_indices]
        test_cluster_source_feat_dicts = [test_source_feat_dicts[i] for i in test_indices]
        test_cluster_target_feat_dicts = [test_target_feat_dicts[i] for i in test_indices]

        # load best model
        best_model, params = load_best_model(str(cluster_index), alphabet,
                                             results_file_path, input_dim,
                                             hidden_dim, layers,
                                             feature_alphabet, feat_input_dim,
                                             feature_types)

        # handle greedy prediction
        if not is_nbest:
            predicted_templates = task2_ms2s.predict_templates(
                best_model,
                params,
                alphabet_index,
                inverse_alphabet_index,
                test_cluster_source_words,
                test_cluster_source_feat_dicts,
                test_cluster_target_feat_dicts,
                feat_index,
                feature_types)

            accuracy = task2_ms2s.evaluate_model(predicted_templates,
                                                 test_cluster_source_words,
                                                 test_cluster_source_feat_dicts,
                                                 test_cluster_target_words,
                                                 test_cluster_target_feat_dicts,
                                                 feature_types,
                                                 print_results=False)
            accuracies.append(accuracy)
            print('{0} {1} accuracy: {2}'.format(lang, cluster_type, accuracy[1]))

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                inflection = task2_ms2s.instantiate_template(
                    predicted_templates[joint_index_of(i)], test_source_words[i])
                final_results[i] = (
                    test_source_words[i], test_source_feat_dicts[i], inflection, test_target_feat_dicts[i])

        # handle n-best prediction
        else:
            predicted_nbset_templates = task2_ms2s.predict_nbest_templates(
                best_model,
                params,
                alphabet_index,
                inverse_alphabet_index,
                test_cluster_source_words,
                test_cluster_source_feat_dicts,
                test_cluster_target_feat_dicts,
                feat_index,
                feature_types,
                nbest,
                test_cluster_target_words)

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                nbest_inflections = [
                    task2_ms2s.instantiate_template(template, test_source_words[i])
                    for (template, p) in predicted_nbset_templates[joint_index_of(i)]]
                final_results[i] = (
                    test_source_words[i], test_source_feat_dicts[i], nbest_inflections, test_target_feat_dicts[i])

    # Micro average over ALL evaluated clusters rather than the accuracy of
    # whichever cluster happened to be processed last.
    # Assumes task2_ms2s.evaluate_model returns (example_count, accuracy) - TODO confirm.
    evaluated_examples = sum(result[0] for result in accuracies)
    if is_nbest or not evaluated_examples:
        # -1 marks "no accuracy computed"
        micro_average_accuracy = -1
    else:
        correct_examples = sum(result[0] * result[1] for result in accuracies)
        micro_average_accuracy = float(correct_examples) / evaluated_examples

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    task2_joint_structured_inflection.write_results_file(hyper_params,
                                                         micro_average_accuracy,
                                                         train_path,
                                                         test_path,
                                                         results_file_path + suffix,
                                                         sigmorphon_root_dir,
                                                         final_results,
                                                         is_nbest)
Example #7
0
def predict_nbest_templates(model, params, alphabet_index, inverse_alphabet_index,
                            source_words,
                            source_feats, target_feats, feat_index, feature_types, nbest, words):
    """Predict n-best templates per example and report how often the beam fixes a greedy error.

    For every (source word, source feats, target feats) triple both the greedy
    template (``predict_inflection_template``) and the n-best list
    (``predict_nbest_template``) are computed. The greedy result is used for
    debugging output only: whenever the greedy guess is wrong but an n-best
    candidate instantiates to the gold word, both are printed.

    Args:
        model, params, alphabet_index, inverse_alphabet_index, feat_index,
            feature_types: forwarded to the two prediction functions.
        source_words, source_feats, target_feats: aligned input sequences.
        nbest: forwarded to ``predict_nbest_template``.
        words: gold target words, indexed like ``source_words``.

    Returns:
        dict mapping ``<source word>:<source morph string>:<target morph
        string>`` to the n-best list, whose entries are ``(template, p)``
        pairs (``p`` is presumably the candidate's score - TODO confirm).
    """
    predictions = {}
    fix_count = 0
    for i, (source_word, source_feat_dict, target_feat_dict) in enumerate(
            zip(source_words, source_feats, target_feats)):
        predicted_template = predict_inflection_template(model,
                                                         params,
                                                         source_word,
                                                         source_feat_dict,
                                                         target_feat_dict,
                                                         alphabet_index,
                                                         inverse_alphabet_index,
                                                         feat_index,
                                                         feature_types)

        predicted_nbest = predict_nbest_template(model,
                                                 params,
                                                 source_word,
                                                 source_feat_dict,
                                                 target_feat_dict,
                                                 alphabet_index,
                                                 inverse_alphabet_index,
                                                 feat_index,
                                                 feature_types,
                                                 nbest)

        # DEBUG:
        greedy_guess = instantiate_template(predicted_template, source_word)
        gsign = 'V' if words[i] == greedy_guess else 'X'

        # Several candidates may instantiate to the same gold word; the example
        # must still count as fixed only once for "fixed X out of N" to hold.
        fixed_by_beam = False

        for j, (s, p) in enumerate(predicted_nbest):
            nbest_guess = instantiate_template(s, source_word)
            nsign = 'V' if words[i] == nbest_guess else 'X'

            if gsign == 'X' and nsign == 'V':
                fixed_by_beam = True
                # NOTE(review): the encode('utf8') + str concatenations below are
                # Python 2 idioms (they raise TypeError on Python 3).
                print(str(i) + ' out of ' + str(len(source_words)))
                print(source_word.encode('utf8') + '\n')
                encoded_template = [c.encode('utf8') for c in predicted_template]
                joined = ''.join(encoded_template)
                print('GREEDY: \n' + joined)
                print(greedy_guess.encode('utf8') + ' ' + gsign + '\n')
                print(u'{0}-BEST:'.format(j + 1))
                print(str(''.join(s).encode('utf8')) + ' ' + str(p))
                print(nbest_guess.encode('utf8') + ' ' + nsign + '\n')

        if fixed_by_beam:
            fix_count += 1

        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)

        predictions[joint_index] = predicted_nbest

    # Guard the percentage so an empty input does not divide by zero.
    total = len(source_words)
    fixed_percent = float(fix_count) / total * 100 if total else 0.0

    print('================================================================')
    print('beam search fixed {0} out of {1}, {2}%'.format(fix_count,
                                                         total,
                                                         fixed_percent))
    print('================================================================')

    return predictions
def main(train_path, test_path, results_file_path, sigmorphon_root_dir,
         input_dim, hidden_dim, epochs, layers, optimization, feat_input_dim,
         nbest):
    """Evaluate the saved best model of every POS cluster on the test data and write a results file.

    Steps, in order:
      1. load train and test data with ``prepare_sigmorphon_data.load_data(path, 2)``;
      2. build the character and feature alphabets from the train data and
         extend them with the special symbols and the copy-index markers;
      3. cluster train and test examples with ``common.cluster_data_by_pos``;
      4. for every train cluster, load its model with ``load_best_model`` and
         predict either greedily (``nbest == 1``) or with n-best decoding;
      5. write all predictions with
         ``task2_joint_structured_inflection.write_results_file``.

    Args:
        train_path: path of the training file; its last ``/`` component is
            also used to derive the language name that is printed.
        test_path: path of the file to predict on; if it contains ``'test'``
            the results file gets the suffix ``.best.test``, else ``.best``.
        results_file_path: prefix handed to ``load_best_model`` and used as the
            base name of the written results file.
        sigmorphon_root_dir: forwarded to ``write_results_file``.
        input_dim, hidden_dim, layers, feat_input_dim: forwarded to
            ``load_best_model``.
        epochs, optimization: only recorded in the printed/written
            hyper-parameters.
        nbest: ``1`` selects greedy prediction plus accuracy evaluation; any
            other value selects n-best prediction without accuracy.

    The accuracy handed to ``write_results_file`` is the example-weighted
    (micro) average over all evaluated clusters, or ``-1`` when no accuracy
    was computed (n-best mode, or no cluster could be evaluated).
    """
    hyper_params = {
        'INPUT_DIM': input_dim,
        'HIDDEN_DIM': hidden_dim,
        'EPOCHS': epochs,
        'LAYERS': layers,
        'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
        'OPTIMIZATION': optimization,
        'NBEST': nbest
    }

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for param in hyper_params:
        print(param + '=' + str(hyper_params[param]))

    # load data
    (train_target_words, train_source_words, train_target_feat_dicts,
     train_source_feat_dicts) = prepare_sigmorphon_data.load_data(
         train_path, 2)
    (test_target_words, test_source_words, test_target_feat_dicts,
     test_source_feat_dicts) = prepare_sigmorphon_data.load_data(test_path, 2)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(
        train_target_words, train_source_words, train_target_feat_dicts,
        train_source_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_source_feat_dicts +
                                                   train_target_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for i in range(MAX_PREDICTION_LEN):
        alphabet.append(str(i))

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {
        index: char
        for char, index in alphabet_index.items()
    }

    # cluster the data by POS type (features)
    # TODO: do we need to cluster on both source and target feats?
    #       probably enough to cluster on source here because pos will be same
    #       (no derivational morphology in this task)
    train_cluster_to_data_indices = common.cluster_data_by_pos(
        train_source_feat_dicts)
    test_cluster_to_data_indices = common.cluster_data_by_pos(
        test_source_feat_dicts)

    # cluster the data by inflection type (features)
    # train_cluster_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_cluster_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)

    accuracies = []
    final_results = {}

    # The decoding mode depends only on nbest, so decide it once; this also
    # keeps it defined when every cluster below is skipped.
    is_nbest = nbest != 1

    # NOTE(review): the stripped suffix says task 1 although this script handles
    # task-2 data - confirm against the actual file names. Only used for printing.
    lang = train_path.split('/')[-1].replace('-task{0}-train'.format('1'),
                                             '')

    def joint_index_of(i):
        # key under which the prediction of test example i is stored
        return test_source_words[i] + ':' + common.get_morph_string(test_source_feat_dicts[i],
                                                                    feature_types) \
               + ':' + common.get_morph_string(test_target_feat_dicts[i], feature_types)

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(
            train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_target_words = [
            train_target_words[i]
            for i in train_cluster_to_data_indices[cluster_type]
        ]
        if len(train_cluster_target_words) < 1:
            print('only ' + str(len(train_cluster_target_words)) +
                  ' samples for this inflection type. skipping')
            continue

        print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
              str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
              str(len(train_cluster_target_words)) + ' examples')

        # A cluster seen in training may be absent from the test data; there is
        # nothing to predict then, and the downstream accuracy/percentage
        # computations would divide by zero.
        test_indices = test_cluster_to_data_indices.get(cluster_type, [])
        if not test_indices:
            print('no test samples for cluster ' + cluster_type + '. skipping')
            continue

        # test best model
        test_cluster_source_words = [
            test_source_words[i] for i in test_indices
        ]
        test_cluster_target_words = [
            test_target_words[i] for i in test_indices
        ]
        test_cluster_source_feat_dicts = [
            test_source_feat_dicts[i] for i in test_indices
        ]
        test_cluster_target_feat_dicts = [
            test_target_feat_dicts[i] for i in test_indices
        ]

        # load best model
        best_model, params = load_best_model(str(cluster_index), alphabet,
                                             results_file_path, input_dim,
                                             hidden_dim, layers,
                                             feature_alphabet, feat_input_dim,
                                             feature_types)

        # handle greedy prediction
        if not is_nbest:
            predicted_templates = task2_ms2s.predict_templates(
                best_model, params, alphabet_index, inverse_alphabet_index,
                test_cluster_source_words, test_cluster_source_feat_dicts,
                test_cluster_target_feat_dicts, feat_index, feature_types)

            accuracy = task2_ms2s.evaluate_model(
                predicted_templates,
                test_cluster_source_words,
                test_cluster_source_feat_dicts,
                test_cluster_target_words,
                test_cluster_target_feat_dicts,
                feature_types,
                print_results=False)
            accuracies.append(accuracy)
            print('{0} {1} accuracy: {2}'.format(lang, cluster_type,
                                                 accuracy[1]))

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                inflection = task2_ms2s.instantiate_template(
                    predicted_templates[joint_index_of(i)],
                    test_source_words[i])
                final_results[i] = (test_source_words[i],
                                    test_source_feat_dicts[i], inflection,
                                    test_target_feat_dicts[i])

        # handle n-best prediction
        else:
            predicted_nbset_templates = task2_ms2s.predict_nbest_templates(
                best_model, params, alphabet_index, inverse_alphabet_index,
                test_cluster_source_words, test_cluster_source_feat_dicts,
                test_cluster_target_feat_dicts, feat_index, feature_types,
                nbest, test_cluster_target_words)

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                nbest_inflections = [
                    task2_ms2s.instantiate_template(template,
                                                    test_source_words[i])
                    for (template, p) in
                    predicted_nbset_templates[joint_index_of(i)]
                ]
                final_results[i] = (test_source_words[i],
                                    test_source_feat_dicts[i],
                                    nbest_inflections,
                                    test_target_feat_dicts[i])

    # Micro average over ALL evaluated clusters rather than the accuracy of
    # whichever cluster happened to be processed last.
    # Assumes task2_ms2s.evaluate_model returns (example_count, accuracy) - TODO confirm.
    evaluated_examples = sum(result[0] for result in accuracies)
    if is_nbest or not evaluated_examples:
        # -1 marks "no accuracy computed"
        micro_average_accuracy = -1
    else:
        correct_examples = sum(result[0] * result[1] for result in accuracies)
        micro_average_accuracy = float(correct_examples) / evaluated_examples

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    task2_joint_structured_inflection.write_results_file(
        hyper_params, micro_average_accuracy, train_path, test_path,
        results_file_path + suffix, sigmorphon_root_dir, final_results,
        is_nbest)