def evaluate_model(predicted_templates, source_words, source_feature_dicts, target_words, target_feature_dicts,
                   feature_types, print_results=True):
    """Score predicted templates by instantiating them against the gold target words.

    Every example is looked up in ``predicted_templates`` under the key
    ``source_word:source_morph:target_morph`` (the morph strings come from
    ``common.get_morph_string``). The template stored there is handed to
    ``instantiate_template`` together with the source word, and the example
    counts as correct when the result equals the target word.

    Args:
        predicted_templates: dict mapping joint index to a predicted template.
        source_words: source word of each example.
        source_feature_dicts: source feature dict of each example.
        target_words: gold target word of each example.
        target_feature_dicts: target feature dict of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``; accuracy is ``0.0`` when
        ``predicted_templates`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # TODO: other option - predict template, generate template using the correct word, check if templates are equal
    correct = 0
    examples = zip(source_words, source_feature_dicts, target_words, target_feature_dicts)
    for source_word, source_feat_dict, target_word, target_feat_dict in examples:
        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, source_word)
        if predicted_word == target_word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print('source word: ' + source_word + ' gold: ' + target_word + ' template:' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predicted_templates)
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, source_words,
                      source_feats, target_feats, feat_index, feature_types):
    """Predict a template for each (source word, source features, target features) example.

    Returns a dict mapping ``source_word:source_morph:target_morph`` to the
    value returned by ``predict_inflection_template`` for that example. When a
    key repeats, the prediction of its last occurrence is kept.
    """
    templates_by_key = {}
    for word, src_feats, tgt_feats in zip(source_words, source_feats, target_feats):
        template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, word,
                                               src_feats, tgt_feats,
                                               alphabet_index, inverse_alphabet_index, feat_index,
                                               feature_types)

        # index the prediction by its inputs: word + source morph + target morph
        key = word + ':' + common.get_morph_string(src_feats, feature_types)
        key = key + ':' + common.get_morph_string(tgt_feats, feature_types)
        templates_by_key[key] = template

    return templates_by_key
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    """Score predicted templates by instantiating them on the lemma and comparing to the gold word.

    Each example is looked up in ``predicted_templates`` under the key
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``).
    Wrong predictions are printed even when ``print_results`` is false.

    Args:
        predicted_templates: dict mapping joint index to a predicted template.
        lemmas: lemma of each example.
        feature_dicts: feature dict of each example.
        words: gold inflected word of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print every example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``; accuracy is ``0.0`` when
        ``predicted_templates`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # for now, go with one, maybe try two later
    # TODO: two - predict template, generate template using the correct word, check if templates are equal
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, lemma)
        is_correct = predicted_word == word
        if is_correct:
            correct += 1
        sign = 'V' if is_correct else 'X'
        # mispredictions are always reported, regardless of print_results
        if print_results or not is_correct:
            print('lemma: ' + lemma + ' gold: ' + word + ' template: ' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predicted_templates)
    accuracy = float(correct) / total if total else 0.0
    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                      source_words,
                      source_feats, target_feats, feat_index, feature_types):
    """Run ``predict_inflection_template`` over all examples and index the results.

    The returned dict is keyed by ``source_word:source_morph:target_morph``,
    where the morph strings come from ``common.get_morph_string``. A repeated
    key is overwritten by the later example.
    """
    result = {}
    examples = zip(source_words, source_feats, target_feats)
    for src_word, src_feat_dict, tgt_feat_dict in examples:
        template = predict_inflection_template(
            model, encoder_frnn, encoder_rrnn, decoder_rnn, src_word,
            src_feat_dict, tgt_feat_dict,
            alphabet_index, inverse_alphabet_index, feat_index,
            feature_types)

        # build the lookup key from the example's inputs
        index = src_word + ':' + common.get_morph_string(src_feat_dict, feature_types)
        index += ':' + common.get_morph_string(tgt_feat_dict, feature_types)
        result[index] = template

    return result
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    """Score predicted templates by instantiating them on the lemma and comparing to the gold word.

    Each example is looked up in ``predicted_templates`` under the key
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``);
    the template is passed to ``instantiate_template`` with the lemma.

    Args:
        predicted_templates: dict mapping joint index to a predicted template.
        lemmas: lemma of each example.
        feature_dicts: feature dict of each example.
        words: gold inflected word of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``; accuracy is ``0.0`` when
        ``predicted_templates`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # for now, go with one, maybe try two later
    # TODO: two - predict template, generate template using the correct word, check if templates are equal
    hits = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, lemma)
        if predicted_word == word:
            hits += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print('lemma: ' + lemma + ' gold: ' + word + ' template: ' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predicted_templates)
    accuracy = float(hits) / total if total else 0.0
    if print_results:
        print('finished evaluating model. accuracy: ' + str(hits) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def evaluate_model(predicted_sequences, lemmas, feature_dicts, words, feature_types, print_results=False):
    """Score predicted sequences against the gold words after removing STEP symbols.

    Each example is looked up in ``predicted_sequences`` under the key
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``).
    The predicted word is the stored sequence with every ``STEP`` occurrence
    removed.

    Args:
        predicted_sequences: dict mapping joint index to a predicted sequence
            (must support ``.replace``).
        lemmas: lemma of each example.
        feature_dicts: feature dict of each example.
        words: gold inflected word of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print each example and a summary.

    Returns:
        ``(len(predicted_sequences), accuracy)``; accuracy is ``0.0`` when
        ``predicted_sequences`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predicted_sequences``.
    """
    if print_results:
        print('evaluating model...')

    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_template = predicted_sequences[joint_index]
        predicted_word = predicted_template.replace(STEP, '')
        if predicted_word == word:
            correct += 1
            sign = u'V'
        else:
            sign = u'X'
        if print_results:  # and sign == 'X':
            # every field is UTF-8 encoded before being formatted
            enc_l = lemma.encode('utf8')
            enc_w = word.encode('utf8')
            enc_t = ''.join([t.encode('utf8') for t in predicted_template])
            enc_p = predicted_word.encode('utf8')
            print('lemma: {}'.format(enc_l))
            print('gold: {}'.format(enc_w))
            print('template: {}'.format(enc_t))
            print('prediction: {}'.format(enc_p))
            print(sign)

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predicted_sequences)
    accuracy = float(correct) / total if total else 0.0
    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def evaluate_predictions(predictions,
                         lemmas,
                         feature_dicts,
                         words,
                         feature_types,
                         print_res=False):
    """Compare predicted words directly to the gold words.

    Each example is looked up in ``predictions`` under the key
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``)
    and counts as correct when the stored value equals the gold word.

    Args:
        predictions: dict mapping joint index to a predicted word.
        lemmas: lemma of each example.
        feature_dicts: feature dict of each example.
        words: gold inflected word of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_res: when true, print one line per example and a summary.

    Returns:
        ``(len(predictions), accuracy)``; accuracy is ``0.0`` when
        ``predictions`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predictions``.
    """
    if print_res:
        print('evaluating model...')

    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(
            feat_dict, feature_types)
        prediction = predictions[joint_index]
        if prediction == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_res:
            print('lemma: ' + lemma + ' gold: ' + word + ' prediction: ' +
                  prediction + ' ' + sign)

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predictions)
    accuracy = float(correct) / total if total else 0.0

    if print_res:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' + str(accuracy) +
              '\n\n')

    return total, accuracy
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    """Score predictions by joining each stored sequence and comparing it to the gold word.

    Each example is looked up in ``predicted_templates`` under the key
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``).
    The predicted word is ``''.join(...)`` of the stored value.

    Args:
        predicted_templates: dict mapping joint index to an iterable of strings.
        lemmas: lemma of each example.
        feature_dicts: feature dict of each example.
        words: gold inflected word of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``; accuracy is ``0.0`` when
        ``predicted_templates`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_word = ''.join(predicted_templates[joint_index])
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            # the "template" field is the same joined string as the prediction
            print(u'lemma: {} gold: {} template: {} prediction: {} correct: {}'.format(
                lemma, word, predicted_word, predicted_word, sign))

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predicted_templates)
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=False):
    """Score predictions by joining each stored sequence and comparing it to the gold word.

    Each example is looked up in ``predicted_templates`` under the key
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``).
    The predicted word is ``''.join(...)`` of the stored value.

    Args:
        predicted_templates: dict mapping joint index to an iterable of strings.
        lemmas: lemma of each example.
        feature_dicts: feature dict of each example.
        words: gold inflected word of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print each example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``; accuracy is ``0.0`` when
        ``predicted_templates`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_word = ''.join(predicted_templates[joint_index])
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            # every field is UTF-8 encoded before being formatted
            enc_l = lemma.encode('utf8')
            enc_w = word.encode('utf8')
            enc_p = predicted_word.encode('utf8')
            print('lemma: {}'.format(enc_l))
            print('gold: {}'.format(enc_w))
            print('prediction: {}'.format(enc_p))
            print(sign)

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predicted_templates)
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def evaluate_model(predicted_templates,
                   lemmas,
                   feature_dicts,
                   words,
                   feature_types,
                   print_results=True):
    """Score predictions by joining each stored sequence and comparing it to the gold word.

    Each example is looked up in ``predicted_templates`` under the key
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``).
    The predicted word is ``''.join(...)`` of the stored value.

    Args:
        predicted_templates: dict mapping joint index to an iterable of strings.
        lemmas: lemma of each example.
        feature_dicts: feature dict of each example.
        words: gold inflected word of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``; accuracy is ``0.0`` when
        ``predicted_templates`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    n_correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(
            feat_dict, feature_types)
        predicted_word = ''.join(predicted_templates[joint_index])
        matched = predicted_word == word
        if matched:
            n_correct += 1
        sign = 'V' if matched else 'X'
        if print_results:
            # the "template" field is the same joined string as the prediction
            print(u'lemma: {} gold: {} template: {} prediction: {} correct: {}'.format(
                lemma, word, predicted_word, predicted_word, sign))

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predicted_templates)
    accuracy = float(n_correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(n_correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def predict_with_ensemble_majority(alphabet, alphabet_index, ensemble,
                                   feat_index, feat_input_dim,
                                   feature_alphabet, feature_types, hidden_dim,
                                   input_dim, inverse_alphabet_index, layers,
                                   test_feat_dicts, test_lemmas, test_words):
    """Predict the test set with every ensemble member and keep the majority output.

    ``ensemble`` is a comma-separated string of model paths. Each path is
    loaded through ``task1_attention_implementation.load_best_model`` and then
    used to predict all test inputs via ``predict_sequences``. For each test
    input the members' predictions are joined into strings and counted; the
    string with the most votes wins.

    Returns a dict mapping ``lemma:morph_string`` to the winning prediction in
    the un-joined form it was stored in by the last member that produced it.
    """
    model_paths = ensemble.split(',')
    print('ensemble paths:\n')
    print('\n'.join(model_paths))

    # load every member before predicting with any of them
    members = []
    for path in model_paths:
        model, encoder_frnn, encoder_rrnn, decoder_rnn = task1_attention_implementation.load_best_model(
            alphabet, path, input_dim, hidden_dim, layers, feature_alphabet,
            feat_input_dim, feature_types)
        members.append((model, encoder_frnn, encoder_rrnn, decoder_rnn))

    # one full-test-set prediction dict per member
    per_member_predictions = []
    for model, encoder_frnn, encoder_rrnn, decoder_rnn in members:
        per_member_predictions.append(
            predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn,
                              alphabet_index, inverse_alphabet_index,
                              test_lemmas, test_feat_dicts,
                              feat_index, feature_types))

    # vote per test input - the key is a lemma+feats representation
    winners = {}
    string_to_template = {}  # shared across all test inputs
    for lemma, feat_dict, _gold in zip(test_lemmas, test_feat_dicts, test_words):
        key = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        votes = defaultdict(int)
        for member_predictions in per_member_predictions:
            template = member_predictions[key]
            as_string = ''.join(template)
            votes[as_string] += 1
            string_to_template[as_string] = template
            print(u'template: {} prediction: {}'.format(template, as_string))

        # pick the string predicted by the most members
        winner = max(votes, key=lambda s: votes[s])
        print(u'chosen:{} with {} votes\n'.format(winner, votes[winner]))
        winners[key] = string_to_template[winner]

    return winners
def predict(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, feat_index,
            feature_types, lemmas, feature_dicts):
    """Predict an inflected word for every (lemma, feature dict) pair.

    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the value returned by
    ``predict_inflection``. A repeated key keeps the last prediction.
    """
    words_by_key = {}
    for lemma, feat_dict in zip(lemmas, feature_dicts):
        word = predict_inflection(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
                                  alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        morph = common.get_morph_string(feat_dict, feature_types)
        words_by_key[lemma + ':' + morph] = word

    return words_by_key
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Predict a template for every (lemma, feature dict) pair.

    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the value returned by
    ``predict_inflection_template``. A repeated key keeps the last prediction.
    """
    templates_by_key = {}
    for lemma, feat_dict in zip(lemmas, feats):
        template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                                               feat_dict, alphabet_index, inverse_alphabet_index, feat_index,
                                               feature_types)

        # key each output by the inputs that produced it
        morph = common.get_morph_string(feat_dict, feature_types)
        templates_by_key[lemma + ':' + morph] = template

    return templates_by_key
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Run ``predict_output_sequence`` on every (lemma, feature dict) pair.

    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the predicted output. A repeated key keeps
    the last prediction.
    """
    outputs = {}
    pairs = zip(lemmas, feats)
    for lemma, feat_dict in pairs:
        output = predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                                         feat_dict, alphabet_index, inverse_alphabet_index, feat_index,
                                         feature_types)

        # index each output by its matching inputs - lemma + features
        key = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        outputs[key] = output

    return outputs
def predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Run ``predict_output_sequence`` on every (lemma, feature dict) pair.

    The model parameters (``char_lookup``, ``feat_lookup``, ``R``, ``bias``)
    are forwarded unchanged. Returns a dict mapping ``lemma:morph_string``
    (morph string from ``common.get_morph_string``) to the predicted sequence;
    a repeated key keeps the last prediction.
    """
    sequences_by_key = {}
    for lemma, feat_dict in zip(lemmas, feats):
        sequence = predict_output_sequence(
            model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
            feat_dict, alphabet_index, inverse_alphabet_index, feat_index,
            feature_types)

        # index each output by its matching inputs - lemma + features
        morph = common.get_morph_string(feat_dict, feature_types)
        sequences_by_key[lemma + ':' + morph] = sequence

    return sequences_by_key
def predict_with_ensemble_majority(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet,
                                   feature_types, hidden_dim, input_dim, inverse_alphabet_index, layers,
                                   test_feat_dicts, test_lemmas, test_words):
    """Predict the test set with every ensemble member and return the majority vote.

    ``ensemble`` is a comma-separated string of model paths; each is loaded
    with ``task1_attention_implementation.load_best_model`` and run over the
    whole test set via ``predict_sequences``. Per test input, predictions are
    joined to strings, tallied, and the most frequent string is chosen.

    Returns a dict mapping ``lemma:morph_string`` to the chosen prediction, in
    the un-joined form stored by the last member that produced that string.
    """
    paths = ensemble.split(',')
    print('ensemble paths:\n')
    print('\n'.join(paths))

    # load ensemble models
    loaded_models = []
    for model_path in paths:
        loaded = task1_attention_implementation.load_best_model(alphabet, model_path, input_dim, hidden_dim, layers,
                                                                feature_alphabet, feat_input_dim, feature_types)
        model, encoder_frnn, encoder_rrnn, decoder_rnn = loaded
        loaded_models.append((model, encoder_frnn, encoder_rrnn, decoder_rnn))

    # predict the entire test set with each model in the ensemble
    all_predictions = []
    for model, encoder_frnn, encoder_rrnn, decoder_rnn in loaded_models:
        member_output = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                                          inverse_alphabet_index, test_lemmas, test_feat_dicts, feat_index,
                                          feature_types)
        all_predictions.append(member_output)

    # perform voting for each test input - the key is a lemma+feats representation
    chosen = {}
    string_to_template = {}  # shared across all test inputs
    for lemma, feat_dict, _gold in zip(test_lemmas, test_feat_dicts, test_words):
        key = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        tally = defaultdict(int)
        for member_output in all_predictions:
            raw = member_output[key]
            joined = ''.join(raw)
            tally[joined] += 1
            string_to_template[joined] = raw
            print(u'template: {} prediction: {}'.format(raw, joined))

        # keep the most predicted output
        best = max(tally, key=lambda candidate: tally[candidate])
        print(u'chosen:{} with {} votes\n'.format(best, tally[best]))
        chosen[key] = string_to_template[best]

    return chosen
# Example no. 17
# 0
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn,
                      alphabet_index, inverse_alphabet_index, lemmas, feats,
                      feat_index, feature_types):
    """Predict a template for every (lemma, feature dict) pair.

    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the value returned by
    ``predict_inflection_template``. A repeated key keeps the last prediction.
    """
    by_key = {}
    for lemma, feat_dict in zip(lemmas, feats):
        template = predict_inflection_template(
            model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
            alphabet_index, inverse_alphabet_index, feat_index, feature_types)

        # key each template by the inputs that produced it
        morph = common.get_morph_string(feat_dict, feature_types)
        by_key[lemma + ':' + morph] = template

    return by_key
def evaluate_model(predicted_templates,
                   source_words,
                   source_feature_dicts,
                   target_words,
                   target_feature_dicts,
                   feature_types,
                   print_results=True):
    """Score predicted templates by instantiating them against the gold target words.

    Every example is looked up in ``predicted_templates`` under the key
    ``source_word:source_morph:target_morph`` (the morph strings come from
    ``common.get_morph_string``). The template stored there is handed to
    ``instantiate_template`` together with the source word, and the example
    counts as correct when the result equals the target word.

    Args:
        predicted_templates: dict mapping joint index to a predicted template.
        source_words: source word of each example.
        source_feature_dicts: source feature dict of each example.
        target_words: gold target word of each example.
        target_feature_dicts: target feature dict of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``; accuracy is ``0.0`` when
        ``predicted_templates`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # TODO: other option - predict template, generate template using the correct word, check if templates are equal
    n_correct = 0
    test_data = zip(source_words, source_feature_dicts, target_words,
                    target_feature_dicts)
    for source_word, source_feat_dict, target_word, target_feat_dict in test_data:
        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, source_word)
        matched = predicted_word == target_word
        if matched:
            n_correct += 1
        sign = 'V' if matched else 'X'
        if print_results:
            print('source word: ' + source_word + ' gold: ' + target_word + ' template:' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predicted_templates)
    accuracy = float(n_correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(n_correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Run ``predict_output_sequence`` on every (lemma, feature dict) pair, reporting progress.

    A progress line is printed after every 1000th example (except the first).
    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the predicted sequence; a repeated key
    keeps the last prediction.
    """
    print('predicting...')
    sequences_by_key = {}
    total = len(lemmas)
    for position, (lemma, feat_dict) in enumerate(zip(lemmas, feats)):
        sequence = predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                                           feat_dict, alphabet_index, inverse_alphabet_index, feat_index,
                                           feature_types)
        if position > 0 and position % 1000 == 0:
            print('predicted {} examples out of {}'.format(position, total))

        morph = common.get_morph_string(feat_dict, feature_types)
        sequences_by_key[lemma + ':' + morph] = sequence

    return sequences_by_key
def predict(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
            inverse_alphabet_index, feat_index, feature_types, lemmas,
            feature_dicts):
    """Predict an inflected word for every (lemma, feature dict) pair.

    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the value returned by
    ``predict_inflection``. A repeated key keeps the last prediction.
    """
    result = {}
    for lemma, feat_dict in zip(lemmas, feature_dicts):
        inflected = predict_inflection(
            model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
            alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        key = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        result[key] = inflected

    return result
def predict_nbest_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                          lemmas, feats, feat_index, feature_types, nbest, words):
    """Predict n-best templates per example and report how often they beat greedy decoding.

    For each (lemma, feature dict) pair both the greedy template
    (``predict_inflection_template``) and the n-best list
    (``predict_nbest_template``) are computed. Whenever the greedy guess is
    wrong but an n-best candidate instantiates to the gold word, the case is
    printed for debugging. A summary of how many examples were "fixed" this
    way is printed at the end.

    Args:
        lemmas, feats: parallel inputs, one entry per example.
        nbest: forwarded to ``predict_nbest_template``.
        words: gold words, indexed in parallel with ``lemmas``.
        (remaining arguments are forwarded to the prediction helpers)

    Returns:
        dict mapping ``lemma:morph_string`` to the n-best list, whose items
        are unpacked here as ``(template, p)`` pairs.
    """
    predictions = {}
    fix_count = 0
    total = len(lemmas)
    for i, (lemma, feat_dict) in enumerate(zip(lemmas, feats)):
        predicted_template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                                                         feat_dict, alphabet_index, inverse_alphabet_index, feat_index,
                                                         feature_types)
        predicted_nbest = predict_nbest_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                                                 feat_dict, alphabet_index, inverse_alphabet_index, feat_index,
                                                 feature_types, nbest)

        # DEBUG:
        gold = words[i]
        greedy_guess = instantiate_template(predicted_template, lemma)
        gsign = 'V' if gold == greedy_guess else 'X'

        fixed_by_nbest = False
        for j, (s, p) in enumerate(predicted_nbest):
            nbest_guess = instantiate_template(s, lemma)
            nsign = 'V' if gold == nbest_guess else 'X'

            if gsign == 'X' and nsign == 'V':
                fixed_by_nbest = True
                print(str(i) + ' out of ' + str(total))
                print(lemma + '\n')
                print('GREEDY: \n' + str(''.join(predicted_template).encode('utf8')))
                print(greedy_guess + ' ' + gsign + '\n')
                print('{0}-BEST:'.format(j + 1))
                print(str(''.join(s).encode('utf8')) + ' ' + str(p))
                print(nbest_guess + ' ' + nsign + '\n')

        # count each example once, even if several candidates hit the gold word,
        # so the summary below can never exceed the number of examples
        if fixed_by_nbest:
            fix_count += 1

        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predictions[joint_index] = predicted_nbest

    # guard the percentage against an empty input set
    fixed_percent = float(fix_count) / total * 100 if total else 0.0
    print('================================================================')
    print('beam search fixed {0} out of {1}, {2}%'.format(fix_count, total, fixed_percent))
    print('================================================================')

    return predictions
def predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn,
                      alphabet_index, inverse_alphabet_index, lemmas, feats,
                      feat_index, feature_types):
    """Run ``predict_output_sequence`` on every (lemma, feature dict) pair, reporting progress.

    A progress line is printed after every 1000th example (except the first).
    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the predicted sequence; a repeated key
    keeps the last prediction.
    """
    print('predicting...')
    outputs = {}
    n_examples = len(lemmas)
    for idx, (lemma, feat_dict) in enumerate(zip(lemmas, feats)):
        output = predict_output_sequence(
            model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
            alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        if idx > 0 and idx % 1000 == 0:
            print('predicted {} examples out of {}'.format(idx, n_examples))

        key = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        outputs[key] = output

    return outputs
def evaluate_predictions(predictions, lemmas, feature_dicts, words, feature_types, print_res=False):
    """Compare predicted words directly to the gold words.

    Each example is looked up in ``predictions`` under the key
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``)
    and counts as correct when the stored value equals the gold word.

    Args:
        predictions: dict mapping joint index to a predicted word.
        lemmas: lemma of each example.
        feature_dicts: feature dict of each example.
        words: gold inflected word of each example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_res: when true, print one line per example and a summary.

    Returns:
        ``(len(predictions), accuracy)``; accuracy is ``0.0`` when
        ``predictions`` is empty instead of raising ZeroDivisionError.

    Raises:
        KeyError: when an example's joint index is not in ``predictions``.
    """
    if print_res:
        print('evaluating model...')

    hits = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        prediction = predictions[joint_index]
        matched = prediction == word
        if matched:
            hits += 1
        sign = 'V' if matched else 'X'
        if print_res:
            print('lemma: ' + lemma + ' gold: ' + word + ' prediction: ' + prediction + ' ' + sign)

    # NOTE(review): the denominator is the number of distinct joint indices, not
    # the number of evaluated examples; the two differ if inputs repeat.
    total = len(predictions)
    accuracy = float(hits) / total if total else 0.0

    if print_res:
        print('finished evaluating model. accuracy: ' + str(hits) + '/' + str(total) + '=' + str(accuracy) +
              '\n\n')

    return total, accuracy
# Example no. 24
# 0
def main(train_path, dev_path, test_path, results_file_path,
         sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim, epochs,
         layers, optimization, regularization, learning_rate, plot, override):
    """Plot SVD reductions of encoder states and embeddings for dev-set inputs.

    The model and the dev data are obtained from init_model. The function then
      1. encodes dev (features, lemma) inputs with
         soft_attention.encode_feats_and_chars,
      2. groups the resulting vectors by position and by symbol,
      3. calls plot_svd_reduction for character states, feature states,
         character embeddings and feature embeddings,
      4. calls plot_attn_for_inflection for the dev example at index 1000
         (only if its lemma is shorter than 6 characters),
    and returns. Everything after the bare `return` is unreachable.

    Parameter usage in this body:
      * train_path, dev_path, test_path, results_file_path, input_dim,
        hidden_dim, feat_input_dim, layers: forwarded to init_model and
        plot_attn_for_inflection.
      * epochs, optimization, regularization, learning_rate: only recorded in
        hyper_params.
      * sigmorphon_root_dir, plot, override: not used.

    Returns None.
    """
    hyper_params = {
        'INPUT_DIM': input_dim,
        'HIDDEN_DIM': hidden_dim,
        'FEAT_INPUT_DIM': feat_input_dim,
        'EPOCHS': epochs,
        'LAYERS': layers,
        'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
        'OPTIMIZATION': optimization,
        'PATIENCE': MAX_PATIENCE,
        'REGULARIZATION': regularization,
        'LEARNING_RATE': learning_rate
    }

    # R, bias, W_c, W__a, U__a, v__a and dev_words are unpacked but never used
    # in this function.
    (initial_model, char_lookup, feat_lookup, R, bias, encoder_frnn,
     encoder_rrnn, decoder_rnn, W_c, W__a, U__a, v__a, alphabet_index,
     feat_index, feature_types, inverse_alphabet_index, dev_words, dev_lemmas,
     dev_feat_dicts) = init_model(dev_path, feat_input_dim, hidden_dim,
                                  input_dim, layers, results_file_path,
                                  test_path, train_path)

    # char_lookup = initial_model["char_lookup"]
    # feat_lookup = initial_model["feat_lookup"]

    # "what is learned by the encoder" experiment:
    # get lots of input words (dev set)
    # run blstm encoder on them (encode feats and chars)
    # experiments:
    # we want to understand what's captured/what's more significant: current symbol, context or all?
    # to do so:
    # take the blstm rep. for the same character, same context, different positions. how will it cluster by position?
    # i.e: abbbbbb, babbbb, bbabbbb, bbbabbbb, bbbbabb, bbbbbba...

    # take the blstm rep. for the same character, same position, diff. contexts. how will it cluster by context?
    # aaaabaaaa, bbbbbbbbb, cccbcccc, dddbdddd, eeeebeeee...

    # take the blstm rep. for diff characters, same position, same contexts. how will it cluster by character?
    # aaaaaaaa, aaabaaa, aaacaaa, aaadaaa, aaaeaaa, aaafaaa...

    # other option: take (all?) "natural" (dev) examples, throw on SVD, paint by location, character, context (last one
    #  is more complex but can probably think about something)

    # NOTE(review): slices below are end-exclusive, so `len(dev_lemmas) - 1`
    # leaves out the last dev example - confirm whether that is intended.
    start = 0
    end = len(dev_lemmas) - 1
    encoded_vecs = {}

    index_to_feats_and_lemma = {}

    # get bilstm encoder representation
    # (keyed by morph string + lemma, so repeated inputs overwrite each other)
    for lemma, feats in zip(dev_lemmas[start:end], dev_feat_dicts[start:end]):
        index = common.get_morph_string(feats, feature_types) + lemma
        index_to_feats_and_lemma[index] = (feats, lemma)
        encoded_vecs[index] = soft_attention.encode_feats_and_chars(
            alphabet_index, char_lookup, encoder_frnn, encoder_rrnn,
            feat_index, feat_lookup, feats, feature_types, lemma)
    # get examples (encoder hidden states) by location: 1, 2, 3, 4, 5...
    # (the enumerate below numbers locations from 0)
    location_to_vec = {}
    for encoded_rep_index in encoded_vecs:
        encoded_rep = encoded_vecs[encoded_rep_index]
        for location, vec in enumerate(encoded_rep):
            if location in location_to_vec:
                location_to_vec[location].append(vec)
            else:
                location_to_vec[location] = [vec]

    location_labels = []
    vecs = []

    # take up to 100 samples from each location
    # NOTE: location_labels and vecs are only consumed by the commented-out
    # plot call below; vecs is reset before the by-symbol sampling.
    for key in location_to_vec:
        for value in location_to_vec[key][0:100]:
            location_labels.append(key)
            vecs.append(value.vec_value())

    # plot_svd_reduction(hidden_states, location_labels, title='SVD for encoder hidden states by location')

    # get examples (encoder hidden states) by character: א,ב,ג,ד,ה,ו...
    char_to_vec = {}
    char_vecs = []
    char_labels = []
    char_location_labels = []
    current_char_labels = []
    feat_vecs = []
    feat_labels = []
    for encoded_rep_index in encoded_vecs:

        # get bilstm encoding for the sequence
        encoded_rep = encoded_vecs[encoded_rep_index]

        # should skip the feat vecs (?)
        # get matching lemma and features
        feats, lemma = index_to_feats_and_lemma[encoded_rep_index]
        sorted_feats = []
        for feat in sorted(feature_types):
            if feat in feats:
                sorted_feats.append(u'{}:{}'.format(feat, feats[feat]))

        # Symbol sequence used to label the encoder states: encoded_rep is
        # indexed with positions of this list, i.e. the code assumes one
        # encoder state per symbol of '<' + features + lemma chars + '>'.
        # NOTE(review): presumably this mirrors the input order used by
        # encode_feats_and_chars - confirm.
        seq_symbols = ['<'] + list(sorted_feats) + list(lemma) + ['>']

        # sort vectors by symbol
        for i, symbol in enumerate(seq_symbols):
            # symbols that occur in the lemma are treated as characters
            if symbol in lemma:
                char_vecs.append(encoded_rep[i])
                # neighbouring symbols, '_' when there is no neighbour
                if i > 0:
                    prev_symbol = seq_symbols[i - 1]
                else:
                    prev_symbol = '_'
                if i < len(seq_symbols) - 1:
                    next_symbol = seq_symbols[i + 1]
                else:
                    next_symbol = '_'
                # label format: "<symbol> (<previous>,<position>,<next>)"
                char_labels.append(u'{} ({},{},{})'.format(
                    symbol, prev_symbol, i, next_symbol))
                char_location_labels.append(u'{}'.format(i))
                current_char_labels.append(u'{}'.format(symbol))
            else:
                # '<' and '>' end up in neither the char nor the feat lists
                if symbol in sorted_feats:
                    feat_vecs.append(encoded_rep[i])
                    feat_labels.append(symbol)

            # every symbol (chars, feats and boundary markers) is grouped here
            if symbol in char_to_vec:
                char_to_vec[symbol].append(encoded_rep[i])
            else:
                char_to_vec[symbol] = [encoded_rep[i]]

    symbol_labels = []
    vecs = []

    # take up to 20 samples from each symbol
    # NOTE: symbol_labels and vecs are only consumed by the commented-out
    # plot call below.
    for key in char_to_vec:
        for value in char_to_vec[key][0:20]:
            symbol_labels.append(key)
            vecs.append(value.vec_value())

    # plot_svd_reduction(all_hidden_states, symbol_labels, title='SVD for encoder hidden states by symbol')

    char_hidden_states = np.array([v.vec_value() for v in char_vecs])
    # plot_svd_reduction(char_hidden_states[0:100], char_labels[0:100], title='SVD for encoder hidden states by symbol (characters only)')

    # first 200 character states, labelled with context, coloured by position
    plot_svd_reduction(
        char_hidden_states[0:200],
        char_labels[0:200],
        color_labels=char_location_labels[0:200],
        title='SVD for encoder hidden states by location (characters only)')

    # same 200 states, coloured by the character itself
    plot_svd_reduction(
        char_hidden_states[0:200],
        char_labels[0:200],
        color_labels=current_char_labels[0:200],
        title='SVD for encoder hidden states by character (characters only)')

    # first 500 character states, labelled by character, coloured by position
    plot_svd_reduction(char_hidden_states[0:500],
                       current_char_labels[0:500],
                       color_labels=char_location_labels[0:500],
                       title='Soft Attention - Encoded Inputs by Location')

    # same 500 states, labelled and coloured by character
    plot_svd_reduction(char_hidden_states[0:500],
                       current_char_labels[0:500],
                       color_labels=current_char_labels[0:500],
                       title='Soft Attention - Encoded Inputs by Character')

    # first 50 feature states; the colour label is the first four characters
    # of the 'feature:value' label
    feat_hidden_states = np.array([v.vec_value() for v in feat_vecs])
    plot_svd_reduction(
        feat_hidden_states[0:50],
        feat_labels[0:50],
        color_labels=[f[0:4] for f in feat_labels[0:50]],
        title='SVD for encoder hidden states by type (features only)')

    # TODO: get examples (encoder hidden states) by context: after/before א,ב,ג,ד,ה...
    # NOTE: char_embeddings is filled below but never read afterwards.
    char_embeddings = {}
    char_embeddings_matrix = []
    clean_alphabet_index = {}

    # print SVD for char embeddings
    # workaround to remove feat embeddings from plot: keep only single,
    # non-digit symbols that are not one of the special markers
    for char in alphabet_index:
        if not len(char) > 1 and not char.isdigit() and char not in [
                UNK, UNK_FEAT, EPSILON, NULL
        ]:
            clean_alphabet_index[char] = alphabet_index[char]

    for char in clean_alphabet_index:
        char_embeddings[char] = char_lookup[
            clean_alphabet_index[char]].vec_value()
        char_embeddings_matrix.append(
            char_lookup[clean_alphabet_index[char]].vec_value())
    X = np.array(char_embeddings_matrix)

    # the dict itself is passed as the labels argument; rows of X were
    # appended in the iteration order of that same dict
    plot_svd_reduction(X,
                       clean_alphabet_index,
                       title='SVD for character embeddings')

    # print SVD for feat embeddings
    # NOTE: feat_embeddings is filled below but never read afterwards.
    feat_embeddings = {}
    feat_embeddings_matrix = []
    for feat in feat_index:
        feat_embeddings[feat] = feat_lookup[feat_index[feat]].vec_value()
        feat_embeddings_matrix.append(
            feat_lookup[feat_index[feat]].vec_value())
    Y = np.array(feat_embeddings_matrix)

    plot_svd_reduction(Y, feat_index, title='SVD for feature embeddings')

    # attention plot for the single dev example at index 1000; the slices are
    # empty (and nothing is plotted) when the dev set has fewer examples
    start = 1000
    end = 1001
    for lemma, feats in zip(dev_lemmas[start:end], dev_feat_dicts[start:end]):
        if len(lemma) < 6:
            plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn,
                                     encoder_rrnn, feat_index, feature_types,
                                     initial_model, inverse_alphabet_index,
                                     dev_path, feat_input_dim, feats,
                                     hidden_dim, hyper_params, input_dim,
                                     layers, results_file_path, test_path,
                                     train_path, lemma)

    # NOTE(review): this unconditional return makes everything below
    # unreachable; the hard-coded examples are kept but never run.
    return
    # get user input word and features

    feats = {
        u'pos': u'VB',
        u'num': u'S',
        u'per': u'2',
        u'gen': u'M',
        u'binyan': u'HITPAEL',
        u'tense': u'PAST'
    }
    user_input = u'ספר'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn,
                             encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path,
                             feat_input_dim, feats, hidden_dim, hyper_params,
                             input_dim, layers, results_file_path, test_path,
                             train_path, user_input)

    feats = {u'pos': u'JJ', u'num': u'P', u'def': u'DEF', u'gen': u'F'}
    user_input = u'צמחוני'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn,
                             encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path,
                             feat_input_dim, feats, hidden_dim, hyper_params,
                             input_dim, layers, results_file_path, test_path,
                             train_path, user_input)

    feats = {
        u'pos': u'VB',
        u'num': u'S',
        u'gen': u'F',
        u'per': u'3',
        u'tense': u'FUTURE',
        u'binyan': u'PAAL'
    }
    user_input = u'שש'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn,
                             encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path,
                             feat_input_dim, feats, hidden_dim, hyper_params,
                             input_dim, layers, results_file_path, test_path,
                             train_path, user_input)

    # feats = {u'pos': u'NN', u'num': u'P', u'gen': u'F', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}
    feats = {
        u'pos': u'NN',
        u'num': u'P',
        u'gen': u'F',
        u'poss_per': u'2',
        u'poss_gen': u'M',
        u'poss_num': u'P'
    }  # u'tense' : u'FUTURE', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}

    user_input = u'כלב'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn,
                             encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path,
                             feat_input_dim, feats, hidden_dim, hyper_params,
                             input_dim, layers, results_file_path, test_path,
                             train_path, user_input)

    feats = {
        u'pos': u'VB',
        u'num': u'P',
        u'gen': u'M',
        u'per': u'3',
        u'tense': u'FUTURE',
        u'binyan': u'PAAL'
    }
    user_input = u'ישן'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn,
                             encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path,
                             feat_input_dim, feats, hidden_dim, hyper_params,
                             input_dim, layers, results_file_path, test_path,
                             train_path, user_input)

    feats = {
        u'pos': u'VB',
        u'num': u'P',
        u'gen': u'F',
        u'per': u'3',
        u'tense': u'FUTURE',
        u'binyan': u'PAAL'
    }
    user_input = u'ישן'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn,
                             encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path,
                             feat_input_dim, feats, hidden_dim, hyper_params,
                             input_dim, layers, results_file_path, test_path,
                             train_path, user_input)
    print 'Bye!'
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, epochs, layers,
         optimization, feat_input_dim):
    """Evaluate the saved best model of every POS cluster on the test data and write a results file.

    The training data is loaded to rebuild the alphabet, the feature alphabet and the list of POS clusters.
    For every training cluster the matching model is loaded with load_best_model, templates are predicted for
    the test examples of that cluster, and the instantiated inflections are collected in file order. Macro and
    micro averaged accuracies are printed and the results are written with common.write_results_file.

    epochs and optimization are only recorded in the hyper-parameter dict that is written with the results.

    If no cluster could be evaluated, both averages are reported as 0.0 (the original division raised
    ZeroDivisionError in that case, before anything was written).
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'EPOCHS': epochs, 'LAYERS': layers,
                    'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN, 'OPTIMIZATION': optimization}

    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for param in hyper_params:
        print(param + '=' + str(hyper_params[param]))

    # load data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(
        train_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(
        test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # cluster the data by POS type (features)
    train_cluster_to_data_indices = common.cluster_data_by_pos(train_feat_dicts)
    test_cluster_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)

    # cluster the data by inflection type (features)
    # train_cluster_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_cluster_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)

    # one (example count, accuracy) entry per evaluated cluster
    accuracies = []
    # original test-file index -> (lemma, feature dict, predicted inflection)
    final_results = {}

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_words = [train_words[i] for i in train_cluster_to_data_indices[cluster_type]]
        if len(train_cluster_words) < 1:
            print('only ' + str(len(train_cluster_words)) + ' samples for this inflection type. skipping')
            continue
        else:
            print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
                  str(len(train_cluster_words)) + ' examples')

        # test best model
        # NOTE(review): the KeyError handler is meant for clusters that are missing from the test data, but it
        # also swallows a KeyError raised anywhere else in this block (kept as-is to stay best-effort).
        try:
            test_indices = test_cluster_to_data_indices[cluster_type]
            test_cluster_lemmas = [test_lemmas[i] for i in test_indices]
            test_cluster_words = [test_words[i] for i in test_indices]
            test_cluster_feat_dicts = [test_feat_dicts[i] for i in test_indices]

            # load best model
            best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(str(cluster_index), alphabet,
                                                                                  results_file_path, input_dim,
                                                                                  hidden_dim, layers,
                                                                                  feature_alphabet, feat_input_dim,
                                                                                  feature_types)

            predicted_templates = task1_joint_structured_inflection.predict_templates(best_model, decoder_rnn,
                                                                                      encoder_frnn, encoder_rrnn,
                                                                                      alphabet_index,
                                                                                      inverse_alphabet_index,
                                                                                      test_cluster_lemmas,
                                                                                      test_cluster_feat_dicts,
                                                                                      feat_index,
                                                                                      feature_types)

            accuracy = task1_joint_structured_inflection.evaluate_model(predicted_templates, test_cluster_lemmas,
                                                                        test_cluster_feat_dicts, test_cluster_words,
                                                                        feature_types, True)
            accuracies.append(accuracy)

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types)
                inflection = task1_joint_structured_inflection.instantiate_template(predicted_templates[joint_index],
                                                                                    test_lemmas[i])
                final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection)

        except KeyError:
            print('could not find relevant examples in test data for cluster: ' + cluster_type)

    if not accuracies:
        # nothing was evaluated: avoid dividing by zero and still write the (empty) results
        print('no clusters were evaluated - reporting accuracy 0.0')

    # macro average: plain mean of the per-cluster accuracies
    accuracy_vals = [entry[1] for entry in accuracies]
    macro_avg_accuracy = sum(accuracy_vals) / len(accuracies) if accuracies else 0.0
    print('macro avg accuracy: ' + str(macro_avg_accuracy))

    # micro average: per-cluster accuracies weighted by the per-cluster counts
    mic_nom = sum([entry[0] * entry[1] for entry in accuracies])
    mic_denom = sum([entry[0] for entry in accuracies])
    micro_average_accuracy = mic_nom / mic_denom if mic_denom else 0.0
    print('micro avg accuracy: ' + str(micro_average_accuracy))

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'
    common.write_results_file(hyper_params, micro_average_accuracy, train_path,
                              test_path, results_file_path + suffix, sigmorphon_root_dir,
                              final_results)
Ejemplo n.º 26
0
def main(train_path, test_path, results_file_path, sigmorphon_root_dir,
         input_dim, hidden_dim, epochs, layers, optimization, feat_input_dim,
         nbest):
    """Evaluate the saved best model on the task 2 test data and write a results file.

    The training data is loaded to rebuild the alphabet and feature alphabet.
    All examples are placed in a single pseudo cluster, the model for that
    cluster is loaded with load_best_model, and then either
      * nbest == 1: greedy templates are predicted, evaluated and
        instantiated, and the cluster accuracy is reported, or
      * otherwise: n-best templates are predicted and every one of them is
        instantiated; no accuracy is computed and -1 is reported instead.

    epochs and optimization are only recorded in the hyper-parameter dict that
    is written with the results.

    is_nbest and the reported accuracy are initialised before the loop, so the
    final write no longer fails with UnboundLocalError when no cluster is
    evaluated (the accuracy is then the same -1 sentinel used for n-best).
    """
    hyper_params = {
        'INPUT_DIM': input_dim,
        'HIDDEN_DIM': hidden_dim,
        'EPOCHS': epochs,
        'LAYERS': layers,
        'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
        'OPTIMIZATION': optimization,
        'NBEST': nbest
    }

    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for param in hyper_params:
        print(param + '=' + str(hyper_params[param]))

    # load data
    (train_target_words, train_source_words, train_target_feat_dicts,
     train_source_feat_dicts) = prepare_sigmorphon_data.load_data(
         train_path, 2)
    (test_target_words, test_source_words, test_target_feat_dicts,
     test_source_feat_dicts) = prepare_sigmorphon_data.load_data(test_path, 2)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(
        train_target_words, train_source_words, train_target_feat_dicts,
        train_source_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_source_feat_dicts +
                                                   train_target_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {
        index: char
        for char, index in alphabet_index.items()
    }

    # cluster the data by POS type (features)
    # TODO: do we need to cluster on both source and target feats?
    #       probably enough to cluster on source here because pos will be same
    #       (no derivational morphology in this task)
    # train_cluster_to_data_indices = common.cluster_data_by_pos(train_source_feat_dicts)
    # test_cluster_to_data_indices = common.cluster_data_by_pos(test_source_feat_dicts)

    # cluster the data by inflection type (features)
    # train_cluster_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_cluster_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)

    # no clustering, single model
    train_cluster_to_data_indices = common.get_single_pseudo_cluster(
        train_source_feat_dicts)
    test_cluster_to_data_indices = common.get_single_pseudo_cluster(
        test_source_feat_dicts)

    # original test-file index -> (source word, source feats, prediction(s), target feats)
    final_results = {}

    # Both values are needed after the loop; define them up front so they
    # exist even when every cluster is skipped. -1 means "no accuracy".
    is_nbest = nbest != 1
    micro_average_accuracy = -1

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(
            train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_target_words = [
            train_target_words[i]
            for i in train_cluster_to_data_indices[cluster_type]
        ]
        if len(train_cluster_target_words) < 1:
            print('only ' + str(len(train_cluster_target_words)) +
                  ' samples for this inflection type. skipping')
            continue
        else:
            print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
                  str(len(train_cluster_target_words)) + ' examples')

        # test best model
        test_indices = test_cluster_to_data_indices[cluster_type]
        test_cluster_source_words = [test_source_words[i] for i in test_indices]
        test_cluster_target_words = [test_target_words[i] for i in test_indices]
        test_cluster_source_feat_dicts = [
            test_source_feat_dicts[i] for i in test_indices
        ]
        test_cluster_target_feat_dicts = [
            test_target_feat_dicts[i] for i in test_indices
        ]

        # load best model
        best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
            str(cluster_index), alphabet, results_file_path, input_dim,
            hidden_dim, layers, feature_alphabet, feat_input_dim,
            feature_types)

        # NOTE(review): the suffix removed here is '-task1-train' although this
        # script loads task 2 data - confirm; it only affects the printed name.
        lang = train_path.split('/')[-1].replace('-task{0}-train'.format('1'),
                                                 '')

        # handle greedy prediction
        if not is_nbest:
            predicted_templates = task2_ms2s.predict_templates(
                best_model, decoder_rnn, encoder_frnn, encoder_rrnn,
                alphabet_index, inverse_alphabet_index,
                test_cluster_source_words, test_cluster_source_feat_dicts,
                test_cluster_target_feat_dicts, feat_index, feature_types)

            accuracy = task2_ms2s.evaluate_model(
                predicted_templates,
                test_cluster_source_words,
                test_cluster_source_feat_dicts,
                test_cluster_target_words,
                test_cluster_target_feat_dicts,
                feature_types,
                print_results=False)
            print('{0} {1} accuracy: {2}'.format(lang, cluster_type,
                                                 accuracy[1]))

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                joint_index = test_source_words[i] + ':' + common.get_morph_string(test_source_feat_dicts[i],
                                                                                   feature_types) \
                              + ':' + common.get_morph_string(test_target_feat_dicts[i], feature_types)
                inflection = task2_ms2s.instantiate_template(
                    predicted_templates[joint_index], test_source_words[i])
                final_results[i] = (test_source_words[i],
                                    test_source_feat_dicts[i], inflection,
                                    test_target_feat_dicts[i])

            # with the single pseudo cluster this is the overall accuracy
            micro_average_accuracy = accuracy[1]

        # handle n-best prediction
        else:
            predicted_nbest_templates = task2_ms2s.predict_nbest_templates(
                best_model, decoder_rnn, encoder_frnn, encoder_rrnn,
                alphabet_index, inverse_alphabet_index,
                test_cluster_source_words, test_cluster_source_feat_dicts,
                test_cluster_target_feat_dicts, feat_index, feature_types,
                nbest, test_cluster_target_words)

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                joint_index = test_source_words[i] + ':' + common.get_morph_string(test_source_feat_dicts[i],
                                                                                   feature_types) \
                              + ':' + common.get_morph_string(test_target_feat_dicts[i], feature_types)

                # each n-best entry is a (template, second item) pair; only the template is used
                nbest_inflections = [
                    task2_ms2s.instantiate_template(template,
                                                    test_source_words[i])
                    for (template, _) in predicted_nbest_templates[joint_index]
                ]
                final_results[i] = (test_source_words[i],
                                    test_source_feat_dicts[i],
                                    nbest_inflections,
                                    test_target_feat_dicts[i])

            # no accuracy is computed for n-best output
            micro_average_accuracy = -1

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    task2_joint_structured_inflection.write_results_file(
        hyper_params, micro_average_accuracy, train_path, test_path,
        results_file_path + suffix, sigmorphon_root_dir, final_results,
        is_nbest)
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, epochs, layers,
         optimization, feat_input_dim, nbest):
    """Evaluate previously saved per-cluster models on the test data and write a results file.

    The train and test examples are clustered by POS (common.cluster_data_by_pos). For every
    training cluster the matching saved model is loaded with load_best_model, the test examples
    of that cluster are predicted (greedy when nbest == 1, n-best otherwise) and the instantiated
    inflections are stored under their original test-file index so the output keeps the input
    order.

    Args:
        train_path: path of the training data; also used to derive the language name for logging.
        test_path: path of the data to predict.
        results_file_path: prefix used to locate the saved models and to name the results file.
        sigmorphon_root_dir: forwarded to common.write_results_file.
        input_dim, hidden_dim, layers, feat_input_dim: model dimensions, forwarded to
            load_best_model.
        epochs, optimization: only recorded in the hyper-parameter dict.
        nbest: 1 selects greedy prediction + accuracy evaluation; any other value selects
            n-best prediction (no accuracy is computed in that mode).

    Notes:
        Test examples whose cluster never occurs in the training data are not predicted, since
        only training clusters are iterated.
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'EPOCHS': epochs, 'LAYERS': layers,
                    'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN, 'OPTIMIZATION': optimization, 'NBEST':nbest}

    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for param in hyper_params:
        print(param + '=' + str(hyper_params[param]))

    # load data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(
        train_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(
        test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # cluster the data by POS type (features)
    train_cluster_to_data_indices = common.cluster_data_by_pos(train_feat_dicts)
    test_cluster_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)

    # cluster the data by inflection type (features)
    # train_cluster_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_cluster_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)

    accuracies = []
    final_results = {}

    # Bound before the loop so the final write cannot raise NameError when every cluster is
    # skipped. -1 means "accuracy not computed" (n-best mode, or nothing was evaluated).
    is_nbest = nbest != 1
    micro_average_accuracy = -1

    # language name is only used for logging and does not depend on the cluster
    lang = train_path.split('/')[-1].replace('-task{0}-train'.format('1'), '')

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_words = [train_words[i] for i in train_cluster_to_data_indices[cluster_type]]
        if len(train_cluster_words) < 1:
            print('only ' + str(len(train_cluster_words)) + ' samples for this inflection type. skipping')
            continue
        else:
            print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
                  str(len(train_cluster_words)) + ' examples')

        # a cluster seen in training may have no examples in the test file; there is nothing to
        # predict for it, and indexing the test clustering directly would raise KeyError
        if cluster_type not in test_cluster_to_data_indices \
                or len(test_cluster_to_data_indices[cluster_type]) < 1:
            print('no test samples for cluster ' + cluster_type + '. skipping')
            continue
        test_indices = test_cluster_to_data_indices[cluster_type]

        # test best model
        test_cluster_lemmas = [test_lemmas[i] for i in test_indices]
        test_cluster_words = [test_words[i] for i in test_indices]
        test_cluster_feat_dicts = [test_feat_dicts[i] for i in test_indices]

        # load best model (models are stored per training-cluster index)
        best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(str(cluster_index), alphabet,
                                                                              results_file_path, input_dim,
                                                                              hidden_dim, layers,
                                                                              feature_alphabet, feat_input_dim,
                                                                              feature_types)

        if not is_nbest:
            predicted_templates = task1_joint_structured_inflection_blstm_feedback_fix.predict_templates(
                best_model,
                decoder_rnn,
                encoder_frnn, encoder_rrnn,
                alphabet_index,
                inverse_alphabet_index,
                test_cluster_lemmas,
                test_cluster_feat_dicts,
                feat_index,
                feature_types)

            accuracy = task1_joint_structured_inflection_blstm_feedback_fix.evaluate_model(
                predicted_templates,
                test_cluster_lemmas,
                test_cluster_feat_dicts, test_cluster_words,
                feature_types, print_results=False)
            accuracies.append(accuracy)
            print('{0} {1} accuracy: {2}'.format(lang, cluster_type, accuracy[1]))

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types)
                inflection = task1_joint_structured_inflection_blstm_feedback_fix.instantiate_template(
                    predicted_templates[joint_index], test_lemmas[i])
                final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection)

        else:
            predicted_nbset_templates = task1_joint_structured_inflection_blstm_feedback_fix.predict_nbest_templates(
                best_model,
                decoder_rnn,
                encoder_frnn,
                encoder_rrnn,
                alphabet_index,
                inverse_alphabet_index,
                test_cluster_lemmas,
                test_cluster_feat_dicts,
                feat_index,
                feature_types,
                nbest,
                test_cluster_words)

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types)

                # each n-best entry is a (template, p) pair; only the template is instantiated
                nbest_inflections = [
                    task1_joint_structured_inflection_blstm_feedback_fix.instantiate_template(
                        template, test_lemmas[i])
                    for (template, p) in predicted_nbset_templates[joint_index]]
                final_results[i] = (test_lemmas[i], test_feat_dicts[i], nbest_inflections)

    if accuracies:
        # Micro average over all evaluated clusters: per-cluster accuracies weighted by the
        # per-cluster sample counts. Previously only the last cluster's accuracy was reported.
        # NOTE(review): assumes evaluate_model returns (sample_count, accuracy) - confirm
        # against task1_joint_structured_inflection_blstm_feedback_fix.evaluate_model.
        mic_denom = sum(acc[0] for acc in accuracies)
        if mic_denom > 0:
            mic_nom = sum(acc[0] * acc[1] for acc in accuracies)
            micro_average_accuracy = mic_nom / float(mic_denom)

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    common.write_results_file(hyper_params,
                              micro_average_accuracy,
                              train_path,
                              test_path,
                              results_file_path + suffix,
                              sigmorphon_root_dir,
                              final_results,
                              is_nbest)
def predict_nbest_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                            source_words,
                            source_feats, target_feats, feat_index, feature_types, nbest, words):
    """Predict the n-best templates for every (source word, source feats, target feats) triple.

    For each triple both the greedy decoder (predict_inflection_template) and the n-best decoder
    (predict_nbest_template) are run. The greedy result is used for debug output only: whenever
    the greedy guess is wrong but an n-best candidate instantiates to the gold word, the case is
    printed and the word is counted as "fixed by beam search".

    Args:
        source_words, source_feats, target_feats: parallel sequences describing the inputs.
        nbest: forwarded to predict_nbest_template.
        words: gold target words, indexed in parallel with source_words (debug comparison only).
        The remaining arguments are forwarded unchanged to the decoders.

    Returns:
        dict mapping 'source_word:source_morph_string:target_morph_string' to the value returned
        by predict_nbest_template for that input (iterated here as (template, p) pairs).
    """
    predictions = {}
    fix_count = 0
    total = len(source_words)
    for i, (source_word, source_feat_dict, target_feat_dict) in enumerate(
            zip(source_words, source_feats, target_feats)):
        predicted_template = predict_inflection_template(model,
                                                         encoder_frnn,
                                                         encoder_rrnn,
                                                         decoder_rnn,
                                                         source_word,
                                                         source_feat_dict,
                                                         target_feat_dict,
                                                         alphabet_index,
                                                         inverse_alphabet_index,
                                                         feat_index,
                                                         feature_types)

        predicted_nbest = predict_nbest_template(model,
                                                 encoder_frnn,
                                                 encoder_rrnn,
                                                 decoder_rnn,
                                                 source_word,
                                                 source_feat_dict,
                                                 target_feat_dict,
                                                 alphabet_index,
                                                 inverse_alphabet_index,
                                                 feat_index,
                                                 feature_types,
                                                 nbest)

        # DEBUG: compare the greedy guess and every n-best guess against the gold word
        greedy_guess = instantiate_template(predicted_template, source_word)
        gsign = 'V' if words[i] == greedy_guess else 'X'

        # Several distinct templates can instantiate to the same (correct) word. The summary
        # below is reported per word, so a word is counted once no matter how many of its
        # candidates match; every matching candidate is still printed.
        fixed_by_beam = False
        for j, (s, p) in enumerate(predicted_nbest):
            nbest_guess = instantiate_template(s, source_word)
            nsign = 'V' if words[i] == nbest_guess else 'X'

            if gsign == 'X' and nsign == 'V':
                fixed_by_beam = True
                print(str(i) + ' out of ' + str(total))
                print(source_word.encode('utf8') + '\n')
                encoded_template = [c.encode('utf8') for c in predicted_template]
                joined = ''.join(encoded_template)
                print('GREEDY: \n' + joined)
                print(greedy_guess.encode('utf8') + ' ' + gsign + '\n')
                print(u'{0}-BEST:'.format(j + 1))
                print(str(''.join(s).encode('utf8')) + ' ' + str(p))
                print(nbest_guess.encode('utf8') + ' ' + nsign + '\n')

        if fixed_by_beam:
            fix_count += 1

        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)

        predictions[joint_index] = predicted_nbest

    # guard against an empty input set, which used to raise ZeroDivisionError here
    fixed_percent = float(fix_count) / total * 100 if total else 0.0

    print('================================================================')
    print('beam search fixed {0} out of {1}, {2}%'.format(fix_count, total, fixed_percent))
    print('================================================================')

    return predictions
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim,
         epochs, layers, optimization, regularization, learning_rate, plot, override):
    """Run the "what is learned by the encoder" analysis and plot the results.

    Steps, in order:
      1. init_model(...) supplies the model parts, the lookup tables, the indices and the dev data.
      2. Dev examples are encoded with soft_attention.encode_feats_and_chars.
      3. The encoded vectors are grouped by position and by symbol, and several SVD plots of the
         character / feature hidden states are drawn with plot_svd_reduction.
      4. SVD plots of the character embeddings and of the feature embeddings are drawn.
      5. plot_attn_for_inflection is called for the dev example at index 1000 if its lemma is
         shorter than 6 characters.

    The function then returns; the hard-coded Hebrew examples after the `return` are unreachable.

    Args:
        train_path, dev_path, test_path, results_file_path, input_dim, hidden_dim,
        feat_input_dim, layers: forwarded to init_model and plot_attn_for_inflection.
        epochs, optimization, regularization, learning_rate: only recorded in hyper_params.
        sigmorphon_root_dir, plot, override: not used by this function body.
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    (initial_model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, W_c, W__a, U__a, v__a,
     alphabet_index, feat_index, feature_types,
     inverse_alphabet_index, dev_words, dev_lemmas, dev_feat_dicts) = init_model(dev_path, feat_input_dim, hidden_dim,
                                                                                 input_dim, layers, results_file_path,
                                                                                 test_path, train_path)

    # char_lookup = initial_model["char_lookup"]
    # feat_lookup = initial_model["feat_lookup"]

    # "what is learned by the encoder" experiment:
    # get lots of input words (dev set)
    # run blstm encoder on them (encode feats and chars)
    # experiments:
    # we want to understand what's captured/whats more significant: current symbol, context or all?
    # to do so:
    # take the blstm rep. for the same character, same context, different positions. how will it cluster by position?
    # i.e: abbbbbb, babbbb, bbabbbb, bbbabbbb, bbbbabb, bbbbbba...

    # take the blstm rep. for the same character, same position, diff. contexts. how will it cluster by context?
    # aaaabaaaa, bbbbbbbbb, cccbcccc, dddbdddd, eeeebeeee...

    # take the blstm rep. for diff characters, same position, same contexts. how will it cluster by character?
    # aaaaaaaa, aaabaaa, aaacaaa, aaadaaa, aaaeaaa, aaafaaa...

    # other option: take (all?) "natural" (dev) examples, throw on SVD, paint by location, character, context (last one
    #  is more complex but can probably think about something)

    start = 0
    # NOTE(review): slice ends are exclusive, so `len - 1` leaves the last dev example out of the
    # analysis - confirm whether that is intended
    end = len(dev_lemmas) - 1
    encoded_vecs = {}

    index_to_feats_and_lemma = {}

    # get bilstm encoder representation
    # (keyed by morph string + lemma, so examples sharing both overwrite each other)
    for lemma, feats in zip(dev_lemmas[start:end], dev_feat_dicts[start:end]):
        index = common.get_morph_string(feats, feature_types) + lemma
        index_to_feats_and_lemma[index] = (feats, lemma)
        encoded_vecs[index] = soft_attention.encode_feats_and_chars(alphabet_index, char_lookup,
                                                                    encoder_frnn, encoder_rrnn,
                                                                    feat_index, feat_lookup, feats,
                                                                    feature_types, lemma)
    # get examples (encoder hidden states) by location: 1, 2, 3, 4, 5...
    location_to_vec = {}
    for encoded_rep_index in encoded_vecs:
        encoded_rep =  encoded_vecs[encoded_rep_index]
        for location, vec in enumerate(encoded_rep):
            if location in location_to_vec:
                location_to_vec[location].append(vec)
            else:
                location_to_vec[location] = [vec]

    location_labels = []
    vecs = []

    # take up to 100 samples from each location
    # (location_labels / vecs are only consumed by the commented-out plot call below)
    for key in location_to_vec:
        for value in location_to_vec[key][0:100]:
            location_labels.append(key)
            vecs.append(value.vec_value())

    # plot_svd_reduction(hidden_states, location_labels, title='SVD for encoder hidden states by location')

    # get examples (encoder hidden states) by character: א,ב,ג,ד,ה,ו...
    char_to_vec = {}
    char_vecs = []
    char_labels = []
    char_location_labels = []
    current_char_labels = []
    feat_vecs = []
    feat_labels = []
    for encoded_rep_index in encoded_vecs:

        # get bilstm encoding for the sequence
        encoded_rep = encoded_vecs[encoded_rep_index]

        # should skip the feat vecs (?)
        # get matching lemma and features
        feats, lemma = index_to_feats_and_lemma[encoded_rep_index]
        sorted_feats = []
        for feat in sorted(feature_types):
            if feat in feats:
                sorted_feats.append(u'{}:{}'.format(feat, feats[feat]))

        # symbol sequence used to label the encoded vectors position by position
        # NOTE(review): assumes encode_feats_and_chars emits one vector per symbol in exactly this
        # order ('<', sorted features, lemma characters, '>') - confirm against that function
        seq_symbols = ['<']  + list(sorted_feats) + list(lemma) + ['>']

        # sort vectors by symbol
        for i, symbol in enumerate(seq_symbols):
            # `symbol in lemma` is a substring test on the lemma; it is what separates lemma
            # characters from the feature symbols and the boundary markers
            if symbol in lemma:
                char_vecs.append(encoded_rep[i])
                if i > 0:
                    prev_symbol = seq_symbols[i-1]
                else:
                    prev_symbol = '_'
                if i < len(seq_symbols) - 1:
                    next_symbol = seq_symbols[i+1]
                else:
                    next_symbol = '_'
                # label format: "<symbol> (<previous symbol>,<position>,<next symbol>)"
                char_labels.append(u'{} ({},{},{})'.format(symbol, prev_symbol, i, next_symbol))
                char_location_labels.append(u'{}'.format(i))
                current_char_labels.append(u'{}'.format(symbol))
            else:
                if symbol in sorted_feats:
                    feat_vecs.append(encoded_rep[i])
                    feat_labels.append(symbol)

            # every symbol (characters, features and boundary markers) is collected here
            if symbol in char_to_vec:
                char_to_vec[symbol].append(encoded_rep[i])
            else:
                char_to_vec[symbol] = [encoded_rep[i]]

    symbol_labels = []
    vecs = []

    # take up to 20 samples from each symbol
    # (symbol_labels / vecs are only consumed by the commented-out plot call below)
    for key in char_to_vec:
        for value in char_to_vec[key][0:20]:
            symbol_labels.append(key)
            vecs.append(value.vec_value())

    # plot_svd_reduction(all_hidden_states, symbol_labels, title='SVD for encoder hidden states by symbol')

    char_hidden_states = np.array([v.vec_value() for v in char_vecs])
    # plot_svd_reduction(char_hidden_states[0:100], char_labels[0:100], title='SVD for encoder hidden states by symbol (characters only)')

    # same first 200 character states, colored once by position and once by character
    plot_svd_reduction(char_hidden_states[0:200], char_labels[0:200], color_labels=char_location_labels[0:200],
                       title='SVD for encoder hidden states by location (characters only)')

    plot_svd_reduction(char_hidden_states[0:200], char_labels[0:200], color_labels=current_char_labels[0:200],
                       title='SVD for encoder hidden states by character (characters only)')

    # first 500 character states, labelled by the character alone
    plot_svd_reduction(char_hidden_states[0:500], current_char_labels[0:500], color_labels=char_location_labels[0:500],
                       title='Soft Attention - Encoded Inputs by Location')


    plot_svd_reduction(char_hidden_states[0:500], current_char_labels[0:500], color_labels=current_char_labels[0:500],
                       title='Soft Attention - Encoded Inputs by Character')

    # feature states are colored by the first 4 characters of their "type:value" label
    feat_hidden_states = np.array([v.vec_value() for v in feat_vecs])
    plot_svd_reduction(feat_hidden_states[0:50], feat_labels[0:50],
                       color_labels=[f[0:4] for f in feat_labels[0:50]],
                       title = 'SVD for encoder hidden states by type (features only)')



    # TODO: get examples (encoder hidden states) by context: after/before א,ב,ג,ד,ה...
    char_embeddings = {}
    char_embeddings_matrix = []
    clean_alphabet_index = {}

    # print SVD for char embeddings
    # workaround to remove feat embeddings from plot
    # (keeps only single-character, non-digit symbols that are not special markers)
    for char in alphabet_index:
        if not len(char) > 1 and not char.isdigit() and char not in [UNK, UNK_FEAT, EPSILON, NULL]:
            clean_alphabet_index[char] = alphabet_index[char]

    # char_embeddings is filled but not read again in this function
    for char in clean_alphabet_index:
        char_embeddings[char] = char_lookup[clean_alphabet_index[char]].vec_value()
        char_embeddings_matrix.append(char_lookup[clean_alphabet_index[char]].vec_value())
    X = np.array(char_embeddings_matrix)

    plot_svd_reduction(X, clean_alphabet_index, title = 'SVD for character embeddings')

    # print SVD for feat embeddings
    # feat_embeddings is filled but not read again in this function
    feat_embeddings = {}
    feat_embeddings_matrix = []
    for feat in feat_index:
        feat_embeddings[feat] = feat_lookup[feat_index[feat]].vec_value()
        feat_embeddings_matrix.append(feat_lookup[feat_index[feat]].vec_value())
    Y = np.array(feat_embeddings_matrix)

    plot_svd_reduction(Y, feat_index, title = 'SVD for feature embeddings')

    # plot attention for a single hard-coded dev example (index 1000), if its lemma is short;
    # the slice is empty when the dev set has 1000 examples or fewer
    start = 1000
    end = 1001
    for lemma, feats in zip(dev_lemmas[start:end], dev_feat_dicts[start:end]):
        if len(lemma) < 6:
            plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                                 initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                                 hyper_params, input_dim, layers, results_file_path, test_path, train_path, lemma)

    # everything below this return is unreachable; it holds hand-picked Hebrew examples for
    # plot_attn_for_inflection that only run if the return is removed
    return
    # get user input word and features

    feats = {u'pos': u'VB', u'num': u'S', u'per': u'2', u'gen': u'M', u'binyan': u'HITPAEL', u'tense': u'PAST'}
    user_input = u'ספר'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    feats = {u'pos': u'JJ', u'num': u'P', u'def': u'DEF', u'gen': u'F'}
    user_input = u'צמחוני'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    feats = {u'pos': u'VB', u'num': u'S', u'gen': u'F', u'per': u'3', u'tense': u'FUTURE', u'binyan': u'PAAL'}
    user_input = u'שש'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    # feats = {u'pos': u'NN', u'num': u'P', u'gen': u'F', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}
    feats = {u'pos': u'NN', u'num': u'P', u'gen': u'F', u'poss_per': u'2', u'poss_gen': u'M',
             u'poss_num': u'P'}  # u'tense' : u'FUTURE', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}

    user_input = u'כלב'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    feats = {u'pos': u'VB', u'num': u'P', u'gen': u'M', u'per': u'3', u'tense': u'FUTURE', u'binyan': u'PAAL'}
    user_input = u'ישן'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    feats = {u'pos': u'VB', u'num': u'P', u'gen': u'F', u'per': u'3', u'tense': u'FUTURE', u'binyan': u'PAAL'}
    user_input = u'ישן'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)
    print 'Bye!'
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim,
         epochs, layers, optimization, regularization, learning_rate, plot, override, eval_only, ensemble):
    """Load or build a model, optionally train it, predict the test set and evaluate externally.

    An existing '<results_file_path>_bestmodel.txt' is loaded unless `override` is set; otherwise
    a fresh model is built. Training is skipped when `eval_only` is set. Predictions come from
    predict_with_ensemble_majority when `ensemble` is given, else from predict_sequences. If any
    predictions were produced they are scored with evaluate_model and handed to
    common.write_results_file_and_evaluate_externally. Always returns None.
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for name, value in hyper_params.items():
        print(name + '=' + str(value))

    # read the three data splits
    train_words, train_lemmas, train_feat_dicts = prepare_sigmorphon_data.load_data(train_path)
    test_words, test_lemmas, test_feat_dicts = prepare_sigmorphon_data.load_data(test_path)
    dev_words, dev_lemmas, dev_feat_dicts = prepare_sigmorphon_data.load_data(dev_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # special symbols: NULL/UNK serve character dropout, the other three are decoding markers
    for special_symbol in (NULL, UNK, EPSILON, BEGIN_WORD, END_WORD):
        alphabet.append(special_symbol)

    # position markers - they signal copying a character from the lemma into the word
    for position in xrange(MAX_PREDICTION_LEN):
        alphabet.append(str(position))

    # symbol <-> integer id mappings
    alphabet_index = dict((symbol, idx) for idx, symbol in enumerate(alphabet))
    inverse_alphabet_index = dict((idx, symbol) for symbol, idx in alphabet_index.items())

    # feature -> integer id mapping
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict((feature, idx) for idx, feature in enumerate(feature_alphabet))

    model_file_name = results_file_path + '_bestmodel.txt'
    saved_model_exists = os.path.isfile(model_file_name)
    if override or not saved_model_exists:
        print('could not find existing model or explicit override was requested. starting training from scratch...')
        model, encoder_frnn, encoder_rrnn, decoder_rnn = build_model(alphabet, input_dim, hidden_dim, layers,
                                                                     feature_types, feat_input_dim, feature_alphabet)
    else:
        print('loading existing model from {}'.format(model_file_name))
        model, encoder_frnn, encoder_rrnn, decoder_rnn = task1_attention_implementation.load_best_model(
            alphabet, results_file_path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim,
            feature_types)
        print('loaded existing model successfully')

    if eval_only:
        print('skipped training, evaluating on test set...')
    else:
        # train; the model returned by training replaces the loaded/built one
        model, last_epoch, best_epoch = train_model(model, encoder_frnn, encoder_rrnn, decoder_rnn,
                                                    train_lemmas, train_feat_dicts, train_words, dev_lemmas,
                                                    dev_feat_dicts, dev_words, alphabet_index,
                                                    inverse_alphabet_index, epochs, optimization,
                                                    results_file_path, feat_index, feature_types, plot)
        print('last epoch is {}'.format(last_epoch))
        print('best epoch is {}'.format(best_epoch))
        print('finished training')

    if not ensemble:
        predicted_sequences = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                                                inverse_alphabet_index, test_lemmas, test_feat_dicts, feat_index,
                                                feature_types)
    else:
        predicted_sequences = predict_with_ensemble_majority(alphabet, alphabet_index, ensemble, feat_index,
                                                             feat_input_dim, feature_alphabet, feature_types,
                                                             hidden_dim, input_dim, inverse_alphabet_index, layers,
                                                             test_feat_dicts, test_lemmas, test_words)

    # nothing was predicted - nothing to evaluate or write
    if len(predicted_sequences) < 1:
        return

    # score the predictions of the model in hand on the test data
    sample_count, accuracy = evaluate_model(predicted_sequences, test_lemmas, test_feat_dicts, test_words,
                                            feature_types, print_results=False)
    print('initial eval: {}% accuracy'.format(accuracy))

    # collect (lemma, features, predicted word) per test index, in the original file order
    final_results = {}
    for i, lemma in enumerate(test_lemmas):
        feats = test_feat_dicts[i]
        joint_index = lemma + ':' + common.get_morph_string(feats, feature_types)
        final_results[i] = (lemma, feats, ''.join(predicted_sequences[joint_index]))

    # write the results file and run the external evaluation
    common.write_results_file_and_evaluate_externally(hyper_params, accuracy, train_path, test_path,
                                                      results_file_path + '.external_eval.txt',
                                                      sigmorphon_root_dir, final_results)
    return
def evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types,
                  hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path,
                  sigmorphon_root_dir, test_feat_dicts, test_lemmas, test_path,
                  test_words, train_path, print_results=False):
    print "<<<<<<<<<<<<<<<<<< DEBUG ==>evaluate ndst"
    accuracies = []
    final_results = {}
    if ensemble:
        # load ensemble models
        ensemble_model_names = ensemble.split(',')
        print 'ensemble paths:\n'
        print '\n'.join(ensemble_model_names)
        ensemble_models = []
        for ens in ensemble_model_names:
            model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
                alphabet,
                ens,
                input_dim,
                hidden_dim,
                layers,
                feature_alphabet,
                feat_input_dim,
                feature_types)

            ensemble_models.append((model, encoder_frnn, encoder_rrnn, decoder_rnn))

        # predict the entire test set with each model in the ensemble
        print 'predicting...'
        ensemble_predictions = []
        count = 0
        for em in ensemble_models:
            count += 1
            model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn = em
            predicted_sequences = predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn,
                                                    alphabet_index,
                                                    inverse_alphabet_index,
                                                    test_lemmas,
                                                    test_feat_dicts,
                                                    feat_index,
                                                    feature_types)
            ensemble_predictions.append(predicted_sequences)
            print 'finished to predict with ensemble: {}/{}'.format(count, len(ensemble_model_names))

        predicted_sequences = {}
        string_to_sequence = {}

        # perform voting for each test input - joint_index is a lemma+feats representation
        test_data = zip(test_lemmas, test_feat_dicts, test_words)
        for i, (lemma, feat_dict, word) in enumerate(test_data):
            joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
            prediction_counter = defaultdict(int)

            # count votes
            for en in ensemble_predictions:
                prediction_str = ''.join(en[joint_index]).replace(STEP, '')
                prediction_counter[prediction_str] += 1
                string_to_sequence[prediction_str] = en[joint_index]
                if print_results:
                    print 'template: {} prediction: {}'.format(en[joint_index].encode('utf8'),
                                                               prediction_str.encode('utf8'))

            # return the most predicted output
            predicted_sequence_string = max(prediction_counter, key=prediction_counter.get)

            # hack: if chosen without majority, pick shortest prediction
            if prediction_counter[predicted_sequence_string] == 1:
                predicted_sequence_string = min(prediction_counter, key=len)

            if print_results:
                print 'chosen:{} with {} votes\n'.format(predicted_sequence_string.encode('utf8'),
                                                         prediction_counter[predicted_sequence_string])

            predicted_sequences[joint_index] = string_to_sequence[predicted_sequence_string]

            # progress indication
            sys.stdout.write("\r%d%%" % (float(i) / len(test_lemmas) * 100))
            sys.stdout.flush()
    else:
        # load best model - no ensemble
        best_model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(alphabet,
                                                                              results_file_path, input_dim,
                                                                              hidden_dim, layers,
                                                                              feature_alphabet, feat_input_dim,
                                                                              feature_types)
        try:
            print "predicting"
            predicted_sequences = predict_sequences(best_model,
                                                    char_lookup, feat_lookup, R, bias, encoder_frnn,
                                                    encoder_rrnn, decoder_rnn,
                                                    alphabet_index,
                                                    inverse_alphabet_index,
                                                    test_lemmas,
                                                    test_feat_dicts,
                                                    feat_index,
                                                    feature_types)
        except Exception as e:
            print "except1!"
            print e
            traceback.print_exc()

    # run internal evaluation
    try:
        accuracy = evaluate_model(predicted_sequences,
                                  test_lemmas,
                                  test_feat_dicts,
                                  test_words,
                                  feature_types,
                                  print_results=False)
        accuracies.append(accuracy)
    except Exception as e:
        print "except2!"
        print e
        traceback.print_exc()

    # get predicted_sequences in the same order they appeared in the original file
    # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
    for i, lemma in enumerate(test_lemmas):
        joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types)
        inflection = ''.join(predicted_sequences[joint_index]).replace(STEP, '')
        final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection)

    accuracy_vals = [accuracies[i][1] for i in xrange(len(accuracies))]
    macro_avg_accuracy = sum(accuracy_vals) / len(accuracies)
    print 'macro avg accuracy: ' + str(macro_avg_accuracy)

    mic_nom = sum([accuracies[i][0] * accuracies[i][1] for i in xrange(len(accuracies))])
    mic_denom = sum([accuracies[i][0] for i in xrange(len(accuracies))])
    micro_average_accuracy = mic_nom / mic_denom
    print 'micro avg accuracy: ' + str(micro_average_accuracy)

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    common.write_results_file_and_evaluate_externally(hyper_params, micro_average_accuracy, train_path,
                                                      test_path, results_file_path + suffix, sigmorphon_root_dir,
                                                      final_results)
def main(train_path, dev_path, test_path, results_file_path,
         sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim, epochs,
         layers, optimization, regularization, learning_rate, plot, override,
         eval_only, ensemble):
    hyper_params = {
        'INPUT_DIM': input_dim,
        'HIDDEN_DIM': hidden_dim,
        'FEAT_INPUT_DIM': feat_input_dim,
        'EPOCHS': epochs,
        'LAYERS': layers,
        'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
        'OPTIMIZATION': optimization,
        'PATIENCE': MAX_PATIENCE,
        'REGULARIZATION': regularization,
        'LEARNING_RATE': learning_rate
    }

    print 'train path = ' + str(train_path)
    print 'test path =' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load train and test data
    (train_words, train_lemmas,
     train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (test_words, test_lemmas,
     test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    (dev_words, dev_lemmas,
     dev_feat_dicts) = prepare_sigmorphon_data.load_data(dev_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(
        train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {
        index: char
        for char, index in alphabet_index.items()
    }

    # feat 2 int
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    model_file_name = results_file_path + '_bestmodel.txt'
    if os.path.isfile(model_file_name) and not override:
        print 'loading existing model from {}'.format(model_file_name)
        model, encoder_frnn, encoder_rrnn, decoder_rnn = task1_attention_implementation.load_best_model(
            alphabet, results_file_path, input_dim, hidden_dim, layers,
            feature_alphabet, feat_input_dim, feature_types)
        print 'loaded existing model successfully'
    else:
        print 'could not find existing model or explicit override was requested. starting training from scratch...'
        model, encoder_frnn, encoder_rrnn, decoder_rnn = build_model(
            alphabet, input_dim, hidden_dim, layers, feature_types,
            feat_input_dim, feature_alphabet)
    if not eval_only:
        # start training
        trained_model, last_epoch, best_epoch = train_model(
            model, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas,
            train_feat_dicts, train_words, dev_lemmas, dev_feat_dicts,
            dev_words, alphabet_index, inverse_alphabet_index, epochs,
            optimization, results_file_path, feat_index, feature_types, plot)
        model = trained_model
        print 'last epoch is {}'.format(last_epoch)
        print 'best epoch is {}'.format(best_epoch)
        print 'finished training'
    else:
        print 'skipped training, evaluating on test set...'

    if ensemble:
        predicted_sequences = predict_with_ensemble_majority(
            alphabet, alphabet_index, ensemble, feat_index, feat_input_dim,
            feature_alphabet, feature_types, hidden_dim, input_dim,
            inverse_alphabet_index, layers, test_feat_dicts, test_lemmas,
            test_words)
    else:
        predicted_sequences = predict_sequences(model, decoder_rnn,
                                                encoder_frnn, encoder_rrnn,
                                                alphabet_index,
                                                inverse_alphabet_index,
                                                test_lemmas, test_feat_dicts,
                                                feat_index, feature_types)
    if len(predicted_sequences) > 0:
        # evaluate last model on test
        amount, accuracy = evaluate_model(predicted_sequences,
                                          test_lemmas,
                                          test_feat_dicts,
                                          test_words,
                                          feature_types,
                                          print_results=False)
        print 'initial eval: {}% accuracy'.format(accuracy)

        final_results = {}
        for i in xrange(len(test_lemmas)):
            joint_index = test_lemmas[i] + ':' + common.get_morph_string(
                test_feat_dicts[i], feature_types)
            inflection = predicted_sequences[joint_index]
            final_results[i] = (test_lemmas[i], test_feat_dicts[i],
                                ''.join(inflection))

        # evaluate best models
        common.write_results_file_and_evaluate_externally(
            hyper_params, accuracy, train_path, test_path,
            results_file_path + '.external_eval.txt', sigmorphon_root_dir,
            final_results)
    return
Ejemplo n.º 33
0
def main(train_path, test_path, results_file_path, sigmorphon_root_dir,
         input_dim, hidden_dim, epochs, layers, optimization, feat_input_dim):
    hyper_params = {
        'INPUT_DIM': input_dim,
        'HIDDEN_DIM': hidden_dim,
        'EPOCHS': epochs,
        'LAYERS': layers,
        'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
        'OPTIMIZATION': optimization
    }

    print 'train path = ' + str(train_path)
    print 'test path =' + str(test_path)
    for param in hyper_params:
        print param + '=' + str(hyper_params[param])

    # load data
    (train_words, train_lemmas,
     train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (test_words, test_lemmas,
     test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(
        train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {
        index: char
        for char, index in alphabet_index.items()
    }

    # cluster the data by POS type (features)
    train_cluster_to_data_indices = common.cluster_data_by_pos(
        train_feat_dicts)
    test_cluster_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)

    # cluster the data by inflection type (features)
    # train_cluster_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types)
    # test_cluster_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types)

    accuracies = []
    final_results = {}

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(
            train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_words = [
            train_words[i] for i in train_cluster_to_data_indices[cluster_type]
        ]
        if len(train_cluster_words) < 1:
            print 'only ' + str(
                len(train_cluster_words
                    )) + ' samples for this inflection type. skipping'
            continue
        else:
            print 'now evaluating model for cluster ' + str(cluster_index + 1) + '/' + \
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' + \
                  str(len(train_cluster_words)) + ' examples'

        # test best model
        try:
            test_cluster_lemmas = [
                test_lemmas[i]
                for i in test_cluster_to_data_indices[cluster_type]
            ]
            test_cluster_words = [
                test_words[i]
                for i in test_cluster_to_data_indices[cluster_type]
            ]
            test_cluster_feat_dicts = [
                test_feat_dicts[i]
                for i in test_cluster_to_data_indices[cluster_type]
            ]

            # load best model
            best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
                str(cluster_index), alphabet, results_file_path, input_dim,
                hidden_dim, layers, feature_alphabet, feat_input_dim,
                feature_types)

            predicted_templates = task1_joint_structured_inflection_feedback_fix.predict_templates(
                best_model, decoder_rnn, encoder_frnn, encoder_rrnn,
                alphabet_index, inverse_alphabet_index, test_cluster_lemmas,
                test_cluster_feat_dicts, feat_index, feature_types)

            accuracy = task1_joint_structured_inflection_feedback_fix.evaluate_model(
                predicted_templates,
                test_cluster_lemmas,
                test_cluster_feat_dicts,
                test_cluster_words,
                feature_types,
                print_results=False)
            accuracies.append(accuracy)

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_cluster_to_data_indices[cluster_type]:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(
                    test_feat_dicts[i], feature_types)
                inflection = task1_joint_structured_inflection_feedback_fix.instantiate_template(
                    predicted_templates[joint_index], test_lemmas[i])
                final_results[i] = (test_lemmas[i], test_feat_dicts[i],
                                    inflection)

        except KeyError:
            print 'could not find relevant examples in test data for cluster: ' + cluster_type

    accuracy_vals = [accuracies[i][1] for i in xrange(len(accuracies))]
    macro_avg_accuracy = sum(accuracy_vals) / len(accuracies)
    print 'macro avg accuracy: ' + str(macro_avg_accuracy)

    mic_nom = sum(
        [accuracies[i][0] * accuracies[i][1] for i in xrange(len(accuracies))])
    mic_denom = sum([accuracies[i][0] for i in xrange(len(accuracies))])
    micro_average_accuracy = mic_nom / mic_denom
    print 'micro avg accuracy: ' + str(micro_average_accuracy)

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'
    common.write_results_file_and_evaluate_externally(
        hyper_params, micro_average_accuracy, train_path, test_path,
        results_file_path + suffix, sigmorphon_root_dir, final_results)
def evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types,
                  hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path,
                  sigmorphon_root_dir, test_cluster_to_data_indices, test_feat_dicts, test_lemmas, test_path,
                  test_words, train_cluster_to_data_indices, train_path, train_words):
    accuracies = []
    final_results = {}
    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_words = [train_words[i] for i in train_cluster_to_data_indices[cluster_type]]
        if len(train_cluster_words) < 1:
            print 'only {} samples for this inflection type. skipping'.format(str(len(train_cluster_words)))
            continue
        else:
            print 'now evaluating model for cluster ' + str(cluster_index + 1) + '/' + \
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' + \
                  str(len(train_cluster_words)) + ' examples'

        # test best model
        try:
            test_cluster_lemmas = [test_lemmas[i] for i in test_cluster_to_data_indices[cluster_type]]
            test_cluster_words = [test_words[i] for i in test_cluster_to_data_indices[cluster_type]]
            test_cluster_feat_dicts = [test_feat_dicts[i] for i in test_cluster_to_data_indices[cluster_type]]

            if ensemble:
                # load ensemble models
                ensemble_model_names = ensemble.split(',')
                print 'ensemble paths:\n'
                print '\n'.join(ensemble_model_names)
                ensemble_models = []
                for ens in ensemble_model_names:
                    model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
                        str(cluster_index),
                        alphabet,
                        ens,
                        input_dim,
                        hidden_dim,
                        layers,
                        feature_alphabet,
                        feat_input_dim,
                        feature_types)

                    ensemble_models.append((model, encoder_frnn, encoder_rrnn, decoder_rnn))

                # predict the entire test set with each model in the ensemble
                ensemble_predictions = []
                for em in ensemble_models:
                    model, encoder_frnn, encoder_rrnn, decoder_rnn = em
                    predicted_templates = predict_templates(model, decoder_rnn,
                                                            encoder_frnn,
                                                            encoder_rrnn,
                                                            alphabet_index,
                                                            inverse_alphabet_index,
                                                            test_cluster_lemmas,
                                                            test_cluster_feat_dicts,
                                                            feat_index,
                                                            feature_types)
                    ensemble_predictions.append(predicted_templates)

                predicted_templates = {}
                string_to_template = {}

                # perform voting for each test input - joint_index is a lemma+feats representation
                test_data = zip(test_cluster_lemmas, test_cluster_feat_dicts, test_cluster_words)
                for i, (lemma, feat_dict, word) in enumerate(test_data):
                    joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
                    prediction_counter = defaultdict(int)
                    for ens in ensemble_predictions:
                        prediction_str = ''.join(instantiate_template(ens[joint_index], lemma))
                        prediction_counter[prediction_str] += 1
                        string_to_template[prediction_str] = ens[joint_index]
                        print u'template: {} prediction: {}'.format(ens[joint_index], prediction_str)

                    # return the most predicted output
                    predicted_template_string = max(prediction_counter, key=prediction_counter.get)

                    # hack: if chosen without majority, pick shortest prediction
                    if prediction_counter[predicted_template_string] == 1:
                        predicted_template_string = min(prediction_counter, key=len)

                    print u'chosen:{} with {} votes\n'.format(predicted_template_string,
                                                              prediction_counter[predicted_template_string])
                    predicted_templates[joint_index] = string_to_template[predicted_template_string]

                    # progress indication
                    sys.stdout.write("\r%d%%" % (float(i) / len(test_cluster_lemmas) * 100))
                    sys.stdout.flush()
                    ##

            else:
                # load best model - no ensemble
                best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
                    str(cluster_index), alphabet,
                    results_file_path, input_dim,
                    hidden_dim, layers,
                    feature_alphabet, feat_input_dim,
                    feature_types)
                print 'starting to predict for cluster: {}'.format(cluster_type)
                try:
                    predicted_templates = predict_templates(best_model,
                                                            decoder_rnn,
                                                            encoder_frnn,
                                                            encoder_rrnn,
                                                            alphabet_index,
                                                            inverse_alphabet_index,
                                                            test_cluster_lemmas,
                                                            test_cluster_feat_dicts,
                                                            feat_index,
                                                            feature_types)
                except Exception as e:
                    print e
                    traceback.print_exc()

            print 'evaluating predictions for cluster: {}'.format(cluster_type)
            try:
                accuracy = evaluate_model(predicted_templates,
                                          test_cluster_lemmas,
                                          test_cluster_feat_dicts,
                                          test_cluster_words,
                                          feature_types,
                                          print_results=True)
                accuracies.append(accuracy)
            except Exception as e:
                print e
                traceback.print_exc()

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_cluster_to_data_indices[cluster_type]:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types)
                inflection = instantiate_template(predicted_templates[joint_index],
                                                  test_lemmas[i])

                final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection)

        except KeyError:
            print 'could not find relevant examples in test data for cluster: ' + cluster_type
            print 'clusters in test are: {}'.format(test_cluster_to_data_indices.keys())
            print 'clusters in train are: {}'.format(train_cluster_to_data_indices.keys())

    accuracy_vals = [accuracies[i][1] for i in xrange(len(accuracies))]
    macro_avg_accuracy = sum(accuracy_vals) / len(accuracies)
    print 'macro avg accuracy: ' + str(macro_avg_accuracy)

    mic_nom = sum([accuracies[i][0] * accuracies[i][1] for i in xrange(len(accuracies))])
    mic_denom = sum([accuracies[i][0] for i in xrange(len(accuracies))])
    micro_average_accuracy = mic_nom / mic_denom
    print 'micro avg accuracy: ' + str(micro_average_accuracy)

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    common.write_results_file_and_evaluate_externally(hyper_params, micro_average_accuracy, train_path,
                                                      test_path, results_file_path + suffix, sigmorphon_root_dir,
                                                      final_results)
Ejemplo n.º 35
0
def evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index,
                  feat_input_dim, feature_alphabet, feature_types, hidden_dim,
                  hyper_params, input_dim, inverse_alphabet_index, layers,
                  results_file_path, sigmorphon_root_dir,
                  test_cluster_to_data_indices, test_feat_dicts, test_lemmas,
                  test_path, test_words, train_cluster_to_data_indices,
                  train_path, train_words):
    """Evaluate the per-cluster (factored) models on the test data.

    For every cluster type found in ``train_cluster_to_data_indices`` the
    matching model (or ensemble of models) is loaded, templates are predicted
    for the test examples of that cluster, the predictions are scored with
    ``evaluate_model`` and the instantiated inflections are collected.
    Finally the macro and micro averaged accuracies are printed and everything
    is handed to ``common.write_results_file_and_evaluate_externally``.

    Arguments:
        alphabet, feature_alphabet, input_dim, hidden_dim, layers,
        feat_input_dim: passed through to ``load_best_model``.
        alphabet_index, inverse_alphabet_index, feat_index: passed through to
            ``predict_templates``.
        ensemble: falsy to use the single best model stored under
            ``results_file_path``; otherwise a comma separated string of model
            paths whose predictions are combined by voting.
        feature_types: passed to ``common.get_morph_string``,
            ``load_best_model``, ``predict_templates`` and ``evaluate_model``.
        hyper_params, train_path, sigmorphon_root_dir: passed through to
            ``common.write_results_file_and_evaluate_externally``.
        results_file_path: model path for the non-ensemble case and prefix of
            the written results file.
        test_path: passed to the results writer; if it contains ``'test'`` the
            results file gets the ``'.best.test'`` suffix, otherwise
            ``'.best'``.
        test_cluster_to_data_indices: maps a cluster type to the indices of
            its examples in ``test_lemmas`` / ``test_words`` /
            ``test_feat_dicts``.
        train_cluster_to_data_indices: maps a cluster type to the indices of
            its examples in ``train_words``.
        test_lemmas, test_words, test_feat_dicts: parallel test sequences.
        train_words: only used to count the training examples per cluster.

    Returns:
        None. Results are printed and written by the ``common`` helper.

    NOTE(review): the value returned by ``evaluate_model`` is used below as a
    ``(weight, accuracy)`` pair - confirm against its definition.
    """
    accuracies = []
    final_results = {}
    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(
            train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_words = [
            train_words[i] for i in train_cluster_to_data_indices[cluster_type]
        ]
        if not train_cluster_words:
            print('only {} samples for this inflection type. skipping'.format(
                str(len(train_cluster_words))))
            continue

        print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
              str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
              str(len(train_cluster_words)) + ' examples')

        # test best model
        try:
            # a cluster that has no test examples raises KeyError here and is
            # reported by the handler at the bottom of this try block
            test_indices = test_cluster_to_data_indices[cluster_type]
            test_cluster_lemmas = [test_lemmas[i] for i in test_indices]
            test_cluster_words = [test_words[i] for i in test_indices]
            test_cluster_feat_dicts = [test_feat_dicts[i] for i in test_indices]

            if ensemble:
                # load ensemble models
                ensemble_model_names = ensemble.split(',')
                print('ensemble paths:\n')
                print('\n'.join(ensemble_model_names))
                ensemble_models = []
                for ens in ensemble_model_names:
                    model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
                        str(cluster_index), alphabet, ens, input_dim,
                        hidden_dim, layers, feature_alphabet, feat_input_dim,
                        feature_types)

                    ensemble_models.append(
                        (model, encoder_frnn, encoder_rrnn, decoder_rnn))

                # predict the entire test set with each model in the ensemble
                ensemble_predictions = []
                for model, encoder_frnn, encoder_rrnn, decoder_rnn in ensemble_models:
                    ensemble_predictions.append(predict_templates(
                        model, decoder_rnn, encoder_frnn, encoder_rrnn,
                        alphabet_index, inverse_alphabet_index,
                        test_cluster_lemmas, test_cluster_feat_dicts,
                        feat_index, feature_types))

                predicted_templates = {}

                # perform voting for each test input - joint_index is a lemma+feats representation
                test_data = zip(test_cluster_lemmas, test_cluster_feat_dicts)
                for i, (lemma, feat_dict) in enumerate(test_data):
                    joint_index = lemma + ':' + common.get_morph_string(
                        feat_dict, feature_types)
                    prediction_counter = defaultdict(int)
                    # maps each predicted surface string of this input back to
                    # a template that produced it
                    string_to_template = {}
                    for ens in ensemble_predictions:
                        template = ens[joint_index]
                        prediction_str = ''.join(
                            instantiate_template(template, lemma))
                        prediction_counter[prediction_str] += 1
                        string_to_template[prediction_str] = template
                        print(u'template: {} prediction: {}'.format(
                            template, prediction_str))

                    # return the most predicted output
                    predicted_template_string = max(prediction_counter,
                                                    key=prediction_counter.get)

                    # hack: if chosen without majority, pick shortest prediction
                    if prediction_counter[predicted_template_string] == 1:
                        predicted_template_string = min(prediction_counter,
                                                        key=len)

                    print(u'chosen:{} with {} votes\n'.format(
                        predicted_template_string,
                        prediction_counter[predicted_template_string]))
                    predicted_templates[joint_index] = string_to_template[
                        predicted_template_string]

                    # progress indication
                    sys.stdout.write(
                        "\r%d%%" % (float(i) / len(test_cluster_lemmas) * 100))
                    sys.stdout.flush()

            else:
                # load best model - no ensemble
                best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
                    str(cluster_index), alphabet, results_file_path, input_dim,
                    hidden_dim, layers, feature_alphabet, feat_input_dim,
                    feature_types)
                print('starting to predict for cluster: {}'.format(
                    cluster_type))
                try:
                    predicted_templates = predict_templates(
                        best_model, decoder_rnn, encoder_frnn, encoder_rrnn,
                        alphabet_index, inverse_alphabet_index,
                        test_cluster_lemmas, test_cluster_feat_dicts,
                        feat_index, feature_types)
                except Exception as e:
                    print(e)
                    traceback.print_exc()
                    # without fresh predictions the name below would be unbound
                    # (first cluster) or still hold the previous cluster's
                    # templates, so this cluster cannot be evaluated
                    continue

            print('evaluating predictions for cluster: {}'.format(cluster_type))
            try:
                accuracy = evaluate_model(predicted_templates,
                                          test_cluster_lemmas,
                                          test_cluster_feat_dicts,
                                          test_cluster_words,
                                          feature_types,
                                          print_results=True)
                accuracies.append(accuracy)
            except Exception as e:
                # best effort: a failed evaluation only drops this cluster from
                # the averages, its predictions are still collected below
                print(e)
                traceback.print_exc()

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later in the task format
            for i in test_indices:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(
                    test_feat_dicts[i], feature_types)
                inflection = instantiate_template(
                    predicted_templates[joint_index], test_lemmas[i])

                final_results[i] = (test_lemmas[i], test_feat_dicts[i],
                                    inflection)

        except KeyError:
            print('could not find relevant examples in test data for cluster: ' + cluster_type)
            print('clusters in test are: {}'.format(
                test_cluster_to_data_indices.keys()))
            print('clusters in train are: {}'.format(
                train_cluster_to_data_indices.keys()))

    # each entry of accuracies is used as a (weight, accuracy) pair
    if accuracies:
        macro_avg_accuracy = sum(
            entry[1] for entry in accuracies) / float(len(accuracies))
    else:
        # nothing was evaluated - avoid dividing by zero
        print('no cluster was evaluated, reporting an accuracy of 0')
        macro_avg_accuracy = 0.0
    print('macro avg accuracy: ' + str(macro_avg_accuracy))

    mic_nom = sum(entry[0] * entry[1] for entry in accuracies)
    mic_denom = sum(entry[0] for entry in accuracies)
    if mic_denom:
        micro_average_accuracy = mic_nom / float(mic_denom)
    else:
        micro_average_accuracy = 0.0
    print('micro avg accuracy: ' + str(micro_average_accuracy))

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    common.write_results_file_and_evaluate_externally(
        hyper_params, micro_average_accuracy, train_path, test_path,
        results_file_path + suffix, sigmorphon_root_dir, final_results)