コード例 #1
0
ファイル: train.py プロジェクト: HaydenFaulkner/phd
def get_training_stats(model_id, model, one_hot_words, one_hot_dictionary):
    """Evaluate *model* on the training split and return (loss, METEOR score).

    Reads the training sentence file, pairs each video ID with its
    pre-extracted visual features (.npy) and a one-hot-encoded target
    sentence, evaluates the model, decodes its predictions back to words,
    and scores the hypotheses against the references.

    Args:
        model_id: identifier used to look up the visual model/layer and
            the max feature/sentence lengths via ``rnn_models``.
        model: a compiled Keras model exposing ``evaluate`` and
            ``predict_classes``.
        one_hot_words: dict mapping word -> one-hot index (must contain
            '<eos>' and every word in the training sentences).
        one_hot_dictionary: inverse mapping, index -> word.

    Returns:
        (loss, meteor): the evaluation loss and the METEOR metric from
        ``score``.
    """
    vis_model_id, vis_layer_id, fs = rnn_models.get_vis_model_from_id(model_id)
    max_feat_len, max_sent_len = rnn_models.get_max_lengths(model_id)

    ref = {}
    hypo = {}
    # Transform sentences into one-hot vector sentences.
    with open(sentence_path + 'sents_train_lc_nopunc.txt') as f:
        lines = f.readlines()  # read the lines into an array
    random.shuffle(lines)

    IDS = []
    X = []
    Y = []
    for line in lines:
        # Parse "<video_id>\t<sentence>" once per line instead of
        # re-splitting for every field access.
        fields = line.rstrip().split('\t')
        vid, sent = fields[0], fields[1]
        IDS.append(vid)
        ref[vid] = [sent, sent]  # need at least two refs for BLEU
        X.append(np.load(feature_path + vis_model_id + '/' + vis_layer_id +
                         '/npy/' + vid + '.npy'))
        # One-hot-encode the sentence, terminated with <eos>.
        Y.append([one_hot_words[w] for w in sent.split()] +
                 [one_hot_words['<eos>']])

    X = pad_sequences(X, maxlen=max_feat_len)
    Y = pad_sequences(Y, maxlen=max_sent_len)

    loss = model.evaluate(X, Y, batch_size=64, verbose=1)
    preds = model.predict_classes(X, batch_size=64, verbose=1)
    print(loss)

    # Decode predicted index sequences back into space-terminated strings,
    # stopping at the first <eos>. Collect pieces and join once instead of
    # quadratic string concatenation.
    for vid, pred in zip(IDS, preds):
        pieces = []
        for idx in pred:
            if one_hot_dictionary[idx] == '<eos>':
                break
            pieces.append(one_hot_dictionary[idx] + ' ')
        hypo[vid] = [''.join(pieces)]

    return loss, score(ref, hypo)["METEOR"]
コード例 #2
0
ファイル: rnns.py プロジェクト: HaydenFaulkner/phd
def test(type, model, one_hot_words, one_hot_dictionary):
    """Evaluate *model* on the given dataset split and return results.

    NOTE(review): the parameter name ``type`` shadows the builtin; it is
    kept because renaming would break callers passing it by keyword.

    Relies on module-level globals: ``sentence_path``, ``feature_path``,
    ``vis_model_id``, ``vis_layer_id``, ``max_feat_len``, ``max_sent_len``.

    Args:
        type: dataset split name, e.g. 'val' or 'test' (selects the
            sentence file 'sents_<type>_lc_nopunc.txt').
        model: a compiled Keras model exposing ``evaluate`` and
            ``predict_classes``.
        one_hot_words: dict mapping word -> one-hot index (must contain
            '<eos>' and every word in the split's sentences).
        one_hot_dictionary: inverse mapping, index -> word.

    Returns:
        (loss, scores, ref, hypo): evaluation loss, the full metric dict
        from ``score``, and the reference/hypothesis dicts keyed by
        video ID.
    """
    ref = {}
    hypo = {}
    # Transform sentences into one-hot vector sentences.
    with open(sentence_path + 'sents_' + type + '_lc_nopunc.txt') as f:
        lines = f.readlines()  # read the lines into an array
    # Deliberately NOT shuffled: keep deterministic order for evaluation.
    # random.shuffle(lines)

    IDS = []
    X = []
    Y = []
    for line in lines:
        # Parse "<video_id>\t<sentence>" once per line instead of
        # re-splitting for every field access.
        fields = line.rstrip().split('\t')
        vid, sent = fields[0], fields[1]
        IDS.append(vid)
        ref[vid] = [sent, sent]  # scorer expects at least two references
        X.append(np.load(feature_path + vis_model_id + '/' + vis_layer_id +
                         '/npy/' + vid + '.npy'))
        # One-hot-encode the sentence, terminated with <eos>.
        Y.append([one_hot_words[w] for w in sent.split()] +
                 [one_hot_words['<eos>']])

    X = pad_sequences(X, maxlen=max_feat_len)
    Y = pad_sequences(Y, maxlen=max_sent_len)

    loss = model.evaluate(X, Y, batch_size=64, verbose=1)
    preds = model.predict_classes(X, batch_size=64, verbose=1)

    # Decode predicted index sequences back into space-terminated strings,
    # stopping at the first <eos>. Collect pieces and join once instead of
    # quadratic string concatenation.
    for vid, pred in zip(IDS, preds):
        pieces = []
        for idx in pred:
            if one_hot_dictionary[idx] == '<eos>':
                break
            pieces.append(one_hot_dictionary[idx] + ' ')
        hypo[vid] = [''.join(pieces)]

    return loss, score(ref, hypo), ref, hypo