Example #1
import cPickle as pkl  # assumed alias (Python 2); the snippet pickles via pkl.dump


def main():
    # Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/
    # model_path and dataset_path are expected to be defined at module level
    model, utt_train = load_all_models(model_path)
    path = dataset_path

    # embed the train and test splits with the loaded model
    train_x, train_y = grab_data(path + '.train', model)
    test_x, test_y = grab_data(path + '.test', model)

    # write both splits into one pickle file (protocol -1 = highest available)
    f = open('swda_embedded_' + model_path.split('/')[-1] + '.pkl', 'wb')
    pkl.dump((train_x, train_y), f, -1)
    pkl.dump((test_x, test_y), f, -1)
    f.close()
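Because the two splits are written with back-to-back pkl.dump calls, they come back out with two pkl.load calls in the same order; a minimal read-back sketch, reusing the filename expression and the pkl alias from the snippet above:

f = open('swda_embedded_' + model_path.split('/')[-1] + '.pkl', 'rb')
train_x, train_y = pkl.load(f)  # the first object dumped is the first one loaded
test_x, test_y = pkl.load(f)
f.close()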
Example #2
def evaluate_model(embedding_model_location, with_context=False, f=e_add, aggregated_tagset=False):
    # load training and test data
    train_utt, train_Y, test_utt, test_Y = load_data()

    # uncomment this to find baseline scores
    # baseline_scores(train_utt, train_Y, test_utt, test_Y)

    print "Creating representations"
    # Load utterance embedding models
    embedding_model, _ = load_all_models(embedding_model_location)

    # represent utterances in some way:
    # train_X = represent_simple(train_utt, embedding_model)
    # test_X = represent_simple(test_utt, embedding_model)

    if with_context:
        # ---------- lqrz: add or concatenate the previous utterance
        # f = concat
        # f = e_add
        test_X = represent_mix_simple(test_utt, test_Y, embedding_model, f)
        train_X = represent_mix_lookup(train_Y, embedding_model, f)
        # ----------
    else:
        train_X = represent_lookup(train_Y, embedding_model)
        test_X = represent_simple(test_utt, embedding_model)


    # encode tags
    train_Y, test_Y = encode_tags(train_Y, test_Y, aggregated_tagset=aggregated_tagset)

    # print np.array(train_X).shape, np.array(test_X).shape, np.array(train_Y).shape, np.array(test_Y).shape
    print "Training classifiers"
    # Train classifiers, print scores
    print "Model: ", embedding_model_location
    print "KNN Accuracy: ", cl.KNN_classifier(train_X, train_Y, test_X, test_Y)
    # print "SVM Accuracy: ", cl.SVM_classifier(train_X, train_Y, test_X, test_Y)
    print "NB Accuracy: ", cl.NB_classifier(train_X, train_Y, test_X, test_Y)
    print "MLP Accuracy: ", cl.MLP_classifier(train_X, train_Y, test_X, test_Y, n_iter=10)
Example #3
from collections import defaultdict
import numpy as np

def load_utterances(fname):
    # collect utterances (token lists) per dialogue-act tag
    utterances = defaultdict(list)  # assumed container: keys are created on first append
    for tag, tokens in get_utterances_from_file(fname):
        # remove id from tag
        tag = tag.split("/")[0]
        utterances[tag].append(tokens)

    return utterances


if __name__ == '__main__':

    #TODO: change paths accordingly
    embedding_model_filename = 'data/test'  # path to doc2vec model
    utterance_filename = 'data/swda_utterances.txt'    # path to utterance file

    model, _ = load_all_models(embedding_model_filename)
    utterances = load_utterances(utterance_filename)

    #TODO: select tags to plot (max 5)
    tags_to_plot = ['qw', 'ft', 'ar', 'fa', 't1']

    #TODO: select nr of samples
    n_samples = 500

    # one contiguous block of n_samples vectors per tag
    samples = np.empty((n_samples * len(tags_to_plot), model.layer1_size))

    for i, tag in enumerate(tags_to_plot):
        tag_samples = get_vector_samples(tag, utterances, model, n_samples)
        samples[i * n_samples:(i + 1) * n_samples, :] = tag_samples

    plotGraph(samples, n_samples, tags_to_plot, dimensions='2D')