def trainer_helper(configFile,dataSetFile,tempModel): print "Training model on ",configFile,dataSetFile config = get_training_config_from_json(configFile) sentences, vocab, labels = build_data(dataSetFile,True) word_vecs = wordvecs.load_wordvecs(config.word2vec,vocab) trainer = TextCNNModelTrainer(config,word_vecs,sentences,labels) trainer.train(tempModel) print "Succesfully trained model on ",configFile,dataSetFile," and model is at ",tempModel print "Will proceed at testing the model on same data. If everything is correct, you should see the same accuracy" model = cPickle.load(open(tempModel,"rb")) op = model.classify(sentences) os.remove(tempModel)
__author__ = 'devashish.shankar' if __name__ == "__main__": if len(sys.argv) < 5: print "Usage: training.py" print "\t<model config file path>" print "\t<training data file path>" print "\t<file path to store classifier model>" print "\t<true/false(preprocessing flag)>" exit(0) #processing.. config_file = sys.argv[1] train_data_file = sys.argv[2] model_output_file = sys.argv[3] preprocess = sys.argv[4].lower() training_config = config.get_training_config_from_json(config_file) sentences, vocab, labels = datasets.build_data(train_data_file, preprocess) word_vecs = wordvecs.load_wordvecs(training_config.word2vec, vocab) if training_config.mode == "multichannel": nntrainer = MultiChannelTrainer(training_config, word_vecs, sentences, labels) else: nntrainer = TextCNNModelTrainer(training_config, word_vecs, sentences, labels) nntrainer.train(model_output_file)
modelfile = sys.argv[1] outputdir = sys.argv[3] preprocess = sys.argv[4].lower() load_word_vecs = sys.argv[5].lower() == "true" if not os.path.exists(outputdir): print "Output dir ", outputdir, " doesn't exist. Creating it" os.makedirs(outputdir) else: print "Using Output dir ", outputdir, ". Any previous results in this dir on same dataset might get overwritten. " model = cPickle.load(open(modelfile, "rb")) if load_word_vecs: print "Loading word vectors" model.add_global_word_vecs({}) print "Loading word vectors done" sentences, vocab, labels = datasets.build_data(testfile, preprocess) labels = model.get_labels() output = model.classify(sentences) # Free memory del model print "Removed model from memory" # Format the output to earlier format # TODO evaluate function should be changed to accept newer format, which is cleaner data = [] for i in range(len(output[0])): actual_label = sentences[i]["y"] text = sentences[i]["text"] predicted_label = output[0][i] predicted_prob = output[1][i][predicted_label] data.append([predicted_prob, labels[predicted_label], labels[actual_label], text]) evaluate(data, outputdir)
modelfile = sys.argv[1] outputdir = sys.argv[3] preprocess = sys.argv[4].lower() load_word_vecs = sys.argv[5].lower() == "true" if not os.path.exists(outputdir): print "Output dir ", outputdir, " doesn't exist. Creating it" os.makedirs(outputdir) else: print "Using Output dir ", outputdir, ". Any previous results in this dir on same dataset might get overwritten. " model = cPickle.load(open(modelfile, "rb")) if load_word_vecs: print "Loading word vectors" model.add_global_word_vecs({}) print "Loading word vectors done" sentences, vocab, labels = datasets.build_data(testfile, preprocess) labels = model.get_labels() output = model.classify(sentences) #Free memory del model print "Removed model from memory" #Format the output to earlier format #TODO evaluate function should be changed to accept newer format, which is cleaner data = [] for i in range(len(output[0])): actual_label = sentences[i]['y'] text = sentences[i]['text'] predicted_label = output[0][i] predicted_prob = output[1][i][predicted_label] data.append([ predicted_prob, labels[predicted_label], labels[actual_label], text
def test_dataset_reader():
    """Sanity-check build_data on the small SST sample.

    Expects 300 parsed sentences and exactly the two labels
    "pos" and "neg".
    """
    parsed_sentences, parsed_vocab, parsed_labels = build_data("../sample/datasets/sst_small_sample.csv")
    assert len(parsed_sentences) == 300
    assert len(parsed_labels) == 2
    for expected_label in ("neg", "pos"):
        assert expected_label in parsed_labels
if __name__=="__main__": if len(sys.argv)<5: print "Usage: training.py" print "\t<model config file path>" print "\t<training data file path>" print "\t<file path to store classifier model>" print "\t<true/false(preprocessing flag)>" exit(0) #processing.. config_file=sys.argv[1] train_data_file=sys.argv[2] model_output_file=sys.argv[3] preprocess=sys.argv[4].lower() training_config = config.get_training_config_from_json(config_file) sentences, vocab, labels = datasets.build_data(train_data_file,preprocess) print "Dataset loaded" word_vecs = wordvecs.load_wordvecs(training_config.word2vec,vocab) print "Loaded word vecs from file" if training_config.mode=="multichannel": nntrainer = MultiChannelTrainer(training_config,word_vecs,sentences,labels) else: nntrainer = TextCNNModelTrainer(training_config,word_vecs,sentences,labels) nntrainer.train(model_output_file)