Python WordVectorModel Examples

Programming Language: Python

Namespace/Package Name: util

Class/Type: WordVectorModel

Examples at hotexamples.com: 2

Python WordVectorModel - 2 examples found. These are the top-rated real-world Python examples of util.WordVectorModel, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

from_file(2)

Example #1

Show file

File: issues_model.py Project: arunchaganty/aeschines

def do_train(args):
    """
    Train the model described by `args`, then save it.

    Reads the raw data from `args.input`, splits it into train and
    validation parts using `args.dev_split`, and runs `args.n_epochs`
    epochs. Every epoch does one training pass followed by one validation
    pass, logging the accumulated Scorer after each. The model is finally
    written out via `save_model(model, args.model, args.weights)`.
    """
    # Assumption (from the original author): the raw text data is cheap to
    # keep in memory all at once (it's only about 144mb).
    _, X, y = load_data_raw(args.input)
    X_train, y_train, X_val, y_val = split_data(X, y, args.dev_split)

    # Assumption: the word vector model also easily fits in memory.
    wvecs = WordVectorModel.from_file(args.wvecs, False, '*UNKNOWN*')

    # Typical values for (n_words, wvecs.dim) are 50, 50.
    model = build_model(
        args,
        input_shape=(1, args.n_words, wvecs.dim),
        output_shape=len(LABELS),
        output_type=args.output_type,
    )

    def run_pass(step, sentences, labels):
        """Push every batch of (sentences, labels) through `step`; return the Scorer."""
        tally = Scorer(model)
        for chunk in tqdm(grouper(args.batch_size, zip(sentences, labels))):
            raw_X, raw_y = zip(*chunk)
            # Embedded training data will not fit in memory all at once:
            # each instance is a 50x50 matrix at 8 bytes per value, about
            # 20kb. Keeping under ~500mb means at most 25k items per batch;
            # minibatches of 32-128 are typical. Hence embedding is done
            # one batch at a time.
            X_batch = wvecs.embed_sentences(raw_X)
            y_batch = array(make_one_hot(raw_y, len(LABELS)))
            tally.update(step(X_batch, y_batch), len(X_batch))
        return tally

    for epoch in range(args.n_epochs):
        log("== Training model, epoch {}", epoch)
        log("=== train error: {}", run_pass(model.train_on_batch, X_train, y_train))
        log("=== val error: {}", run_pass(model.test_on_batch, X_val, y_val))

    ## Save the model
    save_model(model, args.model, args.weights)

Example #2

Show file

File: issues_model.py Project: arunchaganty/aeschines

def do_run(args):
    """
    Predict labels for new data with a previously saved neural net.

    Loads the model from `args.model` / `args.weights` and the word vectors
    from `args.wvecs`, reads tab-delimited rows from `args.input`, and
    writes tab-delimited output to `args.output`: a header row of 'id'
    followed by LABELS, then one row per input item holding its id and the
    predicted values converted to float.
    """
    # Load the model and weights
    model = load_model(args.model, args.weights)
    wvecs = WordVectorModel.from_file(args.wvecs, False, '*UNKNOWN*')

    # Lazily pair each row's id with its ASCII-converted, tokenized text.
    tweets = RowObjectFactory.from_stream(csv.reader(args.input, delimiter="\t"))
    data = ((tweet.id, tokenize(to_ascii(tweet.text))) for tweet in tweets)

    writer = csv.writer(args.output, delimiter='\t')
    writer.writerow(['id'] + LABELS)

    for batch in tqdm(grouper(args.batch_size, data)):
        ids_batch, X_batch = zip(*batch)
        predictions = model.predict_on_batch(wvecs.embed_sentences(X_batch))
        writer.writerows(
            [tweet_id] + [float(value) for value in scores]
            for tweet_id, scores in zip(ids_batch, predictions)
        )