def load_data_raw(istream):
    """
    Load training data from a stream of input.

    Input is a TSV with fields (id, text, label); each row is converted
    with process_input and the resulting instances are shuffled in place.

    Args:
        istream: a text stream containing tab-separated rows.

    Returns:
        (ids, X, y): three parallel tuples — instance ids, inputs, and
        labels — unzipped from the shuffled data.

    Raises:
        ValueError: if the stream contains no rows (zip(*data) on empty).
    """
    log("Loading training data...")
    rows = RowObjectFactory.from_stream(csv.reader(istream, delimiter="\t"))
    # Comprehension instead of list(map(...)); tqdm gives a progress bar.
    data = [process_input(row) for row in tqdm(rows)]
    np.random.shuffle(data)
    ids, X, y = zip(*data)

    log("Done. Loaded {} instances", len(data))

    return ids, X, y
def do_run(args):
    """
    Run the neural net to predict on new data.

    Reads TSV rows (with at least id and text fields) from args.input,
    tokenizes and embeds each tweet's text, and writes predictions to
    args.output as TSV: a header row of 'id' plus LABELS, then one row
    per tweet with its id followed by one float score per label.

    Args:
        args: parsed arguments providing model, weights, wvecs, input,
            output, and batch_size.
    """
    # Load the model and weights
    model = load_model(args.model, args.weights)
    wvecs = WordVectorModel.from_file(args.wvecs, False, '*UNKNOWN*')

    # Lazily produce (id, tokens) pairs so we never hold the whole input.
    data = ((tweet.id, tokenize(to_ascii(tweet.text)))
            for tweet in RowObjectFactory.from_stream(
                csv.reader(args.input, delimiter="\t")))
    writer = csv.writer(args.output, delimiter='\t')
    writer.writerow(['id'] + LABELS)

    for batch in tqdm(grouper(args.batch_size, data)):
        ids_batch, X_batch = zip(*batch)
        X_batch = wvecs.embed_sentences(X_batch)
        labels = model.predict_on_batch(X_batch)
        # tweet_id (not `id`) avoids shadowing the builtin; `score` over
        # the ambiguous single-letter `l`.
        for tweet_id, label in zip(ids_batch, labels):
            writer.writerow([tweet_id] + [float(score) for score in label])
def do_command(args):
    """
    Prepare training data from TSV input and save it as an .npz archive.

    Reads tab-separated tweet rows from args.input, converts them into
    training matrices via prepare_data, and writes X_train / y_train to
    args.output with numpy's savez.
    """
    reader = csv.reader(args.input, delimiter="\t")
    tweets = RowObjectFactory.from_stream(reader)
    X_train, y_train = prepare_data(tweets)
    np.savez(args.output, X_train=X_train, y_train=y_train)