Example No. 1
def build_model(weights=None,
                embedding_size=256,
                recurrent_gate_size=512,
                n_features=5,
                dropout=0.4):
    """
    build_model

    Inputs:
        weights - Path to a weights file to load, or None if the model should be built from scratch
        embedding_size - Size of the embedding layer
        recurrent_gate_size - Size of the gated recurrent layer
        n_features - Number of features for the embedding layer
        dropout - Dropout value

    Returns:
        A model object ready for training (or evaluation if a previous model was loaded via `weights`)
    """
    # vvvvv
    #Modify this if you want to change the structure of the network!
    # ^^^^^
    model_layers = [
        Embedding(size=embedding_size, n_features=n_features),
        GatedRecurrent(size=recurrent_gate_size, p_drop=dropout),
        Dense(size=1, activation='sigmoid', p_drop=dropout)
    ]
    model = RNN(layers=model_layers,
                cost='BinaryCrossEntropy',
                verbose=2,
                updater='Adam')
    if weights:  # Load a previously saved model from the weights file instead of the one built above
        model = load(weights)
    return model
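A minimal usage sketch (assumed, not part of the original snippet): texts, labels and the file name are placeholders, and the Tokenizer/save helpers follow the Passage imports shown in the later examples.

# Hypothetical usage of build_model; texts/labels are placeholder variables.
tokenizer = Tokenizer()
tokens = tokenizer.fit_transform(texts)
model = build_model(n_features=tokenizer.n_features)  # n_features must match the tokenizer
model.fit(tokens, labels, n_epochs=10)
save(model, 'model_weights.pkl')  # can later be reloaded via build_model(weights='model_weights.pkl')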
Example No. 2
def rnn(train_text, train_label):
    tokenizer = Tokenizer()
    train_tokens = tokenizer.fit_transform(train_text)
    layers = [
        Embedding(size=50, n_features=tokenizer.n_features),
        GatedRecurrent(size=128),
        Dense(size=1, activation='sigmoid')
    ]
    #    print "train_tokens=", train_tokens
    model = RNN(layers=layers, cost='BinaryCrossEntropy')
    model.fit(train_tokens, train_label)
    return model
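A hedged usage sketch with toy data (not from the original source). Because the Tokenizer is created inside rnn() and not returned, it is not available afterwards to transform new text.

# Toy call; a real run needs far more data.
model = rnn(["great movie", "terrible movie"], [1, 0])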
Example No. 3
def main(ptrain, ntrain, ptest, ntest, out, modeltype):
    assert modeltype in ["gated_recurrent", "lstm_recurrent"]

    print("Using the %s model ..." % modeltype)
    print("Loading data ...")
    trX, trY = load_data(ptrain, ntrain)
    teX, teY = load_data(ptest, ntest)

    tokenizer = Tokenizer(min_df=10, max_features=100000)
    trX = tokenizer.fit_transform(trX)
    teX = tokenizer.transform(teX)

    print("Training ...")
    if modeltype == "gated_recurrent":
        layers = [
            Embedding(size=256, n_features=tokenizer.n_features),
            GatedRecurrent(size=512, activation='tanh', gate_activation='steeper_sigmoid',
                           init='orthogonal', seq_output=False, p_drop=0.75),
            Dense(size=1, activation='sigmoid', init='orthogonal')
        ]
    else:
        layers = [
            Embedding(size=256, n_features=tokenizer.n_features),
            LstmRecurrent(size=512, activation='tanh', gate_activation='steeper_sigmoid',
                          init='orthogonal', seq_output=False, p_drop=0.75),
            Dense(size=1, activation='sigmoid', init='orthogonal')
        ]

    model = RNN(layers=layers, cost='bce', updater=Adadelta(lr=0.5))
    model.fit(trX, trY, n_epochs=10)

    # Predicting the probabilities of positive labels
    print("Predicting ...")
    pr_teX = model.predict(teX).flatten()

    predY = np.ones(len(teY))
    predY[pr_teX < 0.5] = -1

    with open(out, "w") as f:
        for lab, pos_pr, neg_pr in zip(predY, pr_teX, 1 - pr_teX):
            f.write("%d %f %f\n" % (lab, pos_pr, neg_pr))
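A hedged sketch of how main might be invoked; the file names are placeholders, and the positive/negative file layout is an assumption based on the load_data(ptrain, ntrain) call above.

if __name__ == "__main__":
    # Hypothetical paths: positive/negative train and test files, plus an output file.
    main("train_pos.txt", "train_neg.txt", "test_pos.txt", "test_neg.txt",
         "predictions.txt", "gated_recurrent")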
Example No. 4
def train_RNN(tokenizer, tokens, labels):
	"""
	INPUT: Trained tokenizer class, label array
		- The arrays of the tokenized critic reviews and the corresponding labels
	Returns a trained Recurrent Neural Network class object
	"""
	layers = [
		Embedding(size=256, n_features=tokenizer.n_features),
		GatedRecurrent(size=512, activation='tanh', gate_activation='steeper_sigmoid', init='orthogonal', seq_output=False, p_drop=0.75),
		Dense(size=1, activation='sigmoid', init='orthogonal')
	]

	model = RNN(layers=layers, cost='bce', updater=Adadelta(lr=0.5))

	path_snapshots = 'model_snapshots'

	print "Begin fitting RNN"

	model.fit(tokens, labels, n_epochs=12)

	return model
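A hedged usage sketch (not from the original source); review_texts and labels are placeholder variables, and the Tokenizer call mirrors the other examples.

# Hypothetical usage: fit the tokenizer first, then train on the tokenized reviews.
tokenizer = Tokenizer(min_df=10, max_features=100000)
tokens = tokenizer.fit_transform(review_texts)
model = train_RNN(tokenizer, tokens, labels)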
Example No. 5
def build_model(weights=None,
                embedding_size=128,
                recurrent_gate_size=256,
                n_features=5,
                dropout=0.1):
    """
    build_model

    Inputs:
        weights - Path to a weights file to load, or None if the model should be built from scratch
        embedding_size - Size of the embedding layer
        recurrent_gate_size - Size of the gated recurrent layer
        n_features - Number of features for the embedding layer
        dropout - Dropout value

    Returns:
        A model object ready for training (or evaluation if a previous model was loaded via `weights`)
    """
    # vvvvv
    #Modify this if you want to change the structure of the network!
    # ^^^^^
    model_layers = [
        Embedding(size=embedding_size, n_features=n_features),
        GatedRecurrent(size=recurrent_gate_size, p_drop=dropout),
        Dense(size=1, activation='sigmoid', p_drop=dropout)
    ]
    args = {
        'layers': model_layers,
        'cost': 'BinaryCrossEntropy',
        'verbose': 2,
        'updater': Adadelta(lr=0.5),
        'embedding_size': embedding_size
    }
    model = RNN(**args)
    if weights:  # Load a previously saved model from the weights file instead of the one built above
        print "Loading previously created weights file: ", weights
        model = load(weights)
    return model
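Since build_model only loads weights, a previously trained model has to be persisted first; a short hedged sketch of the round trip using save from passage.utils (the path is a placeholder, and model stands for a model returned by build_model and already fitted).

save(model, 'weights.pkl')                  # after training a model built from scratch
model = build_model(weights='weights.pkl')  # later runs: reload for evaluation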
Example No. 6
import numpy as np

from passage.models import RNN
from passage.updates import NAG, Regularizer
from passage.layers import Generic, GatedRecurrent, Dense
from passage.utils import load, save

from load import load_mnist

trX, teX, trY, teY = load_mnist()

#Use generic layer - RNN processes a size 28 vector at a time scanning from left to right 
layers = [
	Generic(size=28),
	GatedRecurrent(size=512, p_drop=0.2),
	Dense(size=10, activation='softmax', p_drop=0.5)
]

#A bit of l2 helps with generalization, higher momentum helps convergence
updater = NAG(momentum=0.95, regularizer=Regularizer(l2=1e-4))

#Linear iterator for real valued data, cce cost for softmax
model = RNN(layers=layers, updater=updater, iterator='linear', cost='cce')
model.fit(trX, trY, n_epochs=20)

tr_preds = model.predict(trX[:len(teY)])
te_preds = model.predict(teX)

tr_acc = np.mean(trY[:len(teY)] == np.argmax(tr_preds, axis=1))
te_acc = np.mean(teY == np.argmax(te_preds, axis=1))
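The script computes the train/test accuracies but never reports or persists the model; a small hedged follow-on using the same variables and the save helper imported above (the file name is a placeholder).

print("train accuracy: %.4f  test accuracy: %.4f" % (tr_acc, te_acc))
save(model, 'mnist_gru.pkl')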
Example No. 7
from passage.utils import save, load

print("Loading data...")
num_training = int((1.0 - 0.2) * len(xs))

X_train, y_train = xs[:num_training], ys[:num_training]
X_test, y_test = xs[num_training:], ys[num_training:]

num_feats = generator.max_id() + 1

layers = [
    Embedding(size=128, n_features=num_feats),
    #LstmRecurrent(size=32),
    #NOTE - to use a deep RNN, all recurrent layers except the final one need seq_output=True
    #GatedRecurrent(size=128, seq_output=True),
    #GatedRecurrent(size=256, direction= 'backward' if REVERSE else 'forward'),
    GatedRecurrent(size=128, seq_output=True),
    GatedRecurrent(size=128),
    #Dense(size=64, activation='sigmoid'),
    Dense(size=len(lst_freq_tags), activation='sigmoid'),
]

#emd 128, gru 32/64 is good - 0.70006 causer

print("Creating Model")
model = RNN(layers=layers, cost='bce')

def find_cutoff(y_test, predictions):
    scale = 20.0

    min_val = round(min(predictions))
    max_val = round(max(predictions))
Example No. 8
import sys

# ---

# ---

print 'loading dataset'
d = Dataset(settings['FN_DATASET'], settings['FN_VOCABULARY'])
d.load()

print 'generating labeled training set'
train_text,train_labels = d.getNextWordPredTrainset(10)
#for t,l in zip(train_text,train_labels):
#    print t,'->',l

tokenizer = Tokenizer()
train_tokens = tokenizer.fit_transform(train_text)
save(train_tokens, settings['FN_TRAINED_TOKENIZER'])

layers = [
    Embedding(size=128, n_features=tokenizer.n_features),
    GatedRecurrent(size=128),
    Dense(size=1, activation='sigmoid')
]

model = RNN(layers=layers, cost='BinaryCrossEntropy')
model.fit(train_tokens, train_labels)

save(model, settings['FN_MODEL_NEXTWORDPRED'])
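A hedged sketch of reusing the saved model for prediction, assuming load from passage.utils and the same settings dict. Note that the script saves train_tokens, not the tokenizer, under FN_TRAINED_TOKENIZER, so the fitted tokenizer from above is still needed to transform new text.

model = load(settings['FN_MODEL_NEXTWORDPRED'])
new_tokens = tokenizer.transform(["some new sentence"])  # requires the fitted tokenizer from above
probs = model.predict(new_tokens)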
Example No. 9
from sklearn import metrics

from passage.preprocessing import Tokenizer
from passage.layers import Embedding, GatedRecurrent, Dense
from passage.models import RNN
from passage.utils import load, save

from load import load_gender_data

trX, teX, trY, teY = load_gender_data(ntrain=10000) # Can increase up to 250K or so

tokenizer = Tokenizer(min_df=10, max_features=50000)
print trX[1] # see a blog example
trX = tokenizer.fit_transform(trX)
teX = tokenizer.transform(teX)
print tokenizer.n_features

layers = [
    Embedding(size=128, n_features=tokenizer.n_features),
    GatedRecurrent(size=256, activation='tanh', gate_activation='steeper_sigmoid', init='orthogonal', seq_output=False),
    Dense(size=1, activation='sigmoid', init='orthogonal') # sigmoid for binary classification
]

model = RNN(layers=layers, cost='bce') # bce is classification loss for binary classification and sigmoid output
for i in range(2):
    model.fit(trX, trY, n_epochs=1)
    tr_preds = model.predict(trX[:len(teY)])
    te_preds = model.predict(teX)

    tr_acc = metrics.accuracy_score(trY[:len(teY)], tr_preds > 0.5)
    te_acc = metrics.accuracy_score(teY, te_preds > 0.5)

    print i, tr_acc, te_acc

save(model, 'save_test.pkl') # How to save
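The saved model can be restored with the load helper imported at the top of this example:

model = load('save_test.pkl')  # How to load it back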
Example No. 10
    tr_data = pd.read_csv('labeledTrainData.tsv', delimiter='\t')
    trX = clean(tr_data['review'].values)
    trY = tr_data['sentiment'].values

    print("Training data loaded and cleaned.")

    tokenizer = Tokenizer(min_df=10, max_features=100000)
    trX = tokenizer.fit_transform(trX)

    print("Training data tokenized.")

    layers = [
        Embedding(size=256, n_features=tokenizer.n_features),
        GatedRecurrent(size=512,
                       activation='tanh',
                       gate_activation='steeper_sigmoid',
                       init='orthogonal',
                       seq_output=False,
                       p_drop=0.75),
        Dense(size=1, activation='sigmoid', init='orthogonal')
    ]

    model = RNN(layers=layers, cost='bce', updater=Adadelta(lr=0.5))
    model.fit(trX, trY, n_epochs=10)

    te_data = pd.read_csv('testData.tsv', delimiter='\t')
    ids = te_data['id'].values
    teX = clean(te_data['review'].values)
    teX = tokenizer.transform(teX)
    pr_teX = model.predict(teX).flatten()

    pd.DataFrame(np.asarray([ids,
Example No. 11
from passage.utils import save, load

print("Loading data...")
num_training = int((1.0 - TEST_SPLIT) * len(xs))

X_train, y_train = xs[:num_training], ys[:num_training]
X_test, y_test = xs[num_training:], ys[num_training:]

num_feats = generator.max_id() + 1

layers = [
    Embedding(size=64, n_features=num_feats),
    #LstmRecurrent(size=32),
    #NOTE - to use a deep RNN, all recurrent layers except the final one need seq_output=True
    #GatedRecurrent(size=64, seq_output=True),
    GatedRecurrent(size=64, direction='backward' if REVERSE else 'forward'),
    #LstmRecurrent(size=128),
    Dense(size=1, activation='sigmoid'),
]

#emd 64, gru 64 is good - 0.70833 causer (0 prev sents)

print("Creating Model")
model = RNN(layers=layers, cost='bce')


def find_cutoff(y_test, predictions):
    scale = 100.0

    min_val = round(min(predictions))
    max_val = round(max(predictions))
Example No. 12
def train(X, y):
    from keras.layers.embeddings import Embedding
    from keras.layers.recurrent import LSTM, GRU, SimpleRNN
    from keras.layers.core import Dense
    from keras.models import Sequential
    from keras.layers.core import Dropout
    from keras.preprocessing.text import Tokenizer
    from keras.preprocessing.sequence import pad_sequences
    from math import e
    vocab = 10000
    tokenizer = Tokenizer(nb_words=vocab)
    tokenizer.fit_on_texts(X)
    X = tokenizer.texts_to_sequences(X)
    """
    index_word =  {v: k for k, v in tokenizer.word_index.items()}
    for i in range(1, 10001):
        print str(i) + "," + index_word[i]

    return
    """
    maxlen = 50
    X1 = []
    y1 = []
    for thing, target in zip(X, y):
        if len(thing) != 0:
            X1.append(thing)
            y1.append(target)

    X = X1
    y = y1
    KERAS = False
    if KERAS:
        X = pad_sequences(X, maxlen=maxlen)

    from random import shuffle
    xy = zip(X, y)
    shuffle(xy)
    X_s, y_s = zip(*xy)
    X_train, y_train = X_s[:-1000], y_s[:-1000]
    X_test, y_test = X_s[-1000:], y_s[-1000:]
    embedding_size = 256
    dropout = .3
    batch_size = 256
    recurrent_gate_size = 512
    """
    model = Sequential()
    model.add(Embedding(vocab, embedding_size, mask_zero=True))
    model.add(Dropout(dropout))
    model.add(LSTM(recurrent_gate_size))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    print "building model..."
    model.compile(loss="msle", optimizer="rmsprop")
    print "fitting model"
    #model.load_weights("mymodel")
    model.fit(np.asarray(X_train), np.asarray(y_train), nb_epoch=30, verbose=1, batch_size=batch_size, validation_data=(np.asarray(X_test), np.asarray(y_test)))
   
    model.save_weights("mymodel") 
    """
    from passage.preprocessing import Tokenizer, LenFilter
    from passage.layers import Embedding, GatedRecurrent, Dense, OneHot, LstmRecurrent
    from passage.models import RNN
    from passage.utils import save, load
    from passage.iterators import Padded

    layers = [
        #    OneHot(n_features=5),
        Embedding(size=embedding_size, n_features=vocab),
        #    GatedRecurrent(size=recurrent_gate_size, seq_output=True, p_drop=dropout),
        #    LstmRecurrent(size=recurrent_gate_size, p_drop=dropout),
        GatedRecurrent(size=recurrent_gate_size, p_drop=dropout),
        Dense(size=8, activation='softmax', p_drop=dropout)
    ]

    print >> sys.stderr, "learning model"
    model_iterator = Padded()
    model = load("mymodel.final.pkl")
    #model = RNN(layers=layers, cost='CategoricalCrossEntropy', verbose=2, updater="Adam")
    filter = LenFilter(max_len=maxlen)
    model.fit(np.asarray(X_train),
              np.asarray(y_train),
              batch_size=batch_size,
              n_epochs=1000,
              path="mymodel.pkl",
              snapshot_freq=49,
              len_filter=filter)
    save(model, "mymodel.final.pkl")
    #    print "test cost"
    #    print model._cost(np.asarray(X_test), np.asarray(y_test))
    print "test accuracy"
    passage_batch_predict(np.asarray(X_train), np.asarray(y_train), model)

    exit = False
    print "enter a sentence"
    while not exit:
        text = raw_input()
        if text == "exit":
            break
        else:
            tokens = tokenizer.texts_to_sequences([text])
            if len(tokens) == 0 or len(tokens[0]) == 0:
                print "Sentence too strange, try again"
                continue
            if KERAS:
                tokens = pad_sequences(tokens, maxlen=maxlen)
            prediction = np.argmax(model.predict(tokens)[0])
            try:
                print e**(prediction - 2)
            except Exception:
                pass
Example No. 13
def train_model(modeltype, delta):

    assert modeltype in ["gated_recurrent", "lstm_recurrent"]
    print "Begin Training"

    df_imdb_reviews = pd.read_csv('../data/imdb_review_data.tsv', escapechar='\\', delimiter='\t')

    X = clean(df_imdb_reviews['review'].values)
    y = df_imdb_reviews['sentiment'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
    print "Tokenize"

    tokenizer = Tokenizer(min_df=10, max_features=100000)
    X_train = tokenizer.fit_transform(X_train)
    X_train = [[float(x) for x in  y] for y in X_train]
    X_test = tokenizer.transform(X_test)
    X_test = [[float(x) for x in  y] for y in X_test]

    print "Number of featers: {}".format(tokenizer.n_features)

    print "Training model"

    if modeltype == "gated_recurrent":
        layers = [
            Embedding(size=256, n_features=tokenizer.n_features),
            GatedRecurrent(size=512, activation='tanh', gate_activation='steeper_sigmoid',
                           init='orthogonal', seq_output=True, p_drop=0.5),
            Dense(size=1, activation='sigmoid', init='orthogonal')
        ]
    else:
        layers = [
            Embedding(size=256, n_features=tokenizer.n_features),
            LstmRecurrent(size=512, activation='tanh', gate_activation='steeper_sigmoid',
                          init='orthogonal', seq_output=True, p_drop=0.5),
            Dense(size=1, activation='sigmoid', init='orthogonal')
        ]

    # bce is classification loss for binary classification and sigmoid output
    model = RNN(layers=layers, cost='bce', updater=Adadelta(lr=delta))
    model.fit(X_train, y_train, n_epochs=20)

    with open('../data/{}_tokenizer_delta_{}_pdrop_0.5.pkl'.format(modeltype, delta), 'w') as f:
        pickle.dump(tokenizer, f)
    with open('../data/{}_model_delta_{}._pdrop_0.5.pkl'.format(modeltype, delta), 'w') as f:
        pickle.dump(model, f)

    try:
        y_pred_te = model.predict(X_test).flatten() >= 0.5
        y_pred_tr = model.predict(X_train).flatten() >= 0.5
        print 'Test Accuracy: {}'.format(accuracy_score(y_test,y_pred_te))
        print 'Test Precision: {}'.format(precision_score(y_test,y_pred_te))
        print 'Test Recall: {}'.format(recall_score(y_test,y_pred_te))
        print 'Train Accuracy: {}'.format(accuracy_score(y_train,y_pred_tr))
        print 'Train Precision: {}'.format(precision_score(y_train,y_pred_tr))
        print 'Train Recall: {}'.format(recall_score(y_train,y_pred_tr))

    except Exception:
        print "Unable to perform metrics"

    return tokenizer, model
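A hedged usage sketch; the delta (learning-rate) value passed here is a placeholder.

tokenizer, model = train_model("gated_recurrent", 0.5)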
Example No. 14
#################
# Training data has to be lemmatized with MorphoDiTa first!
#################
trX, teX, trY, teY = load_data(ntrain=9000, ntest=1000)
print len(trX), len(trY), len(teX), len(teY)

tokenizer = Tokenizer(min_df=10, max_features=50000)
trX = tokenizer.fit_transform(trX)
pickle.dump(tokenizer, open('tokenizer.pkl', 'wb'))
print "number of tokens:" + str(len(trX))
teX = tokenizer.transform(teX)
print "number of feathures:" + str(tokenizer.n_features)

layers = [
    Embedding(size=256, n_features=tokenizer.n_features),
    GatedRecurrent(size=725),
    Dense(size=10, activation='softmax')
]

model = RNN(layers=layers, cost='cce')
model.fit(trX, trY, n_epochs=10)
save(model, 'modelEcho.pkl')

tr_preds = model.predict(trX)
te_preds = model.predict(teX)

data = pd.DataFrame(trY)
data.to_csv('data/trY.vec')

data = pd.DataFrame(tr_preds)
data.to_csv('data/tr_preds.vec')
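The predictions are written out but never scored; a hedged evaluation sketch, assuming numpy is imported as np and teY holds integer class ids (if the labels are one-hot encoded, an extra np.argmax over teY is needed).

te_acc = np.mean(np.asarray(teY) == np.argmax(te_preds, axis=1))
print "test accuracy: " + str(te_acc)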
Example No. 15
def train_and_save_passage_tokenizer_and_rnn_model(x_train,
                                                   y_train,
                                                   x_test,
                                                   character_model=False):
    """Train and save Passage tokenizer and Passage RNN model.

    x_train and x_test should each be a series that has already been preprocessed: html->text, lowercased,
    punctuation/numbers removed.
    x_train + x_test are used to build the tokenizer.

    Note that the character-based RNN is a work in progress and not actually implemented as of now.
    """

    # Note that we assume we have train/test reviews that have been preprocessed: html->text, lowercased,
    # punctuation/numbers removed

    # Note in https://github.com/IndicoDataSolutions/Passage/blob/master/examples/sentiment.py they only
    # extract text from html, lowercase and strip (no punctuation removal)

    # Tokenization: Assign each word in the reviews an ID to be used in all reviews
    tokenizer = Tokenizer(min_df=10,
                          max_features=100000,
                          character=character_model)

    train_reviews_list = x_train.tolist()
    tokenizer.fit(train_reviews_list + x_test.tolist())

    # Tokenize training reviews (so they can be used to fit the RNN model)
    train_reviews_tokenized = tokenizer.transform(train_reviews_list)

    # Based on https://github.com/vinhkhuc/kaggle-sentiment-popcorn/blob/master/scripts/passage_nn.py which is based
    # on https://github.com/IndicoDataSolutions/Passage/blob/master/examples/sentiment.py

    # RNN Network:
    # -Each tokenized review will be converted into a sequence of words, where each word has an embedding representation
    # (256)
    # -RNN layer (GRU) attempts to find pattern in sequence of words
    # -Final dense layer is used as a logistic classifier to turn RNN output into a probability/prediction
    if not character_model:
        layers = [
            Embedding(size=256, n_features=tokenizer.n_features),
            # May replace with LstmRecurrent for LSTM layer
            GatedRecurrent(size=512,
                           activation='tanh',
                           gate_activation='steeper_sigmoid',
                           init='orthogonal',
                           seq_output=False,
                           p_drop=0.75),
            Dense(size=1, activation='sigmoid', init='orthogonal')
        ]
    else:
        # Character-level RNN
        # Idea is to convert character tokenizations into one-hot encodings in which case
        # the embeddings layer is no longer needed
        train_reviews_tokenized = map(
            lambda r_indexes: pd.get_dummies(
                r_indexes, columns=range(tokenizer.n_features + 1)).values,
            train_reviews_tokenized)
        layers = [
            # May replace with LstmRecurrent for LSTM layer
            GatedRecurrent(size=100,
                           activation='tanh',
                           gate_activation='steeper_sigmoid',
                           init='orthogonal',
                           seq_output=False,
                           p_drop=0.75),
            Dense(size=1, activation='sigmoid', init='orthogonal')
        ]

    # RNN classifier uses Binary Cross-Entropy as the cost function
    classifier = RNN(layers=layers, cost='bce', updater=Adadelta(lr=0.5))
    NUM_EPOCHS = 10
    # 10 epochs may take 10+ hours to run depending on machine
    classifier.fit(train_reviews_tokenized,
                   y_train.tolist(),
                   n_epochs=NUM_EPOCHS)

    # Store model and tokenizer
    if character_model:
        passage.utils.save(classifier, PASSAGE_CHAR_RNN_MODEL)
        _ = joblib.dump(tokenizer, PASSAGE_CHAR_TOKENIZER, compress=9)
    else:
        passage.utils.save(classifier, PASSAGE_RNN_MODEL)
        _ = joblib.dump(tokenizer, PASSAGE_TOKENIZER, compress=9)
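A hedged sketch of loading the stored word-level artifacts back for prediction; x_new is a placeholder series of preprocessed reviews.

tokenizer = joblib.load(PASSAGE_TOKENIZER)
classifier = passage.utils.load(PASSAGE_RNN_MODEL)
probs = classifier.predict(tokenizer.transform(x_new.tolist())).flatten()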
Example No. 16
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')

    x = x + m.mean()*0  # keep the mask variable in the computation graph without changing x

    embedding_size = 300
    glove_version = "glove.6B.300d.txt"
    #embedding_size = 50
    #glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

    #vaguely normalize
    x = x / 3.0 - .5

    #gloveMapping = Linear(
            #input_dim = embedding_size,
            #output_dim = 128,
            #weights_init = Orthogonal(),
            #biases_init = Constant(0.0),
            #name="gloveMapping"
            #)
    #gloveMapping.initialize()
    #o = gloveMapping.apply(x)
    #o = Rectifier(name="gloveRec").apply(o)

    rnn_in = x.dimshuffle(1, 0, 2)
    class Stub(object):
        def output(self, dropout_active=False):
            return rnn_in

    l_in = Stub()
    l_in.size = 300

    layer = GatedRecurrentPassage(
            size=300,
            gate_activation='sigmoid')
    layer.connect(l_in)
    from blocks.roles import add_role, WEIGHT, INITIAL_STATE
    print layer.params
    [add_role(l, WEIGHT) for l in layer.params]

    rnn_out = layer.output()
    o = rnn_out
    #o = rnn_out[-1, :, :]

    #o = rnn_out[:, -1, :]
    #o = rnn_out.mean(axis=1)

    #print rnn_last_out.eval({
        #x: np.ones((3, 101, 300), dtype=theano.config.floatX), 
        #m: np.ones((3, 101), dtype=theano.config.floatX)})
    #raw_input()
    #o = rnn_out.mean(axis=1)

    score_layer = Linear(
            input_dim = 300,
            output_dim = 1,
            weights_init = IsotropicGaussian(std=wstd),
            biases_init = Constant(0.),
            name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    #print rnn_in.shape.eval(
            #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
                #})
    #print rnn_out.shape.eval(
            #{x : np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).sum(axis=1).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).shape.eval({
                #m : np.ones((45, 111), dtype=theano.config.floatX)})
    #raw_input()


    # =================

    cg = ComputationGraph([cost])
    #cg = apply_dropout(cg, variables=dropout_variables, drop_prob=0.5)
    params = cg.parameters
    print params
    print "Len params", len(params)

    algorithm = GradientDescent(
            cost = cg.outputs[0],
            params=params,
            step_rule = CompositeRule([
                StepClipping(threshold=4),
                AdaM(),
                #NAG(lr=0.1, momentum=0.9),
                #AdaDelta(),
                ])

            )

    # ========
    print "setting up data"
    ports = {
            'gpu0_train' : 5557,
            'gpu0_test' : 5558,
            'gpu1_train' : 5559,
            'gpu1_test' : 5560,
            }

    #batch_size = 16
    batch_size = 32
    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')
        dataset = IMDBText(which_set, sorted=True)

        n_train = dataset.num_examples
        #scheme = ShuffledScheme(examples=n_train, batch_size=batch_size)
        scheme = BatchwiseShuffledScheme(examples=n_train, batch_size=batch_size)

        stream = DataStream(
                dataset=dataset,
                iteration_scheme=scheme)
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
                data_stream=glove,
                #mask_sources=('features',)
                mask_sources=('features',)
                )

        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()