Example #1
 def __init__(self, d, V):
     # d and V are inherited from textlayer
     # Define the embeddings
     self.d = d
     self.V = V
     self.A = glorot_uniform((d, V))
     self.C = glorot_uniform((d, V))
     self.params = [self.A, self.C]
     self.updates = []
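For reference, the Glorot/Xavier uniform rule these examples rely on draws weights from U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)). A minimal NumPy sketch, assuming the Keras 1 convention that fan_in and fan_out are the first and second entries of the shape tuple:

import numpy as np

def glorot_uniform_np(shape):
    # limit = sqrt(6 / (fan_in + fan_out)), then sample uniformly in [-limit, limit]
    fan_in, fan_out = shape[0], shape[1]
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-limit, limit, size=shape).astype("float32")

A = glorot_uniform_np((30, 1000))  # e.g. the (d, V) matrices A and C above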
Example #3
 def positional_encoding(self, xi):
     """We therefore propose a second representation that encodes the position of words within the
     sentence. This takes the form: m_i = sum_j l_j * A x_ij, where * is an element-wise multiplication.
     l_j is a column vector with the structure l_kj = (1 - j/J) - (k/d)(1 - 2j/J) (assuming 1-based
     indexing), with J being the number of words in the sentence, and d is the dimension of the
     embedding. This sentence representation, which we call position encoding (PE), means that the
     order of the words now affects m_i. The same representation is used for questions, memory inputs
     and memory outputs."""
     # Build one (J, d) encoding matrix per sentence following the formula above;
     # len() works for both token lists and numpy arrays, so no type dispatch is needed.
     encodings = []
     for sentence in xi:
         J = float(len(sentence))
         j = np.arange(1, len(sentence) + 1)[:, None]  # word positions, shape (J, 1)
         k = np.arange(1, self.d + 1)[None, :]         # embedding dimensions, shape (1, d)
         encodings.append((1.0 - j / J) - (k / float(self.d)) * (1.0 - 2.0 * j / J))
     return encodings
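To make the PE formula concrete, here is a small standalone sketch (plain NumPy, sizes chosen for illustration) that builds the (J, d) encoding matrix for a single sentence:

import numpy as np

def pe_matrix(J, d):
    # l_kj = (1 - j/J) - (k/d) * (1 - 2j/J), with 1-based j (word) and k (dimension)
    j = np.arange(1, J + 1)[:, None]   # word positions, shape (J, 1)
    k = np.arange(1, d + 1)[None, :]   # embedding dimensions, shape (1, d)
    return (1.0 - j / float(J)) - (k / float(d)) * (1.0 - 2.0 * j / float(J))

print(pe_matrix(3, 4))  # encoding weights for a 3-word sentence with d = 4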
Example #5
    def __init__(self, name, latent_dim, depth, k, hidden_dim, exploration_probability, exploration_decay_rate):
        """
        z = input (n, latent_dim)
        o = hidden representation (n, depth, hidden_dim)
        x = output (n,depth) (int)
        h = hidden input representation
        z*W
        o*U
        x*V
        """
        self.latent_dim = latent_dim
        self.depth = depth
        self.k = k
        self.hidden_dim = hidden_dim
        # z = T.fmatrix("z")  # input latent samples (n, latent_dim)
        self.exploration_probability = theano.shared(np.float32(exploration_probability),
                                                     "{}_exploration_probability".format(name))
        self.exploration_decay_rate = np.float32(exploration_decay_rate)

        # Hidden representation
        self.W_h = glorot_uniform((latent_dim, hidden_dim), "{}_W_h".format(name))  # z, (latent_dim, hidden_dim)
        self.U_h = glorot_uniform((hidden_dim, hidden_dim), "{}_U_h".format(name))  # h, (hidden_dim, hidden_dim)
        self.V_h = glorot_uniform((k + 2, hidden_dim), "{}_V_h".format(name))  # x, (k + 2, hidden_dim)
        self.b_h = zero((hidden_dim,), "{}_b_h".format(name))  # (hidden_dim,)

        # Forget gate
        self.W_f = glorot_uniform((hidden_dim, hidden_dim), "{}_W_f".format(name))  # (hidden_dim, hidden_dim)
        self.b_f = zero((hidden_dim,), "{}_b_f".format(name))  # (hidden_dim,)
        # Input gate
        self.W_i = glorot_uniform((hidden_dim, hidden_dim), "{}_W_i".format(name))  # (hidden_dim, hidden_dim)
        self.b_i = zero((hidden_dim,), "{}_b_i".format(name))  # (hidden_dim,)
        # Write gate
        self.W_w = glorot_uniform((hidden_dim, hidden_dim), "{}_W_w".format(name))  # (hidden_dim, hidden_dim)
        self.b_w = zero((hidden_dim,), "{}_b_w".format(name))  # (hidden_dim,)
        # Output gate
        self.W_o = glorot_uniform((hidden_dim, hidden_dim), "{}_W_o".format(name))  # (hidden_dim, hidden_dim)
        self.b_o = zero((hidden_dim,), "{}_b_o".format(name))  # (hidden_dim,)
        # Hidden state
        self.W_j = glorot_uniform((hidden_dim, hidden_dim), "{}_W_j".format(name))  # (hidden_dim, hidden_dim)
        self.b_j = zero((hidden_dim,), "{}_b_j".format(name))  # (hidden_dim,)
        # Value predictions
        self.W_v = glorot_uniform((hidden_dim, k + 1), "{}_W_v".format(name))  # (hidden_dim, k + 1)
        self.b_v = zero((k + 1,), "{}_b_v".format(name))  # (k + 1,)
        self.params = [self.W_h, self.U_h, self.V_h, self.b_h,
                       self.W_f, self.b_f,
                       self.W_i, self.b_i,
                       self.W_w, self.b_w,
                       self.W_o, self.b_o,
                       self.W_j, self.b_j,
                       self.W_v, self.b_v]
Example #6
    def __init__(self, y_vocab, dim_word, dim, dim_ctx):
        self.y_vocab = y_vocab  # 430
        self.dim_word = dim_word # 1024
        self.dim = dim  # 512
        self.dim_ctx = dim_ctx  # 512
        self.emb_dim = 512
        
        
        ### image embedding ###
        self.W_img_emb = initializations.glorot_uniform((self.dim_ctx, self.emb_dim))
        self.b_img_emb = initializations.zero((self.emb_dim,))

        self.W_fr_emb = initializations.glorot_uniform((self.dim_word, self.emb_dim))
        self.b_fr_emb = initializations.zero((self.emb_dim,))

        ### enc forward GRU ###
        self.W_gru = initializations.glorot_uniform((self.emb_dim, self.dim * 2))
        self.U_gru = initializations.glorot_uniform((self.dim, self.dim * 2))
        self.b_gru = initializations.zero((self.dim * 2,))
        self.W_gru_cdd = initializations.glorot_uniform((self.emb_dim, self.dim))  # cdd: candidate
        self.U_gru_cdd = initializations.glorot_uniform((self.dim, self.dim))
        self.b_gru_cdd = initializations.zero((self.dim,))
        ### prediction ###
        self.W_pred = initializations.glorot_uniform((self.dim * 2, self.y_vocab))
        self.b_pred = initializations.zero((self.y_vocab,))


        self.params = [self.W_img_emb, self.W_fr_emb, self.b_img_emb, self.b_fr_emb,
                       self.W_gru, self.U_gru, self.b_gru,
                       self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
                       self.W_pred, self.b_pred]
Example #7
def get_weights(shape, token2idx, token2embedding):
    weights = glorot_uniform(shape).get_value()
    if token2embedding is not None:
        vocabulary, tokens = set(token2idx.keys()), set(token2embedding.keys())
        for token_to_initialize in vocabulary.intersection(tokens):
            idx = token2idx[token_to_initialize]
            if idx < weights.shape[0]:
                weights[idx, :] = token2embedding[token_to_initialize]
    return weights
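A quick illustration of how get_weights overlays pretrained vectors on a fresh Glorot matrix; the toy vocabulary and 2-dimensional vectors are invented for this example, and numpy is assumed to be imported as np:

token2idx = {"the": 0, "cat": 1, "sat": 2}
token2embedding = {"cat": np.array([0.1, -0.2])}  # only "cat" has a pretrained vector

weights = get_weights((3, 2), token2idx, token2embedding)
# weights[1] now holds the pretrained vector; rows 0 and 2 keep their Glorot values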
Example #8
def unitary_ASB2016_init(shape, name=None):
    assert shape[0] == shape[1]
    N = shape[1]

    theta = initializations.uniform((3, N),
                                    scale=np.pi,
                                    name='{}_theta'.format(name))
    reflection = initializations.glorot_uniform(
        (2, 2 * N), name='{}_reflection'.format(name))
    idxperm = np.random.permutation(N)
    idxpermaug = np.concatenate((idxperm, N + idxperm))

    Iaug = augLeft(np.concatenate((np.eye(N), np.zeros((N, N))), axis=0),
                   module=np).astype(np.float32)
    Uaug = times_unitary_ASB2016(Iaug, N, [theta, reflection, idxpermaug])

    return Uaug, theta, reflection, idxpermaug
Example #9
def from_vocab(igor, vocab):    
    print("using vocab and glove file to generate embedding matrix")
    remaining_vocab = set(vocab.keys())
    embeddings = np.zeros((len(vocab), igor.embedding_size))
    print("{} words to convert".format(len(remaining_vocab)))


    if igor.save_dir[-1] != "/":
        igor.save_dir += "/"
    if not path.exists(igor.save_dir):
        makedirs(igor.save_dir)

    if igor.from_url:
        assert hasattr(glove_urls, igor.target_glove), "You need to specify one of the glove variables"
        url = urlopen(getattr(glove_urls, igor.target_glove))
        archive = ZipFile(StringIO(url.read()))
        # read the first member of the downloaded zip archive
        fileiter = archive.open(archive.namelist()[0]).readlines()
    else:
        assert os.path.exists(igor.target_glove), "You need to specify a real file"
        fileiter = open(igor.target_glove).readlines()

    for line in tqdm(fileiter):
        line = line.replace("\n", "").split(" ")
        try:
            word, nums = line[0], [float(x.strip()) for x in line[1:]]
            if word in remaining_vocab:
                embeddings[vocab[word]] = np.array(nums)
                remaining_vocab.remove(word)
        except Exception as e:
            print("line broke. exception: {}. line: {}.".format(e, line))
    

    print("{} words were not in glove; saving to oov.txt".format(len(remaining_vocab)))
    with open(path.join(igor.save_dir, "oov.txt"), "w") as fp:
        fp.write("\n".join(remaining_vocab))

    for word in tqdm(remaining_vocab):
        embeddings[vocab[word]] = np.asarray(glorot_uniform((igor.embedding_size,)).eval())


    
    vocab.save('embedding.vocab')
    with open(path.join(igor.save_dir, "embedding.npy"), "wb") as fp:
        np.save(fp, embeddings)
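For reference, each line of a GloVe text file is the token followed by its vector components separated by spaces, which is exactly what the split-and-float parsing above assumes (the values here are illustrative):

line = "king 0.125 -0.532 0.337".replace("\n", "").split(" ")
word, nums = line[0], [float(x.strip()) for x in line[1:]]
# word == "king", nums == [0.125, -0.532, 0.337]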
Example #10
    def __init__(self, test_file, train_file, d=30):
        # houses embedding B. 
        # deals with text
        # d = 30 to limit size of vectors

        # potentially add word2vec vectors
        self.d = d
        self.train_lines, self.test_lines = self.get_lines(train_file), self.get_lines(test_file)
        lines = np.concatenate([self.train_lines, self.test_lines], axis=0)
        self.vectorizer = CountVectorizer(lowercase=False)
        self.vectorizer.fit([x['text'] + ' ' + x['answer'] if 'answer' in x else x['text'] for x in lines])
        self.analyze = self.vectorizer.build_analyzer()

        self.V = len(self.vectorizer.vocabulary_)

        #self.B = theano.shared((np.random.uniform(-0.1, 0.1,(self.d, self.V))).astype(np.float32))
        self.B = glorot_uniform((self.d, self.V)) 

        self.params = [self.B]
        self.updates = []
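As a rough sketch of how such a layer could be used (the class name, file names, and the embedding step are assumptions, not shown in the source), a bag-of-words count vector of size V is projected into d dimensions through B:

layer = TextLayer("test.txt", "train.txt")  # hypothetical class and file names
counts = layer.vectorizer.transform(["Where is the milk ?"])  # sparse, shape (1, V)
embedded = layer.B.get_value().dot(counts.toarray().T)        # dense, shape (d, 1)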
Example #11
    def __init__(self, name, depth, k, hidden_dim):
        self.depth = depth
        self.k = k
        self.hidden_dim = hidden_dim

        # Hidden representation
        self.W_h = glorot_uniform((hidden_dim, hidden_dim), "{}_W_h".format(name))  # h, (hidden_dim, hidden_dim)
        self.U_h = glorot_uniform((k + 1, hidden_dim), "{}_U_h".format(name))  # x, (k+1, hidden_dim)
        self.b_h = zero((hidden_dim,), "{}_b_h".format(name))  # (hidden_dim,)

        # Forget gate
        self.W_f = glorot_uniform((hidden_dim, hidden_dim), "{}_W_f".format(name))  # (hidden_dim, hidden_dim)
        self.b_f = zero((hidden_dim,), "{}_b_f".format(name))  # (hidden_dim,)
        # Input gate
        self.W_i = glorot_uniform((hidden_dim, hidden_dim), "{}_W_i".format(name))  # (hidden_dim, hidden_dim)
        self.b_i = zero((hidden_dim,), "{}_b_i".format(name))  # (hidden_dim,)
        # Write gate
        self.W_w = glorot_uniform((hidden_dim, hidden_dim), "{}_W_w".format(name))  # (hidden_dim, hidden_dim)
        self.b_w = zero((hidden_dim,), "{}_b_w".format(name))  # (hidden_dim,)
        # Output gate
        self.W_o = glorot_uniform((hidden_dim, hidden_dim), "{}_W_o".format(name))  # (hidden_dim, hidden_dim)
        self.b_o = zero((hidden_dim,), "{}_b_o".format(name))  # (hidden_dim,)
        # Hidden state
        self.W_j = glorot_uniform((hidden_dim, hidden_dim), "{}_W_j".format(name))  # (hidden_dim, hidden_dim)
        self.b_j = zero((hidden_dim,), "{}_b_j".format(name))  # (hidden_dim,)
        # y predictions
        self.W_y = glorot_uniform((hidden_dim, 1), "{}_W_y".format(name))  # (hidden_dim, 1)
        self.b_y = zero((1,), "{}_b_y".format(name))  # (1,)
        self.clip_params = [self.W_h, self.U_h, self.W_f, self.W_i, self.W_w, self.W_o, self.W_j, self.W_y]
        self.params = [self.W_h, self.U_h, self.b_h,
                       self.W_f, self.b_f,
                       self.W_i, self.b_i,
                       self.W_w, self.b_w,
                       self.W_o, self.b_o,
                       self.W_j, self.b_j,
                       self.W_y, self.b_y]
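The clip_params list suggests the weight matrices are clipped during training. The update rule itself is not part of this example, so the following is only a sketch under that assumption, with cost, lr, and layer standing in for the surrounding training code:

import theano.tensor as T

grads = T.grad(cost, layer.params)
updates = []
for p, g in zip(layer.params, grads):
    new_p = p - lr * g
    if p in layer.clip_params:            # membership is by object identity
        new_p = T.clip(new_p, -1.0, 1.0)  # element-wise clip of the updated value
    updates.append((p, new_p))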
Example #12
def make_hash_embeddings(igor, vocab):
    assert os.path.exists(igor.target_glove), "You need to specify a real file"
    fileiter = open(igor.target_glove).readlines()

    hash_vocab = Vocabulary()
    hash_vocab.use_mask = True
    hash_vocab.add(hash_vocab.mask_symbol)
    hash_vocab.add(hash_vocab.unk_symbol)
    word2hash = {}
    for word, v_id in vocab.items():
        ids = hash_vocab.add_many(hash_word(word))
        word2hash[v_id] = ids

    embeddings = np.zeros((len(hash_vocab), igor.embedding_size))
    remaining_vocab = set(vocab.keys())
    remaining_hashes = set(hash_vocab.values())
    for line in tqdm(fileiter):
        line = line.replace("\n","").split(" ")
        word, nums = line[0], [float(x.strip()) for x in line[1:]]
        word_hash = hash_word(word)
        if word in remaining_vocab:
            hash_ids = word2hash[vocab[word]]
            remaining_vocab.remove(word)
            remaining_hashes.difference_update(hash_ids)
            embeddings[hash_ids] += np.array(nums) / len(hash_ids)
    print("{} words were not seen.  {} hashes were not seen".format(len(remaining_vocab),
                                                                    len(remaining_hashes)))
    for hash_id in remaining_hashes:
        embeddings[hash_id] = np.asarray(glorot_uniform((igor.embedding_size,)).eval())

    glove_name = igor.target_glove[igor.target_glove.find("glove"):].replace("/","")

    hash_vocab.save('hash_embedding_{}.vocab'.format(glove_name))
    with open(path.join(igor.save_dir, "hash_embedding_{}.npy".format(glove_name)), "wb") as fp:
        np.save(fp, embeddings)
    with open(path.join(igor.save_dir, "word2hash.json".format(glove_name)), "w") as fp:
        json.dump(word2hash, fp)
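make_hash_embeddings depends on a hash_word helper that is not shown in this example. A hypothetical stand-in, consistent with how it is used above (several deterministic hash tokens per word), could look like:

import hashlib

def hash_word(word, num_hashes=3, num_buckets=10**5):
    # Map a word to num_hashes deterministic bucket tokens.
    return ["#h{}".format(int(hashlib.md5("{}|{}".format(i, word).encode("utf-8")).hexdigest(), 16) % num_buckets)
            for i in range(num_hashes)]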
Example #13
    def __init__(self, test_file, train_file, d=30):
        # houses embedding B.
        # deals with text
        # d = 30 to limit size of vectors

        # potentially add word2vec vectors
        self.d = d
        self.train_lines, self.test_lines = self.get_lines(
            train_file), self.get_lines(test_file)
        lines = np.concatenate([self.train_lines, self.test_lines], axis=0)
        self.vectorizer = CountVectorizer(lowercase=False)
        self.vectorizer.fit([
            x['text'] + ' ' + x['answer'] if 'answer' in x else x['text']
            for x in lines
        ])
        self.analyze = self.vectorizer.build_analyzer()

        self.V = len(self.vectorizer.vocabulary_)

        #self.B = theano.shared((np.random.uniform(-0.1, 0.1,(self.d, self.V))).astype(np.float32))
        self.B = glorot_uniform((self.d, self.V))

        self.params = [self.B]
        self.updates = []
Example #14
    def __init__(self,
                 n_words=1000,
                 n_embedding=100,
                 lr=0.01,
                 margin=0.1,
                 momentum=0.9,
                 word_to_id=None):
        self.n_embedding = n_embedding
        self.n_lstm_embed = n_embedding
        self.word_embed = n_embedding
        self.lr = lr
        self.momentum = momentum
        self.margin = margin
        self.n_words = n_words
        self.n_D = 3 * self.n_words + 3

        self.word_to_id = word_to_id
        self.id_to_word = dict((v, k) for k, v in word_to_id.iteritems())

        # Question
        x = T.vector('x')
        phi_x = T.vector('phi_x')

        # True statements
        phi_f1_1 = T.vector('phi_f1_1')
        phi_f2_1 = T.vector('phi_f2_1')

        # False statements
        phi_f1_2 = T.vector('phi_f1_2')
        phi_f2_2 = T.vector('phi_f2_2')

        # Supporting memories
        m0 = T.vector('m0')
        m1 = T.vector('m1')
        phi_m0 = T.vector('phi_m0')
        phi_m1 = T.vector('phi_m1')

        # True word
        r = T.vector('r')

        # Word sequence
        words = T.ivector('words')

        # Scoring function
        self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

        # Word embedding
        self.L = glorot_uniform((self.n_words, self.word_embed))
        self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

        # LSTM
        self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_i = shared_zeros((self.n_lstm_embed))

        self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_f = shared_zeros((self.n_lstm_embed))

        self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_c = shared_zeros((self.n_lstm_embed))

        self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_o = shared_zeros((self.n_lstm_embed))

        mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1,
                                  phi_f2_2, phi_m0)

        lstm_output = self.lstm_cost(words)
        self.predict_function_r = theano.function(inputs=[words],
                                                  outputs=lstm_output,
                                                  allow_input_downcast=True)

        lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

        cost = mem_cost + lstm_cost

        params = [
            self.U_O, self.W_i, self.U_i, self.b_i, self.W_f, self.U_f,
            self.b_f, self.W_c, self.U_c, self.b_c, self.W_o, self.U_o,
            self.b_o, self.L, self.Lprime
        ]

        grads = T.grad(cost, params)

        # Parameter updates
        updates = self.get_updates(params, grads, method='adagrad')

        l_rate = T.scalar('l_rate')

        # Theano functions
        self.train_function = theano.function(
            inputs=[
                phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0, r,
                words,
                theano.Param(l_rate, default=self.lr)
            ],
            outputs=cost,
            updates=updates,
            on_unused_input='warn',
            allow_input_downcast=True,
        )
        #mode='FAST_COMPILE')
        #mode='DebugMode')
        #mode=theano.compile.MonitorMode(pre_func=inspect_inputs,post_func=inspect_outputs))

        # Candidate statement for prediction
        phi_f = T.vector('phi_f')

        score_o = self.calc_score_o(phi_x, phi_f)
        self.predict_function_o = theano.function(inputs=[phi_x, phi_f],
                                                  outputs=score_o)
Example #15
from keras import backend as K
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
from keras.metrics import categorical_accuracy
from keras.initializations import glorot_uniform, zero
import numpy as np

# inputs and targets are placeholders
input_dim = 28 * 28
output_dim = 10
x = K.placeholder(name="x", shape=(None, input_dim))
ytrue = K.placeholder(name="y", shape=(None, output_dim))

# model parameters are variables
hidden_dim = 128
W1 = glorot_uniform((input_dim, hidden_dim))
b1 = zero((hidden_dim, ))
W2 = glorot_uniform((hidden_dim, output_dim))
b2 = zero((output_dim, ))
params = [W1, b1, W2, b2]

# two-layer model
hidden = K.sigmoid(K.dot(x, W1) + b1)
ypred = K.softmax(K.dot(hidden, W2) + b2)

# categorical cross entropy loss
loss = K.mean(K.categorical_crossentropy(ytrue, ypred), axis=None)

# categorical accuracy
accuracy = categorical_accuracy(ytrue, ypred)
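The snippet stops at defining the loss and accuracy graphs. A minimal SGD training step can be compiled with the same backend API; the learning rate, batch size, and plain gradient-descent rule below are choices made for this sketch, not part of the original:

grads = K.gradients(loss, params)
lr = 0.1
updates = [(p, p - lr * g) for p, g in zip(params, grads)]
train_step = K.function([x, ytrue], [loss], updates=updates)

(x_train, y_train), _ = mnist.load_data()
x_train = x_train.reshape(-1, input_dim).astype("float32") / 255.0
y_train = to_categorical(y_train, output_dim)
print(train_step([x_train[:128], y_train[:128]])[0])  # loss on one batch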
Example #16
 def __init__(self, d, V):
     self.W = glorot_uniform((V, d))
     self.params = [self.W]
Example #18
 def build(self, input_shape):
     input_dim = input_shape[1]
     # initial_weight_value = glorot_uniform((input_dim, self.n_output_channels))
     # self.W = K.variable(initial_weight_value)
     self.W = glorot_uniform((input_dim, self.n_output_channels), name='W')
     self.trainable_weights = [self.W]
Example #19
    def __init__(self, n_words=1000, n_embedding=100, lr=0.01, margin=0.1, momentum=0.9, word_to_id=None):
        self.n_embedding = n_embedding
        self.n_lstm_embed = n_embedding
        self.word_embed = n_embedding
        self.lr = lr
        self.momentum = momentum
        self.margin = margin
        self.n_words = n_words
        self.n_D = 3 * self.n_words + 3

        self.word_to_id = word_to_id
        self.id_to_word = dict((v, k) for k, v in word_to_id.iteritems())

        # Question
        x = T.vector('x')
        phi_x = T.vector('phi_x')

        # True statements
        phi_f1_1 = T.vector('phi_f1_1')
        phi_f2_1 = T.vector('phi_f2_1')

        # False statements
        phi_f1_2 = T.vector('phi_f1_2')
        phi_f2_2 = T.vector('phi_f2_2')

        # Supporting memories
        m0 = T.vector('m0')
        m1 = T.vector('m1')
        phi_m0 = T.vector('phi_m0')
        phi_m1 = T.vector('phi_m1')

        # True word
        r = T.vector('r')

        # Word sequence
        words = T.ivector('words')

        # Scoring function
        self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

        # Word embedding
        self.L = glorot_uniform((self.n_words, self.word_embed))
        self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

        # LSTM
        self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_i = shared_zeros((self.n_lstm_embed))

        self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_f = shared_zeros((self.n_lstm_embed))

        self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_c = shared_zeros((self.n_lstm_embed))

        self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_o = shared_zeros((self.n_lstm_embed))

        mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0)

        lstm_output = self.lstm_cost(words)
        self.predict_function_r = theano.function(inputs = [words], outputs = lstm_output, allow_input_downcast=True)

        lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

        cost = mem_cost + lstm_cost

        params = [
            self.U_O,
            self.W_i, self.U_i, self.b_i,
            self.W_f, self.U_f, self.b_f,
            self.W_c, self.U_c, self.b_c,
            self.W_o, self.U_o, self.b_o,
            self.L, self.Lprime
        ]

        grads = T.grad(cost, params)

        # Parameter updates
        updates = self.get_updates(params, grads, method='adagrad')

        l_rate = T.scalar('l_rate')

        # Theano functions
        self.train_function = theano.function(
            inputs = [phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2,
                      phi_m0, r, words,
                      theano.Param(l_rate, default=self.lr)],
            outputs = cost,
            updates = updates,
            on_unused_input='warn',
            allow_input_downcast=True,
            )
            #mode='FAST_COMPILE')
            #mode='DebugMode')
            #mode=theano.compile.MonitorMode(pre_func=inspect_inputs,post_func=inspect_outputs))

        # Candidate statement for prediction
        phi_f = T.vector('phi_f')

        score_o = self.calc_score_o(phi_x, phi_f)
        self.predict_function_o = theano.function(inputs = [phi_x, phi_f], outputs = score_o)