Example #1
File: stack.py Project: dytmas/seya
    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        input_leng, input_dim = input_shape[1:]

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name))
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name))
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        inner_shape = list(input_shape)
        inner_shape[-1] = input_dim+self.m_length
        self.rnn.build(inner_shape)


        self.init_h = K.zeros((self.rnn_size), name="{}_init_h".format(self.name))

        self.W_d = self.rnn.init((self.rnn_size,1), name="{}_W_d".format(self.name))
        self.W_u = self.rnn.init((self.rnn_size,1), name="{}_W_u".format(self.name))

        self.W_v = self.rnn.init((self.rnn_size,self.m_length), name="{}_W_v".format(self.name))
        self.W_o = self.rnn.init((self.rnn_size,self.output_dim), name="{}_W_o".format(self.name))

        self.b_d = K.zeros((1,), name="{}_b_d".format(self.name))
        self.b_u = K.zeros((1,), name="{}_b_u".format(self.name))
        self.b_v = K.zeros((self.m_length,), name="{}_b_v".format(self.name))
        self.b_o = K.zeros((self.output_dim,), name="{}_b_o".format(self.name))

        
        self.trainable_weights = self.rnn.trainable_weights + [
           self.W_d, self.b_d,
           self.W_v, self.b_v,
           self.W_u,  self.b_u,
           self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size), name="{}_init_c".format(self.name))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
			
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
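# A hedged NumPy aside (not part of the seya example): why the inner RNN above is built
# with input_dim = input_dim + self.m_length. At every step the previous read vector is
# concatenated onto the external input before it reaches the controller; sizes below are
# illustrative.
import numpy as np

batch, input_dim, m_length = 4, 8, 20
x_t = np.zeros((batch, input_dim))      # external input at one timestep
r_tm1 = np.zeros((batch, m_length))     # read vector coming back from the memory/stack
controller_in = np.concatenate([x_t, r_tm1], axis=-1)
assert controller_in.shape == (batch, input_dim + m_length)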
Example #2
File: draw.py Project: dytmas/seya
    def build(self):
        self.input = T.tensor4()

        if self.inner_rnn == 'gru':
            self.enc = GRU(
                input_length=self.n_steps,
                input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
            self.dec = GRU(
                input_length=self.n_steps,
                input_dim=self.code_dim, output_dim=self.output_dim,
                init=self.init,
                inner_init=self.inner_init)

        elif self.inner_rnn == 'lstm':
            self.enc = LSTM(
                input_length=self.n_steps,
                input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
                output_dim=self.output_dim, init=self.init, inner_init=self.inner_init)
            self.dec = LSTM(
                input_length=self.n_steps,
                input_dim=self.code_dim, output_dim=self.output_dim,
                init=self.init, inner_init=self.inner_init)
        else:
            raise ValueError('This type of inner_rnn is not supported')

        self.enc.build()
        self.dec.build()

        self.init_canvas = shared_zeros(self._input_shape)  # canvas and hidden state
        self.init_h_enc = shared_zeros((self.output_dim))  # initial values
        self.init_h_dec = shared_zeros((self.output_dim))  # should be trained
        self.L_enc = self.enc.init((self.output_dim, 5))  # "read" attention parameters (eq. 21)
        self.L_dec = self.enc.init((self.output_dim, 5))  # "write" attention parameters (eq. 28)
        self.b_enc = shared_zeros((5))  # "read" attention parameters (eq. 21)
        self.b_dec = shared_zeros((5))  # "write" attention parameters (eq. 28)
        self.W_patch = self.enc.init((self.output_dim, self.N_dec**2*self._input_shape[0]))
        self.b_patch = shared_zeros((self.N_dec**2*self._input_shape[0]))
        self.W_mean = self.enc.init((self.output_dim, self.code_dim))
        self.W_sigma = self.enc.init((self.output_dim, self.code_dim))
        self.b_mean = shared_zeros((self.code_dim))
        self.b_sigma = shared_zeros((self.code_dim))
        self.trainable_weights = self.enc.trainable_weights + self.dec.trainable_weights + [
            self.L_enc, self.L_dec, self.b_enc, self.b_dec, self.W_patch,
            self.b_patch, self.W_mean, self.W_sigma, self.b_mean, self.b_sigma,
            self.init_canvas, self.init_h_enc, self.init_h_dec]

        if self.inner_rnn == 'lstm':
            self.init_cell_enc = shared_zeros((self.output_dim))     # initial values
            self.init_cell_dec = shared_zeros((self.output_dim))     # should be trained
            self.trainable_weights = self.trainable_weights + [self.init_cell_dec, self.init_cell_enc]
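# A hedged aside on the "(eq. 21)/(eq. 28)" comments above, following the DRAW paper
# (Gregor et al., 2015): L_enc/b_enc and L_dec/b_dec are linear maps that turn a hidden
# state into the 5 raw attention parameters, which are then rescaled. Plain NumPy,
# illustrative sizes, not the project's code.
import numpy as np

output_dim, A, B, N = 256, 28, 28, 5                     # hidden size, image width/height, grid size
h_dec = np.zeros(output_dim)
L, b = np.zeros((output_dim, 5)), np.zeros(5)            # plays the role of L_dec, b_dec
gx_raw, gy_raw, log_sigma2, log_delta, log_gamma = h_dec.dot(L) + b
gx = (A + 1) / 2. * (gx_raw + 1)                         # eq. 22
gy = (B + 1) / 2. * (gy_raw + 1)                         # eq. 23
delta = (max(A, B) - 1) / (N - 1.) * np.exp(log_delta)   # eq. 24
sigma2, gamma = np.exp(log_sigma2), np.exp(log_gamma)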
Example #3
File: draw.py Project: samim23/seya
    def __init__(self, input_shape, h_dim, z_dim, N_enc=2, N_dec=5, n_steps=64,
                 inner_rnn='gru', truncate_gradient=-1, return_sequences=False,
                 canvas_activation=T.nnet.sigmoid, init='glorot_uniform',
                 inner_init='orthogonal'):
        self.input = T.tensor4()
        self.h_dim = h_dim  # this is 256 for MNIST
        self.z_dim = z_dim  # this is 100 for MNIST
        self.input_shape = input_shape
        self.N_enc = N_enc
        self.N_dec = N_dec
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences
        self.n_steps = n_steps
        self.canvas_activation = canvas_activation

        self.height = input_shape[1]
        self.width = input_shape[2]

        self.inner_rnn = inner_rnn
        if inner_rnn == 'gru':
            self.enc = GRU(input_dim=self.input_shape[0]*2*self.N_enc**2 +
                           h_dim, output_dim=h_dim, init=init,
                           inner_init=inner_init)
            self.dec = GRU(input_dim=z_dim, output_dim=h_dim, init=init,
                           inner_init=inner_init)

        elif inner_rnn == 'lstm':
            self.enc = LSTM(input_dim=self.input_shape[0]*2*self.N_enc**2 + h_dim,
                            output_dim=h_dim, init=init,
                            inner_init=inner_init)
            self.dec = LSTM(input_dim=z_dim, output_dim=h_dim, init=init,
                            inner_init=inner_init)
        else:
            raise ValueError('This type of inner_rnn is not supported')

        self.init_canvas = shared_zeros(input_shape)  # canvas and hidden state
        self.init_h_enc = shared_zeros((h_dim))     # initial values
        self.init_h_dec = shared_zeros((h_dim))     # should be trained
        self.L_enc = self.enc.init((h_dim, 5))  # "read" attention parameters (eq. 21)
        self.L_dec = self.enc.init((h_dim, 5))  # "write" attention parameters (eq. 28)
        self.b_enc = shared_zeros((5))  # "read" attention parameters (eq. 21)
        self.b_dec = shared_zeros((5))  # "write" attention parameters (eq. 28)
        self.W_patch = self.enc.init((h_dim, self.N_dec**2*self.input_shape[0]))
        self.b_patch = shared_zeros((self.N_dec**2*self.input_shape[0]))
        self.W_mean = self.enc.init((h_dim, z_dim))
        self.W_sigma = self.enc.init((h_dim, z_dim))
        self.b_mean = shared_zeros((z_dim))
        self.b_sigma = shared_zeros((z_dim))
        self.params = self.enc.params + self.dec.params + [
            self.L_enc, self.L_dec, self.b_enc, self.b_dec, self.W_patch,
            self.b_patch, self.W_mean, self.W_sigma, self.b_mean, self.b_sigma]
Example #4
    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()


        self.init_h = K.zeros((self.rnn_size))

        self.W_d = self.rnn.init((self.rnn_size,1))
        self.W_u = self.rnn.init((self.rnn_size,1))

        self.W_v = self.rnn.init((self.rnn_size,self.m_length))
        self.W_o = self.rnn.init((self.rnn_size,self.output_dim))

        self.b_d = K.zeros((1,),name="b_d")
        self.b_u = K.zeros((1,),name="b_u")
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        
        self.trainable_weights = self.rnn.trainable_weights + [
           self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u,  self.b_u,
            self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
Example #5
def create_o_test_model(train_model, examples, hidden_size, embed_size, glove, batch_size = 64, prem_len = 22):
    
    
    graph = Graph()
    
    hypo_layer = LSTM(output_dim= hidden_size, batch_input_shape=(batch_size, 1, embed_size), 
                      return_sequences=True, stateful = True, trainable = False)
    
    
    graph.add_input(name='hypo_input', batch_input_shape=(batch_size, 1), dtype = 'int32')
    graph.add_node(make_fixed_embeddings(glove, 1), name = 'hypo_word_vec', input='hypo_input')
    graph.add_node(hypo_layer, name = 'hypo', input='hypo_word_vec')
    
    graph.add_input(name='premise', batch_input_shape=(batch_size, prem_len, embed_size))
    graph.add_input(name='creative', batch_input_shape=(batch_size, embed_size))
    
    attention = LstmAttentionLayer(hidden_size, return_sequences=True, stateful = True, trainable = False, feed_state = False)
    
    
    graph.add_node(attention, name='attention', inputs=['premise', 'hypo', 'creative'], merge_mode='join')
   
    
    graph.add_input(name='train_input', batch_input_shape=(batch_size, 1), dtype='int32')
    hs = HierarchicalSoftmax(len(glove), input_dim = hidden_size, input_length = 1, trainable = False)
    
    graph.add_node(hs, 
                   name = 'softmax', inputs=['attention','train_input'], 
                   merge_mode = 'join')
    graph.add_output(name='output', input='softmax')
    
    hypo_layer.set_weights(train_model.nodes['hypo'].get_weights())
    attention.set_weights(train_model.nodes['attention'].get_weights())
    hs.set_weights(train_model.nodes['softmax'].get_weights())    
    
    graph.compile(loss={'output': hs_categorical_crossentropy}, optimizer='adam')
    
    func_premise = theano.function([train_model.inputs['premise_input'].get_input()],
                                    train_model.nodes['premise'].get_output(False), 
                                    allow_input_downcast=True)
    func_noise = theano.function([train_model.inputs['noise_input'].get_input(),
                                  train_model.inputs['class_input'].get_input()],
                                  train_model.nodes['creative'].get_output(False),
                                  allow_input_downcast=True)                            

    return graph, func_premise, func_noise
Example #6
    def build(self, input_shape):
        input_leng, input_dim = input_shape[1:]
       # self.input = T.tensor3()

        self.lstm = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init)

        self.lstm.build(input_shape)

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        self.init_h = backend.zeros((self.output_dim))
        self.init_wr = self.lstm.init((self.n_slots,))
        self.init_ww = self.lstm.init((self.n_slots,))

        # write
        self.W_e = self.lstm.init((self.output_dim, self.m_length))  # erase
        self.b_e = backend.zeros((self.m_length))
        self.W_a = self.lstm.init((self.output_dim, self.m_length))  # add
        self.b_a = backend.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.lstm.init((self.output_dim, self.m_length))
        self.b_k_read = self.lstm.init((self.m_length,))
        self.W_c_read = self.lstm.init((self.output_dim, 3))
        self.b_c_read = backend.zeros((3))
        self.W_s_read = self.lstm.init((self.output_dim, self.shift_range))
        self.b_s_read = backend.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.lstm.init((self.output_dim, self.m_length))
        self.b_k_write = self.lstm.init((self.m_length,))
        self.W_c_write = self.lstm.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = backend.zeros((3))
        self.W_s_write = self.lstm.init((self.output_dim, self.shift_range))
        self.b_s_write = backend.zeros((self.shift_range))

        self.C = circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.lstm.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        self.init_c = backend.zeros((self.output_dim))
        self.trainable_weights = self.trainable_weights + [self.init_c, ]
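# A hedged NumPy sketch (not the project's code) of how the per-head parameters built
# above are normally combined into an address over the n_slots memory rows, following
# the NTM equations referenced in the comments (content key + beta -> eq. 5,
# interpolation gate g -> eq. 7, shift s, sharpening gamma -> eq. 9). For simplicity s is
# taken here as a full length-n distribution; the layer above instead uses a
# shift_range-wide vector together with the circulant matrix C.
import numpy as np

def address(M, k, beta, g, s, gamma, w_prev):
    # M: (n_slots, m_length) memory, k: (m_length,) key, s: (n_slots,) shift distribution
    sim = M.dot(k) / (np.linalg.norm(M, axis=1) * np.linalg.norm(k) + 1e-6)
    w_c = np.exp(beta * sim)
    w_c = w_c / w_c.sum()                                 # content addressing (eq. 5)
    w_g = g * w_c + (1.0 - g) * w_prev                    # interpolation (eq. 7)
    n = len(w_g)
    w_s = np.array([sum(w_g[j] * s[(i - j) % n] for j in range(n))
                    for i in range(n)])                   # circular shift
    w = w_s ** gamma
    return w / w.sum()                                    # sharpening (eq. 9)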
Example #7
    for t, char in enumerate(sentence):
        x[0, t, char_indices[char]] = 1.0
    return x


sentence = "^" + sys.argv[1]
sentence = "".join([c for c in sentence if c in char_indices])
x = create_input(sentence)

# build the model: 2 stacked LSTM
print("Build model...")
model = Sequential()
first_layer = LSTM(512, return_sequences=True, input_shape=(None, len(chars)))
model.add(first_layer)
model.add(Dropout(0.5))
second_layer = LSTM(512, return_sequences=True)
model.add(second_layer)
model.add(Dropout(0.5))
model.add(TimeDistributedDense(len(chars)))
model.add(Activation("softmax"))

print("creating function")
layer_output = theano.function([model.get_input(train=False)], second_layer.get_output(train=False))

W = layer_output(x)[0]
print(W.shape)

dists = []
for i in xrange(W.shape[0]):
    for j in xrange(i + 1, W.shape[0]):
        # m = (W[i] + W[j]) / 2
Example #8
class Stack(Recurrent):
    """ Neural Turing Machines

    Non obvious parameter:
    ----------------------
    shift_range: int, number of available shifts, e.g. if 3, the available shifts are
                 (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location

    Known issues:
    -------------
    Theano may complain when n_slots == 1.

    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm',rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()


        self.init_h = K.zeros((self.rnn_size))

        self.W_d = self.rnn.init((self.rnn_size,1))
        self.W_u = self.rnn.init((self.rnn_size,1))

        self.W_v = self.rnn.init((self.rnn_size,self.m_length))
        self.W_o = self.rnn.init((self.rnn_size,self.output_dim))

        self.b_d = K.zeros((1,),name="b_d")
        self.b_u = K.zeros((1,),name="b_u")
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        
        self.trainable_weights = self.rnn.trainable_weights + [
           self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u,  self.b_u,
            self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
        #self.trainable_weights =[self.W_d]
       

    def get_initial_states(self, X):
        batch_size = X.shape[0]

        init_r = K.zeros((self.m_length)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_V = K.zeros((self.n_slots, self.m_length)).dimshuffle('x', 0, 1).repeat(batch_size, axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1,), dtype=np.int32)

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]
        # the GRU controller carries a single hidden state, so no cell state is returned
        return [init_r, init_V, init_S, itime, init_h]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        if K._BACKEND == 'tensorflow':
            if not self.input_shape[1]:
                raise Exception('When using TensorFlow, you should define ' +
                                'explicitly the number of timesteps of ' +
                                'your sequences. Make sure the first layer ' +
                                'has a "batch_input_shape" argument ' +
                                'including the samples axis.')

        mask = self.get_output_mask(train)
        if mask:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        states = rnn_states(self.step, X, initial_states,
                            go_backwards=self.go_backwards,
                            masking=masking)
        return states

    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        def print_name_shape(name, x):
            return T.cast(K.sum(theano.printing.Print(name)(x.shape)) * 0, "float32")

        r_tm1 = r_tm1 + print_name_shape("out\nr_tm1", r_tm1) + \
                        print_name_shape("V_tm1", V_tm1) + \
                        print_name_shape("s_tm1", s_tm1) + \
                        print_name_shape("x", x) + \
                        print_name_shape("h_tm1_0", h_tm1[0]) + \
                        print_name_shape("h_tm1_1", h_tm1[1])

        op_t, h_t = self._update_controller(T.concatenate([x, r_tm1], axis=-1),
                                            h_tm1)

        # op_t = op_t + print_name_shape("W_d", self.W_d.get_value())
        op_t = op_t + print_name_shape("afterop_t", op_t)
        # op_t = op_t[:, 0, :]
        ao = K.dot(op_t, self.W_d)
        ao = ao + print_name_shape("ao", ao)
        d_t = K.sigmoid(ao + self.b_d) + print_name_shape("afterop2_t", op_t)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u) + print_name_shape("d_t", op_t)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v) + print_name_shape("u_t", u_t)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) + print_name_shape("v_t", v_t)

        o_t = o_t + print_name_shape("afterbulk_t", o_t)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::, 0],
                                             u_t[::, 0], v_t, time[0], stack=self.stack)

        # V_t, s_t, r_t = V_tm1, s_tm1, T.sum(V_tm1, axis=1)
        V_t = V_t + print_name_shape("o_t", o_t) + \
                    print_name_shape("r_t", r_t) + \
                    print_name_shape("V_t", V_t) + \
                    print_name_shape("s_t", s_t)
        # T.cast(theano.printing.Print("time")(time[0]), "float32")
        # time = T.set_subtensor(time[0], time[0] +)

        return o_t, [r_t, V_t, s_t, time] + h_t

    def _update_controller(self, inp, h_tm1):
        """We have to update the inner RNN inside the NTM, this
        is the function to do it. Pretty much copy+paste from Keras
        """
        def print_name_shape(name, x, shape=True):
            if shape:
                return T.cast(K.sum(theano.printing.Print(name)(x.shape)) * 0, "float32")
            else:
                return theano.printing.Print(name)(x)

        # one hidden state means gru, two mean lstm
        if len(h_tm1) in [1, 2]:
            if hasattr(self.rnn, "get_constants"):
                BW, BU = self.rnn.get_constants(inp)
                h_tm1 += (BW, BU)

        # update state
        op_t, h = self.rnn.step(inp + print_name_shape("inp", inp), h_tm1)

        return op_t + print_name_shape("opt", op_t) + print_name_shape("h", h[0]) + print_name_shape("h", h[1]), h
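# A hedged usage sketch for the Stack layer above (Keras 1.x style); the sizes, the
# downstream Dense head, and the loss are all illustrative, not taken from the project.
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Stack(output_dim=64, n_slots=32, m_length=20,
                inner_rnn='lstm', rnn_size=64,
                input_dim=8, input_length=40))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')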
Example #9
token = Tokenizer(num_words=5000)
token.fit_on_texts(text_train)
X_train_seq = token.texts_to_sequences(text_train)
X_test_seq = token.texts_to_sequences(text_test)
X_train = sequence.pad_sequences(X_train_seq, maxlen=300)
X_test = sequence.pad_sequences(X_test_seq, maxlen=300)
print(len(X_train_seq[104]))
print(len(X_train[104]))
print(len(X_train_seq[6]))
print(len(X_train[1]))
print((X_train[6]))

model = Sequential()
model.add(Embedding(output_dim=32, input_dim=5000, input_length=300))
model.add(Dropout(0.5))
model.add(LSTM(32))
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(units=1, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
train_history = model.fit(X_train,
                          y_train,
                          validation_split=0.2,
                          epochs=10,
                          batch_size=100,
                          verbose=2)
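# Hedged follow-up (not in the original snippet): the test sequences are prepared above
# but never scored; assuming y_test holds the matching binary labels, evaluation would be:
scores = model.evaluate(X_test, y_test, batch_size=100, verbose=1)
print('Test loss: %.4f  Test accuracy: %.4f' % (scores[0], scores[1]))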

Example #10
X_train = sequence.pad_sequences(X_train, maxlen=max_len)


#y_train= to_categorical(y_train)
#y_test = to_categorical(y_test)


max_features = 5000
model = Sequential()
print('Build model...')
embedding_vector_length = 32

model = Sequential()
model.add(Embedding(max_features, embedding_vector_length, input_length=max_len))
model.add(Dropout(0.2))
model.add(LSTM(100))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy'])
print(model.summary())
checkpointer = callbacks.ModelCheckpoint(filepath="logs/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max')
csv_logger = CSVLogger('logs/training_set_iranalysis.csv',separator=',', append=False)

model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000,
          validation_data=(X_train, y_train), shuffle=True,callbacks=[checkpointer,csv_logger])
score, acc = model.evaluate(X_train, y_train,
                            batch_size=32)
print('Test score:', score)
print('Test accuracy:', acc)
Example #11
#     model.fit(imgs, Y_label, epochs=5,batch_size=64)
print(len(test_img_y))

test_img_x = np.array(test_img_x)

num = int(test_img_x.shape[0]/num_of_frames)
test_img_x = np.reshape(test_img_x,(num,num_of_frames,dim_x,dim_y))

test_img_y = np.array(test_img_y)

test_img_x = np.reshape(test_img_x,(test_img_x.shape[0],num_of_frames,dim_x*dim_y))

# ***********************************************************
model = Sequential()

model.add(LSTM(50,input_shape = (num_of_frames,dim_x*dim_y),return_sequences =True,dropout = 0.2))
# model.add(LSTM(50,return_sequences=True,dropout=0.2))
model.add(Flatten())
model.add(Dense(6,activation='softmax'))
# model.add(BatchNormalization())
model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])  
model.summary()

history = model.fit(img_x, img_y, epochs=100,batch_size=124,validation_data=(test_img_x,test_img_y))
end = time.time()
print("\n\nTime to train the LSTM MODEL: ",end-start)

import matplotlib.pyplot as plt
from matplotlib import style

plt.rcParams['figure.figsize'] = [10, 10]
Example #12
def train_model(path,
                max_sentence_len=40,
                overlap_size=0,
                num_epochs=20,
                full_model_filename=None,
                initial_epoch=0,
                filter_max=True):
    if overlap_size is None:
        overlap_size = max_sentence_len - 1

    assert (0 <= overlap_size < max_sentence_len)

    print('\nLoading GloVe...')
    nlp = spacy.load('en_vectors_web_lg')
    word_model = nlp.vocab

    print('\nPreparing the sentences...')

    data_driven_vocabulary = set()

    with open(path, 'r', encoding='utf-8') as f:
        docs = f.readlines()
    sentences = []
    for doc in docs:
        tokens = tokenize(doc, word_model)
        current_sentences = filter_sentences(tokens,
                                             max_sentence_len,
                                             overlap_size,
                                             filter_max=filter_max)
        sentences.extend(current_sentences)

        data_driven_vocabulary = data_driven_vocabulary.union(tokens)

    num_unique_words = len(data_driven_vocabulary)

    print('Num sentences: {}'.format(len(sentences)))
    print('Num unique words: {}'.format(num_unique_words))

    # Work on the full GloVe matrix

    true_pretrained_weights = word_model.vectors.data

    num_word_features = true_pretrained_weights.shape[1]

    def true_word2idx(my_word):
        my_key = word_model.strings[my_word]
        try:
            my_row = word_model.vectors.key2row[my_key]
        except KeyError:
            print('Word {} unknown'.format(my_word))
            my_row = 2091  # the row for 'cat' word
        return my_row

    def true_idx2word(my_row):
        my_key = list(word_model.vectors.keys())[my_row]
        my_word = word_model.strings[my_key]
        return my_word

    # Trim the GloVe matrix to lower RAM usage

    sorted_data_driven_vocabulary = sorted(list(data_driven_vocabulary))

    word_indices = dict(
        (c, i) for i, c in enumerate(sorted_data_driven_vocabulary))
    indices_word = dict(
        (i, c) for i, c in enumerate(sorted_data_driven_vocabulary))

    def word2idx(my_word):
        return word_indices.get(my_word, None)

    def idx2word(my_row):
        return indices_word[my_row]

    pretrained_weights = np.zeros((num_unique_words, num_word_features))
    for my_row in range(num_unique_words):
        true_row = true_word2idx(idx2word(my_row))
        pretrained_weights[my_row] = true_pretrained_weights[true_row, :]

    vocab_size, embedding_size = pretrained_weights.shape
    print('Result embedding shape:', pretrained_weights.shape)

    print('\nPreparing the data for LSTM...')
    train_x = np.zeros([len(sentences), max_sentence_len], dtype=np.int32)
    train_y = np.zeros([len(sentences)], dtype=np.int32)
    for i, sentence in enumerate(sentences):
        for t, word in enumerate(sentence[:-1]):
            train_x[i, t] = word2idx(word)
            train_y[i] = word2idx(sentence[-1])
    print('train_x shape:', train_x.shape)
    print('train_y shape:', train_y.shape)

    print('\nTraining LSTM...')
    model = Sequential()
    model.add(
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_size,
                  embeddings_initializer=Constant(pretrained_weights),
                  trainable=False))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(512, return_sequences=False))
    model.add(Dropout(0.5))
    model.add(Dense(units=vocab_size, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

    def on_epoch_end(epoch, _):
        print('\nGenerating text after epoch: %d' % epoch)
        generate_examples(model, sorted_data_driven_vocabulary, word_model)

    print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

    save_callback = ModelCheckpoint(
        filepath='model.word_level_rnn_with_embeddings.epoch_{epoch:02d}.hdf5',
        save_weights_only=False)

    if full_model_filename is not None:
        try:
            print('Loading model {} with initial epoch = {}'.format(
                full_model_filename, initial_epoch))
            model = load_model(full_model_filename)
        except FileNotFoundError:
            print('Model not found. Setting initial epoch to 0.')
            initial_epoch = 0

    model.fit(train_x,
              train_y,
              batch_size=128,
              epochs=num_epochs,
              initial_epoch=initial_epoch,
              callbacks=[print_callback, save_callback])

    return model, sorted_data_driven_vocabulary
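# A hedged usage sketch for train_model(); the corpus path and the epoch count are
# illustrative placeholders.
if __name__ == '__main__':
    trained_model, vocabulary = train_model('corpus.txt',
                                            max_sentence_len=40,
                                            num_epochs=20)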
Example #13
from keras.layers.core import Dense,Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from lstm_multime_pre import data_pre

str=1  # set the time scale

if(str==1):
    X_train,X_test,y_train,y_test,y_scale=data_pre(1)
elif(str==5):
    X_train,X_test,y_train,y_test,y_scale=data_pre(5)
elif(str==10):
    X_train,X_test,y_train,y_test,y_scale=data_pre(10)
model=Sequential()
model.add(LSTM(128,return_sequences=True,input_shape=(1,3)))
model.add(Dropout(0.2))
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(1,activation='sigmoid'))
start = time.time()
model.compile(loss='mse',optimizer='adam')
model.fit(X_train,y_train,batch_size=72,epochs=500,validation_split=0.1,verbose=1)
print("Compliation Time : ",time.time()-start)
print('Saving the model')
if(str==1):
    model.save('model_1_multime.h5')
elif(str==5):
    model.save('model_5_multime.h5')
elif(str==10):
    model.save('model_10_multime.h5')
Example #14
    train_y_state = train_y_state.reshape([time_length,1])
    print("Y is ",train_y_state[0:2,0])
    print(train_x.shape,Y.shape)
    print(train_y_state.shape)


    # define X-fold cross validation
    model = Sequential()

    # parameter settings
    #model.add(RepeatVector(seq_inout_length,input_dim=input_num))

    # with stateful=True, batch_input_shape must be given as a 3-D shape (including the batch size)
    model.add(LSTM(units=n_hidden,
                   return_sequences=False,
                   stateful=False,
                   batch_input_shape=(None,seq_in_length,input_num)))

    #model.add(TimeDistributed(Dense(units=output_num)))
    model.add(Activation('tanh'))
    model.add(Dense(output_num))
    model.compile(optimizer='adam',loss='mean_squared_error',metrics=['mae'])
    #model.compile(optimizer='adam',loss='mape',metrics=['acc'])
    #model.compile(optimizer='adam',loss=mix_mse_mape,metrics=['acc'])
    model.summary()

    ### make callbacks
    ### add for TensorBoard
    tb_cb = keras.callbacks.TensorBoard(log_dir=abspath_tflog,
                                        #histogram_freq=1,
                                        write_grads=True,
Example #15
trainnumlength = 30
df = pd.read_csv('traindata1023_30.csv', header=None)  # read in the stock data

data = df.iloc[:, 0:df.shape[1]].values  # take the training data

# df_test=pd.read_csv('traindata_30.csv',header=None)     # read in the stock data
# data_test=df.iloc[:,0:trainnumlength+1].values  # take columns 1-20 as training data

## Build the network
EMBEDDING_SIZE = 128
HIDDEN_LAYER_SIZE = 64
BATCH_SIZE = 32
NUM_EPOCHS = 10
model = Sequential()
model.add(Embedding(8500, EMBEDDING_SIZE, input_length=trainnumlength - 1))
model.add(LSTM(HIDDEN_LAYER_SIZE, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1))
model.add(Activation("sigmoid"))
model.compile(loss="binary_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
## Train the network
model.save('btcmodel1.h5')
del model
model = load_model('btcmodel1.h5')

# Convert labels to categorical one-hot encoding
# one_hot_labels = keras.utils.to_categorical(labels, num_classes=10)


def main():
Example #16
x1_test = sequence.pad_sequences(x1_test, maxlen=max_len)

x2_test = tk_train.texts_to_sequences(test.question2.values.astype(str))
x2_test = sequence.pad_sequences(x2_test, maxlen=max_len)


print("[Building the network]")
question1 = Input(shape=(max_len,))
question2 = Input(shape=(max_len,))

q1 = Embedding(word_index + 1,
                 300,
                 weights=[embedding_matrix],
                 input_length=max_len,
                 trainable=False)(question1)
q1 = Bidirectional(LSTM(128, return_sequences=True), merge_mode="sum")(q1)

q2 = Embedding(word_index + 1,
                 300,
                 weights=[embedding_matrix],
                 input_length=max_len,
                 trainable=False)(question2)
q2 = Bidirectional(LSTM(128, return_sequences=True), merge_mode="sum")(q2)

attention = dot([q1,q2], [1,1])
attention = Flatten()(attention)
attention = Dense((max_len*128))(attention)
attention = Reshape((max_len, 128))(attention)

merged = add([q1,attention])
merged = Flatten()(merged)
Example #17
            text_file.write(row + '\n')
        row = ''
        for k in range(0, num_features - 5):
            row += str(y_a[i, k])
            row += ','
        text_file.write(row + '\n')
        text_file.write('batch end\n')
print('Matrix file has been created...')

# build the model:
print('Build model...')
main_input = Input(shape=(maxlen, num_features), name='main_input')
# train a 2-layer LSTM with one shared layer
l1 = LSTM(par_neurons,
          consume_less='gpu',
          init='glorot_uniform',
          return_sequences=True,
          dropout_W=par_dropout)(main_input)  # the shared layer
b1 = BatchNormalization()(l1)
l2_1 = LSTM(par_neurons,
            consume_less='gpu',
            init='glorot_uniform',
            return_sequences=False,
            dropout_W=par_dropout)(
                b1)  # the layer specialized in activity prediction
b2_1 = BatchNormalization()(l2_1)
l2_2 = LSTM(par_neurons,
            consume_less='gpu',
            init='glorot_uniform',
            return_sequences=False,
            dropout_W=par_dropout)(
Example #18
def main():
    start_time = time.time()

    parser = argparse.ArgumentParser(
        prog='trainLSTM_MLP.py',
        description='Train LSTM-MLP model for visual question answering')
    parser.add_argument('--mlp-hidden-units',
                        type=int,
                        default=1024,
                        metavar='<mlp-hidden-units>')
    parser.add_argument('--lstm-hidden-units',
                        type=int,
                        default=512,
                        metavar='<lstm-hidden-units>')
    parser.add_argument('--mlp-hidden-layers',
                        type=int,
                        default=3,
                        metavar='<mlp-hidden-layers>')
    parser.add_argument('--lstm-hidden-layers',
                        type=int,
                        default=1,
                        metavar='<lstm-hidden-layers>')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.5,
                        metavar='<dropout-rate>')
    parser.add_argument('--mlp-activation',
                        type=str,
                        default='tanh',
                        metavar='<activation-function>')
    parser.add_argument('--num-epochs',
                        type=int,
                        default=100,
                        metavar='<num-epochs>')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        metavar='<batch-size>')
    parser.add_argument('--learning-rate',
                        type=float,
                        default=0.001,
                        metavar='<learning-rate>')
    parser.add_argument('--dev-accuracy-path',
                        type=str,
                        required=True,
                        metavar='<accuracy-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    vgg_img_dim = 4096
    inc_img_dim = 2048
    max_len = 30
    ######################
    #      Load Data     #
    ######################

    print('Loading data...')

    train_id_pairs, train_image_ids = LoadIds('train')
    dev_id_pairs, dev_image_ids = LoadIds('dev')

    train_questions = LoadQuestions('train')
    dev_questions = LoadQuestions('dev')

    train_choices = LoadChoices('train')
    dev_choices = LoadChoices('dev')

    train_answers = LoadAnswers('train')
    dev_answers = LoadAnswers('dev')

    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    # Model Descriptions #
    ######################
    print('Generating and compiling model...')

    # VGG model (VGG features)
    vgg_model = Sequential()
    vgg_model.add(Reshape(input_shape=(vgg_img_dim, ), dims=(vgg_img_dim, )))

    # Inception model
    inception_model = Sequential()
    inception_model.add(
        Reshape(input_shape=(inc_img_dim, ), dims=(inc_img_dim, )))

    # language model (LSTM)
    language_model = Sequential()
    if args.lstm_hidden_layers == 1:
        language_model.add(
            LSTM(output_dim=args.lstm_hidden_units,
                 return_sequences=False,
                 input_shape=(max_len, word_vec_dim)))
    else:
        language_model.add(
            LSTM(output_dim=args.lstm_hidden_units,
                 return_sequences=True,
                 input_shape=(max_len, word_vec_dim)))
        for i in range(args.lstm_hidden_layers - 2):
            language_model.add(
                LSTM(output_dim=args.lstm_hidden_units, return_sequences=True))
        language_model.add(
            LSTM(output_dim=args.lstm_hidden_units, return_sequences=False))

    # feedforward model (MLP)
    model = Sequential()
    model.add(
        Merge([language_model, vgg_model, inception_model],
              mode='concat',
              concat_axis=1))
    for i in range(args.mlp_hidden_layers):
        model.add(Dense(args.mlp_hidden_units, init='uniform'))
        model.add(Activation(args.mlp_activation))
        model.add(Dropout(args.dropout))
    model.add(Dense(word_vec_dim))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_filename = 'models/2feats_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.1f' % (
        args.lstm_hidden_units, args.lstm_hidden_layers, args.mlp_hidden_units,
        args.mlp_hidden_layers, args.mlp_activation, args.learning_rate,
        args.dropout)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    rmsprop = RMSprop(lr=args.learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop)
    print('Compilation finished.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, vgg_img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load Inception features
    print('Loading Inception features...')
    INC_features, inc_img_map = LoadInceptionFeatures()
    print('Inception features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # training batches
    train_question_batches = [
        b for b in MakeBatches(
            train_questions, args.batch_size, fillvalue=train_questions[-1])
    ]
    train_answer_batches = [
        b for b in MakeBatches(train_answers['toks'],
                               args.batch_size,
                               fillvalue=train_answers['toks'][-1])
    ]
    train_image_batches = [
        b for b in MakeBatches(
            train_image_ids, args.batch_size, fillvalue=train_image_ids[-1])
    ]
    train_indices = list(range(len(train_question_batches)))

    # validation batches
    dev_question_batches = [
        b for b in MakeBatches(
            dev_questions, args.batch_size, fillvalue=dev_questions[-1])
    ]
    dev_answer_batches = [
        b for b in MakeBatches(dev_answers['labs'],
                               args.batch_size,
                               fillvalue=dev_answers['labs'][-1])
    ]
    dev_choice_batches = [
        b for b in MakeBatches(
            dev_choices, args.batch_size, fillvalue=dev_choices[-1])
    ]
    dev_image_batches = [
        b for b in MakeBatches(
            dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1])
    ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Training      #
    ######################

    acc_file = open(args.dev_accuracy_path, 'w')
    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    # define interrupt handler
    def PrintDevAcc():
        print('Max validation accuracy epoch: %i' % max_acc_epoch)
        print(dev_accs)

    def InterruptHandler(sig, frame):
        print(str(sig))
        PrintDevAcc()
        sys.exit(-1)

    signal.signal(signal.SIGINT, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    # print training information
    print('-' * 80)
    print('Training Information')
    print('# of LSTM hidden units: %i' % args.lstm_hidden_units)
    print('# of LSTM hidden layers: %i' % args.lstm_hidden_layers)
    print('# of MLP hidden units: %i' % args.mlp_hidden_units)
    print('# of MLP hidden layers: %i' % args.mlp_hidden_layers)
    print('Dropout: %f' % args.dropout)
    print('MLP activation function: %s' % args.mlp_activation)
    print('# of training epochs: %i' % args.num_epochs)
    print('Batch size: %i' % args.batch_size)
    print('Learning rate: %f' % args.learning_rate)
    print('# of train questions: %i' % len(train_questions))
    print('# of dev questions: %i' % len(dev_questions))
    print('-' * 80)
    acc_file.write('-' * 80 + '\n')
    acc_file.write('Training Information\n')
    acc_file.write('# of LSTM hidden units: %i\n' % args.lstm_hidden_units)
    acc_file.write('# of LSTM hidden layers: %i\n' % args.lstm_hidden_layers)
    acc_file.write('# of MLP hidden units: %i\n' % args.mlp_hidden_units)
    acc_file.write('# of MLP hidden layers: %i\n' % args.mlp_hidden_layers)
    acc_file.write('Dropout: %f\n' % args.dropout)
    acc_file.write('MLP activation function: %s\n' % args.mlp_activation)
    acc_file.write('# of training epochs: %i\n' % args.num_epochs)
    acc_file.write('Batch size: %i\n' % args.batch_size)
    acc_file.write('Learning rate: %f\n' % args.learning_rate)
    acc_file.write('# of train questions: %i\n' % len(train_questions))
    acc_file.write('# of dev questions: %i\n' % len(dev_questions))
    acc_file.write('-' * 80 + '\n')

    # start training
    print('Training started...')
    for k in range(args.num_epochs):
        print('-' * 80)
        print('Epoch %i' % (k + 1))
        progbar = generic_utils.Progbar(len(train_indices) * args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_question_batch = GetQuestionsTensor(train_question_batches[i],
                                                  word_embedding, word_map)
            X_vgg_image_batch = GetImagesMatrix(train_image_batches[i],
                                                vgg_img_map, VGG_features)
            X_inc_image_batch = GetImagesMatrix(train_image_batches[i],
                                                inc_img_map, INC_features)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i],
                                              word_embedding, word_map)
            loss = model.train_on_batch(
                [X_question_batch, X_vgg_image_batch, X_inc_image_batch],
                Y_answer_batch)
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])
        print('Time: %f s' % (time.time() - start_time))

        # evaluate on dev set
        pbar = generic_utils.Progbar(
            len(dev_question_batches) * args.batch_size)

        dev_correct = 0

        # feed forward
        for i in range(len(dev_question_batches)):
            X_question_batch = GetQuestionsTensor(dev_question_batches[i],
                                                  word_embedding, word_map)
            X_vgg_image_batch = GetImagesMatrix(dev_image_batches[i],
                                                vgg_img_map, VGG_features)
            X_inc_image_batch = GetImagesMatrix(dev_image_batches[i],
                                                inc_img_map, INC_features)
            prob = model.predict_proba(
                [X_question_batch, X_vgg_image_batch, X_inc_image_batch],
                args.batch_size,
                verbose=0)

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i],
                                            word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate cosine distances
            for j in range(5):
                similarity[j] = np.diag(
                    cosine_similarity(prob, choice_feats[j]))
            # take argmax of cosine distances
            pred = np.argmax(similarity, axis=0) + 1

            if i != (len(dev_question_batches) - 1):
                dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)
            else:
                num_padding = args.batch_size * len(
                    dev_question_batches) - len(dev_questions)
                last_idx = args.batch_size - num_padding
                dev_correct += np.count_nonzero(
                    dev_answer_batches[i][:last_idx] == pred[:last_idx])
            pbar.add(args.batch_size)

        dev_acc = float(dev_correct) / len(dev_questions)
        dev_accs.append(dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Time: %f s' % (time.time() - start_time))

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)

    model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k + 1))
    print(dev_accs)
    for acc in dev_accs:
        acc_file.write('%f\n' % acc)
    print('Best validation accuracy: %f; epoch#%i' % (max_acc,
                                                      (max_acc_epoch + 1)))
    acc_file.write('Best validation accuracy: %f; epoch#%i\n' %
                   (max_acc, (max_acc_epoch + 1)))
    print('Training finished.')
    acc_file.write('Training finished.\n')
    print('Time: %f s' % (time.time() - start_time))
    acc_file.write('Time: %f s\n' % (time.time() - start_time))
    acc_file.close()
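# A hedged NumPy illustration (sizes are made up) of the answer-selection step used in the
# dev loop above: the predicted answer vector is compared against the word vectors of the
# 5 choices and the most similar choice (1-indexed) wins.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

prob_demo = np.random.rand(4, 300)                  # stands in for model.predict_proba output
choice_feats_demo = np.random.rand(5, 4, 300)       # stands in for GetChoicesTensor output
similarity_demo = np.zeros((5, 4), float)
for j in range(5):
    similarity_demo[j] = np.diag(cosine_similarity(prob_demo, choice_feats_demo[j]))
pred_demo = np.argmax(similarity_demo, axis=0) + 1  # choice ids are 1..5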
Example #19
ACTIVATION = 'tanh'
LOSS = 'mean_squared_error'
OPTIMIZER = 'adam'

# Load the data.

accuracies = []
for speaker in SPEAKERS:
    features_test, labels_test, vocab = get_features_test(speakers=[speaker],
                                                          is_single=True,
                                                          is_demo=False)
    features_train, labels_train, vocab = get_features_train(
        speakers=[speaker], is_single=True)
    model = Sequential()
    model.add(
        LSTM(128, input_shape=features_train[0].shape, return_sequences=True))
    model.add(LSTM(128))
    model.add(Dense(len(vocab), activation=ACTIVATION))
    model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=['accuracy'])
    print(f"Current Speaker Training: {speaker}")
    print(model.summary())

    history = model.fit(
        features_train,
        labels_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        verbose=True,
        validation_split=0.2
    )  # 0.18 means the whole dataset is split in a 70/15/15 ratio
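    # Hedged follow-up (not in the original snippet): score the held-out split for this
    # speaker that was loaded above and collect it, since `accuracies` is never filled here.
    loss, acc = model.evaluate(features_test, labels_test, batch_size=BATCH_SIZE, verbose=0)
    accuracies.append(acc)
    print(f"Speaker {speaker} test accuracy: {acc:.3f}")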
Example #20
def model_lstm(len_seq=30,
               im_size=(40, 40),
               fc_size=128,
               save_weight='untrained_weight.h5',
               save_topo='untrained_topo.json',
               save_result=True,
               lr=0.001,
               momentum=0.6,
               decay=0.0005,
               nesterov=True,
               rho=0.9,
               epsilon=1e-6,
               opt='sgd',
               load_cache=False,
               cnn=False,
               dict_size=53,
               filter_len=5):

    try:
        if load_cache:
            return read_lstm(weights_filename=save_weight,
                             topo_filename=save_topo)
    except:
        pass

    start_time = time.time()

    #Starting LSTM Model here

    model = Sequential()

    model.add(Dense(fc_size, input_shape=(len_seq, im_size[0] * im_size[1])))

    # Masking layer

    model.add(Masking(mask_value=0.0))

    #First LSTM layer

    model.add(LSTM(fc_size, return_sequences=True))

    # Second LSTM layer

    model.add(LSTM(fc_size, return_sequences=False))

    # Final Dense layer

    model.add(Dense(dict_size))

    #softmax layer
    model.add(Activation('softmax'))

    #Build and pass optimizer

    if opt == 'sgd':
        optimizer = SGD(lr=lr,
                        momentum=momentum,
                        decay=decay,
                        nesterov=nesterov)
    else:
        # assumed fallback: let Keras resolve the optimizer from its identifier string,
        # so compile() below never sees an undefined `optimizer`
        optimizer = opt

    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    end_time = time.time()

    print(" Total time for compilation %d" % (end_time - start_time))

    if save_result:
        save_lstm(model, save_weight, save_topo)

    return model
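# A hedged usage sketch for model_lstm(); every argument value here is illustrative.
model = model_lstm(len_seq=30, im_size=(40, 40), fc_size=128,
                   opt='sgd', save_result=False)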
Example #21
            for k, comp in enumerate(tag_index):
                if comp == col:
                    tag_row.append(k)
        tag.append(tag_row)
        #print(str(i) + "-th row:" + str(tag[i]))
    tag = MultiLabelBinarizer().fit_transform(tag)
    #print(tag.shape)

    ########## Layers #########

    print('Start Building Model...')

    model = Sequential()
    model.add(Embedding(len(text_index) + 1, 1024, input_length=306))

    model.add(LSTM(128, activation='relu', dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(38, activation='sigmoid'))
    '''
    embedding_layer = Embedding(len(text_index), 64, input_length = 306, trainable = False)
    '''
    ########## Compilation ###########
    adamax = Adamax(lr=0.002,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-08,
                    decay=0.0)
    model.compile(loss='binary_crossentropy',
                  metrics=[fmeasure, precision, recall],
Example #22
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
import lstm, time

# Step 1: load data
X_train, y_train, X_test, y_test = lstm.load_data('close.csv', 50, True)

# Step 2: build model
model = Sequential()

model.add(LSTM(input_dim=1, output_dim=50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))

model.add(Dense(output_dim=1))
model.add(Activation('linear'))

start = time.time()
model.compile(loss='mse', optimizer='rmsprop')
print('compilation time : %f\n' % (time.time() - start))

# Step 3: train the model
model.fit(X_train, y_train, batch_size=128, nb_epoch=1, validation_split=0.05)
#model.fit(X_train, y_train, batch_size=512, nb_epoch=1, validation_split=0.05)

# Step 4: plot the predictions
predictions = lstm.predict_sequences_multiple(model, X_test, 50, 50)
lstm.plot_results_multiple(predictions, y_test, 50)
Example #23
def main():
    input_text = ['1 2 3 4 5'
                  , '6 7 8 9 10'
                  , '11 12 13 14 15'
                  , '16 17 18 19 20'
                  , '21 22 23 24 25']
    tar_text = ['one two three four five'
                , 'six seven eight nine ten'
                , 'eleven twelve thirteen fourteen fifteen'
                , 'sixteen seventeen eighteen nineteen twenty'
                , 'twenty_one twenty_two twenty_three twenty_four twenty_five']

    input_list = []
    tar_list = []

    for tmp_input in input_text:
        input_list.append(tokenize(tmp_input))
    for tmp_tar in tar_text:
        tar_list.append(tokenize(tmp_tar))

    vocab = sorted(reduce(lambda x, y: x | y, (set(tmp_list) for tmp_list in input_list + tar_list)))
    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1  # Keras embedding layers must use len(vocab) + 1
    input_maxlen = max(map(len, (x for x in input_list)))
    tar_maxlen = max(map(len, (x for x in tar_list)))
    output_dim = vocab_size
    hidden_dim = 20

    print('-')
    print('Vocab size:', vocab_size, 'unique words')
    print('Input max length:', input_maxlen, 'words')
    print('Target max length:', tar_maxlen, 'words')
    print('Dimension of hidden vectors:', hidden_dim)
    print('Number of training stories:', len(input_list))
    print('Number of test stories:', len(input_list))
    print('-')
    print('Vectorizing the word sequences...')
    word_to_idx = dict((c, i + 1) for i, c in enumerate(vocab))  # encoding: map each word to an integer index
    idx_to_word = dict((i + 1, c) for i, c in enumerate(vocab))  # decoding: map each integer index back to a word
    inputs_train, tars_train = vectorize_stories(input_list, tar_list, word_to_idx, input_maxlen, tar_maxlen, vocab_size)

    decoder_mode = 1  # 0: simplest mode, 1: feed-back mode [1], 2: peek mode [2], 3: attention mode [3]
    if decoder_mode == 3:
        encoder_top_layer = LSTM(hidden_dim, return_sequences=True)
    else:
        encoder_top_layer = LSTM(hidden_dim)

    if decoder_mode == 0:
        decoder_top_layer = LSTM(hidden_dim, return_sequences=True)
        decoder_top_layer.get_weights()
    elif decoder_mode == 1:
        decoder_top_layer = LSTMDecoder(hidden_dim=hidden_dim, output_dim=hidden_dim
                                        , output_length=tar_maxlen, state_input=False, return_sequences=True)
    elif decoder_mode == 2:
        decoder_top_layer = LSTMDecoder2(hidden_dim=hidden_dim, output_dim=hidden_dim
                                         , output_length=tar_maxlen, state_input=False, return_sequences=True)
    elif decoder_mode == 3:
        decoder_top_layer = AttentionDecoder(hidden_dim=hidden_dim, output_dim=hidden_dim
                                             , output_length=tar_maxlen, state_input=False, return_sequences=True)

    en_de_model = Sequential()
    en_de_model.add(Embedding(input_dim=vocab_size,
                              output_dim=hidden_dim,
                              input_length=input_maxlen))
    en_de_model.add(encoder_top_layer)
    if decoder_mode == 0:
        en_de_model.add(RepeatVector(tar_maxlen))
    en_de_model.add(decoder_top_layer)

    en_de_model.add(TimeDistributedDense(output_dim))
    en_de_model.add(Activation('softmax'))
    print('Compiling...')
    time_start = time.time()
    en_de_model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    time_end = time.time()
    print('Compiled, cost time: %f second!' % (time_end - time_start))
    for iter_num in range(5000):
        en_de_model.fit(inputs_train, tars_train, batch_size=3, nb_epoch=1, show_accuracy=True)
        out_predicts = en_de_model.predict(inputs_train)
        for i_idx, out_predict in enumerate(out_predicts):
            predict_sequence = []
            for predict_vector in out_predict:
                next_index = np.argmax(predict_vector)
                next_token = idx_to_word[next_index]
                predict_sequence.append(next_token)
            print('Target output:', tar_text[i_idx])
            print('Predict output:', predict_sequence)

        print('Current iter_num is:%d' % iter_num)
if __name__ == "__main__":
    #This neural network is the Q-function, run it like this:
    #model.predict(state.reshape(1,64), batch_size=1)
    batch_size = 7
    num_features = 7
    epochs = 10
    gamma = 0.95  # since the reward can be several time steps away, make gamma high
    epsilon = 1
    batchSize = 100
    buffer = 200
    replay = []
    learning_progress = []

    model = Sequential()
    model.add(LSTM(64,
                   input_shape=(1, num_features),
                   return_sequences=True,
                   stateful=False))
    model.add(Dropout(0.5))

    model.add(LSTM(64,
                   input_shape=(1, num_features),
                   return_sequences=False,
                   stateful=False))
    model.add(Dropout(0.5))

    model.add(Dense(7, init='lecun_uniform'))
    model.add(Activation('linear')) #linear output so we can have range of real-valued outputs

    rms = RMSprop()
    adam = Adam()
    model.compile(loss='mse', optimizer=adam)
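    # Note: with the LSTM layers above the state must be 3-D,
    # shaped (batch, timesteps, features), e.g.
    # model.predict(state.reshape(1, 1, num_features), batch_size=1)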
Example #25
0
    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        print(self.M)
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length,))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length,))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.output_dim))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
Example #26
0
# print (X_train.shape)
# print (Y_train.shape)
# print (X_test.shape)
# print (Y_test.shape)

# # X_train = np.random.randn(10,20,3)
# # Y_train = np.random.randn(10,20)
# print (X_train.shape)
# print ("++++++++")
# print (Y_train.shape)


learning_rate = 0.001
model = Sequential()
model.add(LSTM(
    input_dim=5,
    output_dim=512,
    return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(
    512,
    return_sequences=False))
model.add(Dropout(0.5))

model.add(Dense(
    output_dim=1))
model.add(Activation('linear'))

start = time.time()
model.compile(loss="mean_squared_error", optimizer=Adam(learning_rate))
print ('compilation time : ', time.time() - start)
    def _build_network(self, vocab_size, maxlen, emb_weights=[], c_emb_weights=[], hidden_units=256, trainable=True,
                       batch_size=1):

        print('Building model...')

        context_input = Input(name='context', batch_shape=(batch_size, maxlen))

        if (len(c_emb_weights) == 0):
            c_emb = Embedding(vocab_size, 256, input_length=maxlen, embeddings_initializer='glorot_normal',
                              trainable=trainable)(context_input)
        else:
            c_emb = Embedding(vocab_size, c_emb_weights.shape[1], input_length=maxlen, weights=[c_emb_weights],
                              trainable=trainable)(context_input)

        c_cnn1 = Convolution1D(int(hidden_units / 2), 5, kernel_initializer='he_normal', bias_initializer='he_normal',
                               activation='sigmoid', padding='valid', use_bias=True, input_shape=(1, maxlen))(c_emb)
        c_cnn2 = Convolution1D(hidden_units, 5, kernel_initializer='he_normal', bias_initializer='he_normal',
                               activation='sigmoid', padding='valid', use_bias=True, input_shape=(1, maxlen - 2))(
            c_cnn1)

        c_lstm1 = LSTM(hidden_units, kernel_initializer='he_normal', recurrent_initializer='orthogonal',
                       bias_initializer='he_normal', activation='sigmoid', recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25, recurrent_dropout=.0, unit_forget_bias=False, return_sequences=False)(c_cnn2)

        c_lstm2 = LSTM(hidden_units, kernel_initializer='he_normal', recurrent_initializer='orthogonal',
                       bias_initializer='he_normal', activation='sigmoid', recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25, recurrent_dropout=.0, unit_forget_bias=False, return_sequences=False,
                       go_backwards=True)(c_cnn2)

        c_merged = add([c_lstm1, c_lstm2])
        c_merged = Dropout(0.25)(c_merged)

        text_input = Input(name='text', batch_shape=(batch_size, maxlen))

        if (len(emb_weights) == 0):
            emb = Embedding(vocab_size, 256, input_length=maxlen, embeddings_initializer='glorot_normal',
                            trainable=trainable)(text_input)
        else:
            emb = Embedding(vocab_size, c_emb_weights.shape[1], input_length=maxlen, weights=[emb_weights],
                            trainable=trainable)(text_input)

        t_cnn1 = Convolution1D(int(hidden_units / 2), 5, kernel_initializer='he_normal', bias_initializer='he_normal',
                               activation='sigmoid', padding='valid', use_bias=True, input_shape=(1, maxlen))(emb)
        t_cnn2 = Convolution1D(hidden_units, 5, kernel_initializer='he_normal', bias_initializer='he_normal',
                               activation='sigmoid', padding='valid', use_bias=True, input_shape=(1, maxlen - 2))(
            t_cnn1)

        t_lstm1 = LSTM(hidden_units, kernel_initializer='he_normal', recurrent_initializer='he_normal',
                       bias_initializer='he_normal', activation='sigmoid', recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25, recurrent_dropout=0.25, unit_forget_bias=False, return_sequences=False)(t_cnn2)

        t_lstm2 = LSTM(hidden_units, kernel_initializer='he_normal', recurrent_initializer='he_normal',
                       bias_initializer='he_normal', activation='sigmoid', recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25, recurrent_dropout=0.25, unit_forget_bias=False, return_sequences=False,
                       go_backwards=True)(t_cnn2)

        t_merged = add([t_lstm1, t_lstm2])
        t_merged = Dropout(0.25)(t_merged)

        awc_input = Input(name='awc', batch_shape=(batch_size, 11))

        t_merged = Reshape((-1, 1))(t_merged)

        t_merged = multiply([t_merged, awc_input])

        t_merged = Flatten()(t_merged)

        merged = concatenate([c_merged, t_merged], axis=1)

        dnn_1 = Dense(hidden_units, kernel_initializer="he_normal", activation='sigmoid')(merged)
        dnn_1 = Dropout(0.25)(dnn_1)
        dnn_2 = Dense(2, activation='sigmoid')(dnn_1)

        softmax = Activation('softmax')(dnn_2)

        model = Model(inputs=[context_input, text_input, awc_input], outputs=softmax)

        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        print('No. of parameters:', model.count_params())

        print(model.summary())
        return model
Example #28
0
class NeuralTuringMachine(Recurrent):
    def __init__(self, output_dim, memory_size, shift_range=3,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = memory_size[1]
        self.m_length = memory_size[0]
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self, input_shape):
        input_leng, input_dim = input_shape[1:]
        # self.input = T.tensor3()

        self.lstm = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init)

        self.lstm.build(input_shape)

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        self.init_h = backend.zeros((self.output_dim))
        self.init_wr = self.lstm.init((self.n_slots,))
        self.init_ww = self.lstm.init((self.n_slots,))

        # write
        self.W_e = self.lstm.init((self.output_dim, self.m_length))  # erase
        self.b_e = backend.zeros((self.m_length))
        self.W_a = self.lstm.init((self.output_dim, self.m_length))  # add
        self.b_a = backend.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.lstm.init((self.output_dim, self.m_length))
        self.b_k_read = self.lstm.init((self.m_length,))
        self.W_c_read = self.lstm.init((self.output_dim, 3))
        self.b_c_read = backend.zeros((3))
        self.W_s_read = self.lstm.init((self.output_dim, self.shift_range))
        self.b_s_read = backend.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.lstm.init((self.output_dim, self.m_length))
        self.b_k_write = self.lstm.init((self.m_length,))
        self.W_c_write = self.lstm.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = backend.zeros((3))
        self.W_s_write = self.lstm.init((self.output_dim, self.shift_range))
        self.b_s_write = backend.zeros((self.shift_range))

        self.C = circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.lstm.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        self.init_c = backend.zeros((self.output_dim))
        self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def read(self, w, M):
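        # attention-weighted read: sum the memory rows M weighted by the read weights w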
        return (w[:, :, None] * M).sum(axis=1)

    def write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return Mout

    def get_content_w(self, beta, k, M):
        num = beta[:, None] * cosine_similarity(M, k)
        return soft_max(num)

    def get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = re_norm(wtilda ** gamma[:, None])
        return wout

    def get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_output_shape_for(self, input_shape):
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def call(self, x, mask = None):
        M_tm1, wr_tm1, ww_tm1 = mask[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
        # read
        h_tm1 = mask[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self.get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self.get_content_w(beta_read, k_read, M_tm1)
        wr_t = self.get_location_w(g_read, s_read, self.C, gamma_read,
                                   wc_read, wr_tm1)
        M_read = self.read(wr_t, M_tm1)

        # update controller
        h_t = update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self.get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self.get_content_w(beta_write, k_write, M_tm1)
        ww_t = self.get_location_w(g_write, s_write, self.C, gamma_write,
                                   wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self.write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)

        return h_t[0], [M_t, wr_t, ww_t] + h_t
    # Keras Model
    model = Sequential()
    # Embedding layer (lookup table of pre-trained word vectors, kept frozen)
    model.add(Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False))
    model.add(Dropout(0.25))
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=kernel_size,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1
                            ))
    model.add(MaxPooling1D(pool_length=2))

    # lstm layer:
    model.add(LSTM(hidden_dim))

    # We project onto a single unit output layer, and squash it with a sigmoid:
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='sgd')
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch)

    y_pred = model.predict(X_test, batch_size=batch_size).flatten()
    for i in range(len(y_pred)):
        if y_pred[i] >= 0.5:
            y_pred[i] = 1
        else:
            y_pred[i] = 0
Example #30
0
class NeuralTuringMachine(Recurrent):
    def __init__(self, output_dim, memory_size, shift_range=3,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = memory_size[1]
        self.m_length = memory_size[0]
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.input_dim = input_dim
        self.input_length = input_length
        self.u = None
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self, input_shape):
        self.u = input_shape
        input_leng, input_dim = input_shape[1:]
        # self.input = T.tensor3()

        self.rnn = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init)
        self.rnn.build(input_shape)
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length,))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length,))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        self.init_c = K.zeros((self.output_dim))
        self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def _write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return Mout

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return wout

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_initial_states(self, X):
        batch_size = X.shape[0]
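        # broadcast the learned scalar memory init to (batch, n_slots, m_length),
        # then flatten it so it can travel through K.rnn as a flat state vector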
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
            self.m_length, axis=2)
        init_M = init_M.flatten(ndim=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                init_h, init_c]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def call(self, x, mask=None):

        input_shape = self.u
        print(input_shape)
        if K._BACKEND == 'tensorflow':
            if not input_shape[1]:
                raise Exception('When using TensorFlow, you should define '
                                'explicitly the number of timesteps of '
                                'your sequences.\n'
                                'If your first layer is an Embedding, '
                                'make sure to pass it an "input_length" '
                                'argument. Otherwise, make sure '
                                'the first layer has '
                                'an "input_shape" or "batch_input_shape" '
                                'argument, including the time axis. '
                                'Found input shape at layer ' + self.name +
                                ': ' + str(input_shape))
        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(x)
        constants = self.get_constants(x)
        preprocessed_input = self.preprocess_input(x)

        last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                             initial_states,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             constants=constants,
                                             unroll=self.unroll,
                                             input_length=input_shape[1])
        if self.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.states[i], states[i]))

        if self.return_sequences:
            return outputs
        else:
            return last_output

    def step(self, x, states):
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)

        return h_t[0], [M_t, wr_t, ww_t] + h_t
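
As a rough orientation, the sketch below shows how a layer like the NeuralTuringMachine above could be wired into a model. It assumes the class and its helper functions (_circulant, _update_controller, etc.) from the surrounding module are in scope and that the Keras 1 / Theano stack they target is installed; every shape and hyperparameter here is invented for illustration.

from keras.models import Sequential
from keras.layers import TimeDistributed, Dense

ntm_model = Sequential()
ntm_model.add(NeuralTuringMachine(output_dim=64,
                                  memory_size=(20, 128),  # (m_length, n_slots)
                                  shift_range=3,
                                  input_dim=8, input_length=40,
                                  return_sequences=True))
ntm_model.add(TimeDistributed(Dense(8, activation='sigmoid')))
ntm_model.compile(loss='binary_crossentropy', optimizer='adam')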
Example #31
0
def get_lstm_layers(input, nhidden, drop_rate=0.3, nlayers=1, droph=False,
                    activation='tanh', inner_activation='relu', bidirectional=False,
                    stateful=False, return_sequences=True, init='orthogonal',
                    bottleneck=None, prefix=""):

    logger.debug("get_lstm_layers")
    logger.debug("prefix %s" % prefix)
    xcurr = input

    if gethostname() == "schaffner.inf.ed.ac.uk":
        consume_less = "gpu"
    else:
        consume_less = "cpu"
    logger.debug(consume_less)

    if bottleneck is not None:
        bottlelayer, bottlesize = bottleneck
        logger.info("****Bottleneck %d %d" % (bottlelayer, bottlesize))
    else:
        bottlelayer, bottlesize = (None, None)

    for i in range(nlayers):
        if bottleneck is not None:
            if i == bottlelayer:
                logger.info("bottleneck: %d %d %d" % (i, bottlelayer, bottlesize))
                xcurr = TimeDistributed(Dense(bottlesize, activation=activation))(xcurr)

        logger.info("layer: %d" % i)
        xl = LSTM(nhidden, activation=activation, inner_activation=inner_activation, inner_init=init,
                  #dropout_W=drop_rate,
                  #dropout_U=drop_rate,
                  #W_regularizer=l2(0.01), U_regularizer=l2(0.01), b_regularizer=l2(0.01),
                  consume_less=consume_less,
                  return_sequences=return_sequences, stateful=stateful)
        xl.name = prefix + xl.name

        xf = xl(xcurr)
        if droph:
            dl = Dropout(drop_rate)
            dl.name = prefix + dl.name
            xf = dl(xf)

        if bidirectional:
            print("Bidirectional")

            xbl = LSTM(nhidden, activation=activation, go_backwards=True, inner_activation=inner_activation, inner_init=init,
                       #dropout_W=drop_rate,
                       #dropout_U=drop_rate,
                       #W_regularizer=l2(0.01), U_regularizer=l2(0.01), b_regularizer=l2(0.01),
                       consume_less=consume_less,
                       return_sequences=return_sequences, stateful=stateful)

            xbl.name = prefix + xbl.name
            xb = xbl(xcurr)

            if droph:
                dlb = Dropout(drop_rate)
                dlb.name = prefix + dlb.name
                xb = dlb(xb)

            xcurr = merge([xf, xb], mode='concat')
            #print type(xcurr)
            #print xcurr
            #xcurr.name = prefix + xcurr.name
        else:
            xcurr = xf
    return xcurr
    def lrcn(self):
        """Build a CNN into RNN.
        Starting version from:
            https://github.com/udacity/self-driving-car/blob/master/
                steering-models/community-models/chauffeur/models.py

        Heavily influenced by VGG-16:
            https://arxiv.org/abs/1409.1556

        Also known as an LRCN:
            https://arxiv.org/pdf/1411.4389.pdf
        """
        model = Sequential()

        model.add(
            TimeDistributed(Conv2D(32, (7, 7),
                                   strides=(2, 2),
                                   activation='relu',
                                   padding='same'),
                            input_shape=self.input_shape))
        model.add(
            TimeDistributed(
                Conv2D(32, (3, 3),
                       kernel_initializer="he_normal",
                       activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        model.add(
            TimeDistributed(
                Conv2D(64, (3, 3), padding='same', activation='relu')))
        model.add(
            TimeDistributed(
                Conv2D(64, (3, 3), padding='same', activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        model.add(
            TimeDistributed(
                Conv2D(128, (3, 3), padding='same', activation='relu')))
        model.add(
            TimeDistributed(
                Conv2D(128, (3, 3), padding='same', activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        model.add(
            TimeDistributed(
                Conv2D(256, (3, 3), padding='same', activation='relu')))
        model.add(
            TimeDistributed(
                Conv2D(256, (3, 3), padding='same', activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        model.add(
            TimeDistributed(
                Conv2D(512, (3, 3), padding='same', activation='relu')))
        model.add(
            TimeDistributed(
                Conv2D(512, (3, 3), padding='same', activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        model.add(TimeDistributed(Flatten()))

        model.add(Dropout(0.5))
        model.add(LSTM(256, return_sequences=False, dropout=0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model
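
To make the TimeDistributed-CNN-into-LSTM pattern of lrcn() concrete, here is a small self-contained toy version of the same idea; the filter counts, image size and random data are invented for illustration and are not part of the original project.

import numpy as np
from keras.models import Sequential
from keras.layers import (TimeDistributed, Conv2D, MaxPooling2D,
                          Flatten, Dropout, LSTM, Dense)

# Toy dimensions: 8 frames of 32x32 RGB per clip, 5 classes.
seq_len, h, w, c, nb_classes = 8, 32, 32, 3, 5

toy = Sequential()
toy.add(TimeDistributed(Conv2D(8, (3, 3), activation='relu', padding='same'),
                        input_shape=(seq_len, h, w, c)))
toy.add(TimeDistributed(MaxPooling2D((2, 2))))
toy.add(TimeDistributed(Flatten()))
toy.add(Dropout(0.5))
toy.add(LSTM(32, dropout=0.5))
toy.add(Dense(nb_classes, activation='softmax'))
toy.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Random tensors only to show the expected input/target shapes.
X = np.random.rand(4, seq_len, h, w, c).astype('float32')
y = np.eye(nb_classes)[np.random.randint(0, nb_classes, 4)]
toy.fit(X, y, batch_size=2, epochs=1, verbose=0)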
Example #33
0
                                                      random_state=43)

batch_size = 32
epochs = 1
hash_bits = 128


def custom_activation(x):
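    # scaled sigmoid: squashes activations into the range (0, 10)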
    return (K.sigmoid(x) * 10)


visible = Input(shape=(X.shape[1], X.shape[2]))
blstm_1 = Bidirectional(
    LSTM(1024,
         dropout=0.1,
         recurrent_dropout=0.5,
         input_shape=(X.shape[1], X.shape[2]),
         return_sequences=True))(visible)
blstm_2 = Bidirectional(
    LSTM(1024,
         dropout=0.1,
         recurrent_dropout=0.5,
         input_shape=(X.shape[1], X.shape[2]),
         return_sequences=False))(blstm_1)
Dense_2 = Dense(hash_bits, activation=custom_activation)(blstm_2)
batchNorm = BatchNormalization()(Dense_2)
enver = Dense(128, activation='sigmoid')(batchNorm)
batchNorm2 = BatchNormalization()(enver)
Dense_3 = Dense(4, activation='sigmoid')(batchNorm2)
model = Model(inputs=visible, outputs=Dense_3)
print(model.summary())
Example #34
0
print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
                                                      test_split=0.2)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print("Pad sequences (samples x time)")
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(LSTM(128))  # try using a GRU instead, for fun
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              class_mode="binary")

print("Train...")
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=3,
          validation_data=(X_test, y_test), show_accuracy=True)
score, acc = model.evaluate(X_test, y_test,
                            batch_size=batch_size,
                            show_accuracy=True)
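
Following the inline suggestion to try a GRU instead, a drop-in variant is sketched below; it reuses max_features and maxlen from the snippet above and keeps the same old-style Keras API, but it is an illustration rather than part of the original example.

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import GRU

gru_model = Sequential()
gru_model.add(Embedding(max_features, 128, input_length=maxlen))
gru_model.add(GRU(128))  # drop-in replacement for the LSTM above
gru_model.add(Dropout(0.5))
gru_model.add(Dense(1))
gru_model.add(Activation('sigmoid'))
gru_model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  class_mode="binary")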
Example #35
0
File: stack.py Project: dytmas/seya
class Stack(Recurrent):
    """ Stack and queue network

    output_dim = output dimension
    n_slots = number of memory slots
    m_length = dimension of the memory
    rnn_size = output length of the memory controller
    inner_rnn = "lstm" (only lstm is supported)
    stack = True to create a neural stack, False to create a neural queue

    from Learning to Transduce with Unbounded Memory
    [[http://arxiv.org/pdf/1506.02516.pdf]]
    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm',rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != "lstm":
            raise ValueError("Only lstm is supported")
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        input_leng, input_dim = input_shape[1:]

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name))
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name))
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        inner_shape = list(input_shape)
        inner_shape[-1] = input_dim+self.m_length
        self.rnn.build(inner_shape)


        self.init_h = K.zeros((self.rnn_size), name="{}_init_h".format(self.name))

        self.W_d = self.rnn.init((self.rnn_size,1), name="{}_W_d".format(self.name))
        self.W_u = self.rnn.init((self.rnn_size,1), name="{}_W_u".format(self.name))

        self.W_v = self.rnn.init((self.rnn_size,self.m_length), name="{}_W_v".format(self.name))
        self.W_o = self.rnn.init((self.rnn_size,self.output_dim), name="{}_W_o".format(self.name))

        self.b_d = K.zeros((1,), name="{}_b_d".format(self.name))
        self.b_u = K.zeros((1,), name="{}_b_u".format(self.name))
        self.b_v = K.zeros((self.m_length,), name="{}_b_v".format(self.name))
        self.b_o = K.zeros((self.output_dim,), name="{}_b_o".format(self.name))

        
        self.trainable_weights = self.rnn.trainable_weights + [
           self.W_d, self.b_d,
           self.W_v, self.b_v,
           self.W_u,  self.b_u,
           self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size), name="{}_init_c".format(self.name))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
			
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    def get_initial_states(self, X):

        batch_size = X.shape[0]
        
        init_r = K.zeros((self.m_length)).dimshuffle('x',0).repeat(batch_size,axis=0)
        init_V = K.zeros((self.n_slots,self.m_length)).dimshuffle('x',0,1).repeat(batch_size,axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x',0).repeat(batch_size,axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1,),dtype=np.int32)
        
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r , init_V,init_S,itime,init_h,init_c]
      
    def get_output_shape_for(self, input_shape):
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        r_tm1, V_tm1,s_tm1,time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                             h_tm1)
              
        d_t = K.sigmoid( K.dot(op_t, self.W_d)  + self.b_d)  
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) 
        
        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::,0], 
                                             u_t[::,0], v_t,time[0],stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t

    def get_config(self):
        config = {'output_dim': self.output_dim,
                  'n_slots': self.n_slots,
                  'm_length': self.m_length,
                  'init': self.init,
                  'inner_init': self.inner_init,
                  'inner_rnn': self.inner_rnn,
                  'rnn_size': self.rnn_size,
                  'stack': self.stack}
        base_config = super(Stack, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
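
A minimal sketch of plugging the Stack layer above into a model follows; it assumes the Stack class defined above is in scope together with the Keras 1-era API it targets, and all shapes and hyperparameters are invented for the example.

from keras.models import Sequential
from keras.layers import TimeDistributed, Dense

stack_model = Sequential()
stack_model.add(Stack(output_dim=64, n_slots=32, m_length=20,
                      rnn_size=64, inner_rnn='lstm', stack=True,
                      input_dim=8, input_length=50,
                      return_sequences=True))
stack_model.add(TimeDistributed(Dense(8, activation='softmax')))
stack_model.compile(loss='categorical_crossentropy', optimizer='rmsprop')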
Example #36
0
File: ntm.py Project: dytmas/seya
    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        input_leng, input_dim = input_shape[1:]

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name))
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name))
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        inner_shape = list(input_shape)
        inner_shape[-1] = input_dim+self.m_length
        self.rnn.build(inner_shape)
		
        # initial memory, state, read and write vectors
        self.M = theano.shared((.001*np.ones((1,)).astype(floatX)), name="{}_M".format(self.name))
        self.init_h = K.zeros((self.output_dim), name="{}_init_h".format(self.name))
        self.init_wr = self.rnn.init((self.n_slots,), name="{}_init_wr".format(self.name))
        self.init_ww = self.rnn.init((self.n_slots,), name="{}_init_ww".format(self.name))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length), name="{}_W_e".format(self.name))  # erase
        self.b_e = K.zeros((self.m_length), name="{}_b_e".format(self.name))
        self.W_a = self.rnn.init((self.output_dim, self.m_length), name="{}_W_a".format(self.name))  # add
        self.b_a = K.zeros((self.m_length), name="{}_b_a".format(self.name))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length), name="{}_W_k_read".format(self.name))
        self.b_k_read = self.rnn.init((self.m_length, ), name="{}_b_k_read".format(self.name))
        self.W_c_read = self.rnn.init((self.output_dim, 3), name="{}_W_c_read".format(self.name))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3), name="{}_b_c_read".format(self.name))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range), name="{}_W_s_read".format(self.name))
        self.b_s_read = K.zeros((self.shift_range), name="{}_b_s_read".format(self.name))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length), name="{}_W_k_write".format(self.name))
        self.b_k_write = self.rnn.init((self.m_length, ), name="{}_b_k_write".format(self.name))
        self.W_c_write = self.rnn.init((self.output_dim, 3), name="{}_W_c_write".format(self.name))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3), name="{}_b_c_write".format(self.name))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range), name="{}_W_s_write".format(self.name))
        self.b_s_write = K.zeros((self.shift_range), name="{}_b_s_write".format(self.name))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.output_dim), name="{}_init_c".format(self.name))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #37
0
    def __init__(self):
        self.word2em = load_glove()
        print("Length of word2em :: ", len(self.word2em))
        #print("start word :: \n ", self.word2em['start'])

        #self.target_word2idx = np.load(
        #    '../chatbot_train/models/' + DATA_SET_NAME + '/word-glove-target-word2idx.npy').item()
        #self.target_idx2word = np.load(
        #    '../chatbot_train/models/' + DATA_SET_NAME + '/word-glove-target-idx2word.npy').item()
        #context = np.load('../chatbot_train/models/' + DATA_SET_NAME + '/word-glove-context.npy').item()

        self.input_texts, self.target_texts, self.target_counter = read_input()

        for idx, (input_words, target_words) in enumerate(
                zip(self.input_texts, self.target_texts)):
            if idx > 10:
                break
            print([input_words, target_words])

        self.target_word2idx, self.target_idx2word, self.context, input_texts_word2em = get_target(
            self)

        self.max_encoder_seq_length = self.context['encoder_max_seq_length']
        self.max_decoder_seq_length = self.context['decoder_max_seq_length']
        self.num_decoder_tokens = self.context['num_decoder_tokens']

        print(self.context)

        encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE),
                               name='encoder_inputs')
        encoder_lstm = LSTM(units=HIDDEN_UNITS,
                            return_state=True,
                            name="encoder_lstm")
        encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(
            encoder_inputs)
        encoder_states = [encoder_state_h, encoder_state_c]

        decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE),
                               name='decoder_inputs')
        decoder_lstm = LSTM(units=HIDDEN_UNITS,
                            return_sequences=True,
                            return_state=True,
                            name='decoder_lstm')
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                             initial_state=encoder_states)
        decoder_dense = Dense(self.num_decoder_tokens,
                              activation='softmax',
                              name='decoder_dense')
        decoder_outputs = decoder_dense(decoder_outputs)

        self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

        #plot_model(self.model, to_file='RNN_model.png', show_shapes=True)

        #self.model.load_weights('../chatbot_train/models/' + DATA_SET_NAME + '/word-glove-weights.h5')
        self.model.compile(optimizer='rmsprop',
                           loss='categorical_crossentropy')

        Xtrain, Xtest, Ytrain, Ytest = train_test_split(input_texts_word2em,
                                                        self.target_texts,
                                                        test_size=0.2,
                                                        random_state=42)

        print("Length of train data:: ", len(Xtrain))
        print("Length of test data:: ", len(Xtest))

        train_gen = generate_batch(Xtrain, Ytrain, self)
        test_gen = generate_batch(Xtest, Ytest, self)

        train_num_batches = len(Xtrain) // BATCH_SIZE
        test_num_batches = len(Xtest) // BATCH_SIZE

        #checkpoint = ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True)
        self.model.fit_generator(
            generator=train_gen,
            steps_per_epoch=train_num_batches,
            epochs=NUM_EPOCHS,
            verbose=1,
            validation_data=test_gen,
            validation_steps=test_num_batches)  #, callbacks=[checkpoint])

        self.model.save_weights(WEIGHT_FILE_PATH)
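
        # Inference setup: a standalone encoder model that returns the final LSTM states,
        # and a decoder model that can be stepped one token at a time from those states.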

        self.encoder_model = Model(encoder_inputs, encoder_states)

        decoder_state_inputs = [
            Input(shape=(HIDDEN_UNITS, )),
            Input(shape=(HIDDEN_UNITS, ))
        ]
        decoder_outputs, state_h, state_c = decoder_lstm(
            decoder_inputs, initial_state=decoder_state_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)
        self.decoder_model = Model([decoder_inputs] + decoder_state_inputs,
                                   [decoder_outputs] + decoder_states)
Example #38
0
# Split the data into a training set and a test set
N_train = int(len(df) * 0.8)
N_test = len(df) - N_train
X_train, X_test, y_train, y_test = \
    train_test_split(X, Y, test_size=N_test, shuffle = False) 

# Define the hidden layer size, etc.: does accuracy improve as the hidden layer grows?
n_in = 1 # len(X[0][0])
n_out = 1 # len(Y[0])
n_hidden = 300

# Build the model (easy to write with the Keras framework)
model = Sequential()
model.add(LSTM(n_hidden,
               batch_input_shape=(None, maxlen, n_in),
               kernel_initializer='random_uniform',
               return_sequences=False))
model.add(Dense(n_in, kernel_initializer='random_uniform'))
model.add(Activation("linear"))

opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
model.compile(loss = "mean_squared_error", optimizer=opt)

early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1)
hist = model.fit(X_train, y_train, batch_size=maxlen, epochs=50,
                 callbacks=[early_stopping])

# Plot the loss
loss = hist.history['loss']
epochs = len(loss)
plt.rc('font', family='serif')
Example #39
0
model4.add(
    Convolution1D(nb_filter=nb_filter,
                  filter_length=filter_length,
                  border_mode='valid',
                  activation='relu',
                  subsample_length=1))

model4.add(GlobalMaxPooling1D())
model4.add(Dropout(0.2))

model4.add(Dense(300))
model4.add(Dropout(0.2))
model4.add(BatchNormalization())
model5 = Sequential()
model5.add(Embedding(len(word_index) + 1, 300, input_length=40, dropout=0.2))
model5.add(LSTM(300, dropout_W=0.2, dropout_U=0.2))

model6 = Sequential()
model6.add(Embedding(len(word_index) + 1, 300, input_length=40, dropout=0.2))
model6.add(LSTM(300, dropout_W=0.2, dropout_U=0.2))

merged_model = Sequential()
merged_model.add(
    Merge([model1, model2, model3, model4, model5, model6], mode='concat'))
merged_model.add(BatchNormalization())

merged_model.add(Dense(300))
merged_model.add(PReLU())
merged_model.add(Dropout(0.2))
merged_model.add(BatchNormalization())
Example #40
0
with open('conversation.pickle', 'rb') as f:
    vec_x, vec_y = pickle.load(f)

vec_x = np.array(vec_x, dtype=np.float64)
vec_y = np.array(vec_y, dtype=np.float64)

x_train, x_test, y_train, y_test = train_test_split(vec_x,
                                                    vec_y,
                                                    test_size=0.2,
                                                    random_state=1)

model = Sequential()
model.add(
    LSTM(output_dim=300,
         input_shape=x_train.shape[1:],
         return_sequences=True,
         init='glorot_normal',
         inner_init='glorot_normal',
         activation='sigmoid'))
model.add(
    LSTM(output_dim=300,
         input_shape=x_train.shape[1:],
         return_sequences=True,
         init='glorot_normal',
         inner_init='glorot_normal',
         activation='sigmoid'))
model.add(
    LSTM(output_dim=300,
         input_shape=x_train.shape[1:],
         return_sequences=True,
         init='glorot_normal',
         inner_init='glorot_normal',
Example #41
0
yx = kMagnData.shape[0]
yy = kMagnData.shape[1]
yz = kMagnData.shape[2]

ox = yConcat.shape[0]
oy = yConcat.shape[1]
oz = yConcat.shape[2]

phiModel = Sequential()
print yConcat.shape

phiModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True))
phiModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True))
phiModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True))
phiModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True))
phiLstmLayer1 = LSTM(input_dim=yz, output_dim=yz, return_sequences=True)
phiLstmLayer1.trainable = False
phiModel.add(phiLstmLayer1)

if os.path.isfile('./lstm-weights/phi-' + settings['lstm-file']):
    phiModel.load_weights('./lstm-weights/phi-' + settings['lstm-file'])

phiModel = modelWeightsLoader(phiModel, './autoencoder-weights/' + settings['phase-encoder'] + '-phase-AE', {18:4})
phiModel.compile(loss='mean_squared_error', optimizer='rmsprop')

magnModel = Sequential()
magnModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True))
magnModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True))
magnModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True))
magnModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True))
magLstmLayer1 = LSTM(input_dim=yz, output_dim=yz, return_sequences=True)
Example #42
0
    def build_doc_scorer(self, r_query_idf, permute_idxs):
        p = self.p
        ng_fsizes = self.NGRAM_NFILTER

        maxpool_poses = self._cascade_poses()

        filter_sizes = list()
        added_fs = set()
        for ng in sorted(ng_fsizes):
            # n-gram in input
            for n_x, n_y in ng_fsizes[ng]:
                dim_name = self._get_dim_name(n_x, n_y)
                if dim_name not in added_fs:
                    filter_sizes.append((n_x, n_y))
                    added_fs.add(dim_name)

        re_input, cov_sim_layers, pool_sdim_layer, pool_sdim_layer_context, pool_filter_layer, ex_filter_layer, re_lq_ds =\
        self._cov_dsim_layers(p['simdim'], p['maxqlen'], filter_sizes, p['nfilter'], top_k=p['kmaxpool'], poses=maxpool_poses, selecter=p['distill'])

        query_idf = Reshape(
            (p['maxqlen'],
             1))(Activation('softmax',
                            name='softmax_q_idf')(Flatten()(r_query_idf)))

        if p['combine'] < 0:
            raise RuntimeError(
                "combine should be 0 (LSTM) or the number of feedforward dimensions"
            )
        elif p['combine'] == 0:
            rnn_layer = LSTM(1, dropout=0.0, recurrent_regularizer=None, recurrent_dropout=0.0, unit_forget_bias=True, \
                    name="lstm_merge_score_idf", recurrent_activation="hard_sigmoid", bias_regularizer=None, \
                    activation="tanh", recurrent_initializer="orthogonal", kernel_regularizer=None, kernel_initializer="glorot_uniform")

        else:
            dout = Dense(1, name='dense_output')
            d1 = Dense(p['combine'], activation='relu', name='dense_1')
            d2 = Dense(p['combine'], activation='relu', name='dense_2')
            rnn_layer = lambda x: dout(d1(d2(Flatten()(x))))

        def _permute_scores(inputs):
            scores, idxs = inputs
            return tf.gather_nd(scores, backend.cast(idxs, 'int32'))

        self.vis_out = None
        self.visout_count = 0

        def _scorer(doc_inputs, dataid):
            self.visout_count += 1
            self.vis_out = {}
            doc_qts_scores = [query_idf]
            for ng in sorted(ng_fsizes):
                if p['distill'] == 'firstk':
                    input_ng = max(ng_fsizes)
                else:
                    input_ng = ng

                for n_x, n_y in ng_fsizes[ng]:
                    dim_name = self._get_dim_name(n_x, n_y)
                    if n_x == 1 and n_y == 1:
                        doc_cov = doc_inputs[input_ng]
                        re_doc_cov = doc_cov
                    else:
                        doc_cov = cov_sim_layers[dim_name](re_input(
                            doc_inputs[input_ng]))
                        re_doc_cov = re_lq_ds[dim_name](
                            pool_filter_layer[dim_name](Permute(
                                (1, 3, 2))(doc_cov)))
                    self.vis_out['conv%s' % ng] = doc_cov

                    if p['context']:
                        ng_signal = pool_sdim_layer_context[dim_name](
                            [re_doc_cov, doc_inputs['context']])
                    else:
                        ng_signal = pool_sdim_layer[dim_name](re_doc_cov)

                    doc_qts_scores.append(ng_signal)

            if len(doc_qts_scores) == 1:
                doc_qts_score = doc_qts_scores[0]
            else:
                doc_qts_score = Concatenate(axis=2)(doc_qts_scores)

            if permute_idxs is not None:
                doc_qts_score = Lambda(_permute_scores)(
                    [doc_qts_score, permute_idxs])

            doc_score = rnn_layer(doc_qts_score)
            return doc_score

        return _scorer
def main():
    f = open("X_train.pkl", 'rb')
    X_train = pickle.load(f)
    '''
    f=open('word2index.pkl','r')
    word2index=pickle.load(f)
    f=open('index2word.pkl','r')
    index2word=pickle.load(f)

    inputs_train, tars_train = vectorize_stories(X_train, X_train, word2index, maxlen, maxlen, vocab_size)
    '''
    X_train=pad_sequences(X_train, maxlen=maxlen)

    decoder_mode = 1  # 0: simplest mode, 1: feed-back mode [1], 2: peek mode [2], 3: attention mode [3]
    if decoder_mode == 3:
        encoder_top_layer = LSTM(hidden_dim, return_sequences=True)
    else:
        encoder_top_layer = LSTM(hidden_dim)

    if decoder_mode == 0:
        decoder_top_layer = LSTM(hidden_dim, return_sequences=True)
        decoder_top_layer.get_weights()
    elif decoder_mode == 1:
        decoder_top_layer = LSTMDecoder(hidden_dim=hidden_dim, output_dim=hidden_dim
                                        , output_length=maxlen, state_input=False, return_sequences=True)
    elif decoder_mode == 2:
        decoder_top_layer = LSTMDecoder2(hidden_dim=hidden_dim, output_dim=hidden_dim
                                         , output_length=maxlen, state_input=False, return_sequences=True)
    elif decoder_mode == 3:
        decoder_top_layer = AttentionDecoder(hidden_dim=hidden_dim, output_dim=hidden_dim
                                             , output_length=maxlen, state_input=False, return_sequences=True)

    en_de_model = Sequential()
    en_de_model.add(Embedding(input_dim=vocab_size,
                              output_dim=hidden_dim,
                              input_length=maxlen))
    en_de_model.add(encoder_top_layer)
    if decoder_mode == 0:
        en_de_model.add(RepeatVector(maxlen))
    en_de_model.add(decoder_top_layer)

    en_de_model.add(TimeDistributedDense(vocab_size))
    en_de_model.add(Activation('softmax'))
    print('Compiling...')
    time_start = time.time()
    en_de_model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    time_end = time.time()
    print('Compiled, cost time: %f second!' % (time_end - time_start))
    for iter_num in range(5000):
        en_de_model.fit(X_train, X_train, batch_size=3, nb_epoch=1, show_accuracy=True)
        out_predicts = en_de_model.predict(X_train)
        for i_idx, out_predict in enumerate(out_predicts):
            predict_sequence = []
            '''
            for predict_vector in out_predict:
                next_index = np.argmax(predict_vector)
                next_token = index2word[next_index]
                predict_sequence.append(next_token)
            '''
            print('Target output:', X_train[i_idx])
            print('Predict output:', predict_sequence)

        print('Current iter_num is:%d' % iter_num)
Example #44
0
data = data[:, :n_grams]

# making labels
labels = data[:, -1]
labels_final = np.zeros([len(labels), int(max(labels)) + 1])
for i in range(len(labels_final)):
    labels_final[i][int(labels[i])] = 1.0

'''
ML Part
'''

model = Sequential()
model.add(Embedding(len(id2char), 128, input_length = input_length))
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Dense(len(id2char), activation = 'softmax'))
print(model.summary())

model.compile('RMSProp', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.fit(data, labels_final, epochs = 1, batch_size = 5000, validation_split = 0.2)

input_string = u'No part of this book may be reproduced or transmitted in any form or by any means'
input_string = input_string[:50]
print(input_string, end = '')

# Encoding (strings are immutable in Python, so build a list of character ids instead)
input_ids = [char2id[c] for c in input_string]
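A typical continuation (assumed here, not part of the original snippet) would slide a window over the encoded seed and greedily emit the next character; model, input_length, and id2char come from earlier in the script, and the seed is assumed to span at least input_length characters:

# Assumed generation sketch, not part of the original snippet.
for _ in range(200):
    window = np.array(input_ids[-input_length:])[None, :]  # shape (1, input_length)
    probs = model.predict(window)[0]                        # distribution over characters
    next_id = int(np.argmax(probs))
    input_ids.append(next_id)
    print(id2char[next_id], end='')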
Example #45
0
class NeuralTuringMachine(Recurrent):
    """ Neural Turing Machines

    Non-obvious parameters:
    -----------------------
    shift_range: int, number of available shifts, e.g. if 3, available shifts are
                 (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location

    Known issues:
    -------------
    Theano may complain when n_slots == 1.

    """
    def __init__(self, output_dim, n_slots, m_length, shift_range=3,
                 inner_rnn='gru',
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001*np.ones((1,)).astype(floatX)))
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length, ))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length, ))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.output_dim))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None]*M).sum(axis=1)

    def _write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None]*e[:, None, :])
        Mout = Mtilda + w[:, :, None]*a[:, None, :]
        return Mout

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1):
        # interpolate between content-based weights and previous weights (Graves et al. 2014, eq. 7)
        wg = g[:, None] * wc + (1-g[:, None])*w_tm1
        # circular convolutional shift with the shift distribution s and circulant matrix C (eq. 8)
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        # sharpen with gamma and renormalise (eq. 9)
        wout = _renorm(wtilda ** gamma[:, None])
        return wout

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
            self.m_length, axis=2)
        init_M = init_M.flatten(ndim=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h, init_c]
        else:
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        if K._BACKEND == 'tensorflow':
            if not self.input_shape[1]:
                raise Exception('When using TensorFlow, you should define ' +
                                'explicitly the number of timesteps of ' +
                                'your sequences. Make sure the first layer ' +
                                'has a "batch_input_shape" argument ' +
                                'including the samples axis.')

        mask = self.get_output_mask(train)
        if mask is not None:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        states = rnn_states(self.step, X, initial_states,
                            go_backwards=self.go_backwards,
                            masking=masking)
        return states

    def step(self, x, states):
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)

        return h_t[0], [M_t, wr_t, ww_t] + h_t
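For orientation, a recurrent layer with this constructor is normally dropped into an old-style Keras Sequential model; the following copy-task-like wiring is only an assumed usage sketch (sizes and surrounding setup are not from the original file):

# Assumed usage sketch, not part of the original file; all sizes are arbitrary.
from keras.models import Sequential
from keras.layers.core import TimeDistributedDense, Activation

ntm_model = Sequential()
ntm_model.add(NeuralTuringMachine(output_dim=64, n_slots=50, m_length=20,
                                  shift_range=3, inner_rnn='lstm',
                                  return_sequences=True,
                                  input_dim=8, input_length=20))
ntm_model.add(TimeDistributedDense(8))
ntm_model.add(Activation('sigmoid'))
ntm_model.compile(loss='binary_crossentropy', optimizer='adam')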
Example #46
0
def pre_train_model(cursor, word_model):
    checkpoint_path = "data/cp.ckpt"

    cursor.execute("SELECT count(cleaned) from tweets")
    (tweet_count, ) = cursor.fetchone()
    tweet_count = min(tweet_count, tweet_limit)
    cursor.execute("""
                    SELECT max_sentence_length
                    FROM metadata
                    WHERE id = (SELECT MAX(id) FROM metadata)
                    """)
    (max_sentence_len, ) = cursor.fetchone()

    pretrained_weights = word_model.wv.vectors
    vocab_size, embedding_size = pretrained_weights.shape
    print('Result embedding shape:', pretrained_weights.shape)

    # print('Checking similar words:')
    # for word in ['model', 'network', 'train', 'learn']:
    #     most_similar = ', '.join(
    #         '%s (%.2f)' % (similar, dist) for similar, dist in word_model.wv.most_similar(word)[:8])
    #     print('  %s -> %s' % (word, most_similar))

    def word2idx(word):
        return word_model.wv.vocab[word].index

    def idx2word(idx):
        return word_model.wv.index2word[idx]

    print('\nPreparing the data for LSTM...')
    train_x = np.zeros([tweet_count, max_sentence_len], dtype=np.int32)
    train_y = np.zeros([tweet_count], dtype=np.int32)
    for i, sentence in enumerate(get_tokens(cursor)):
        for t, word in enumerate(sentence[:-1]):
            train_x[i, t] = word2idx(word)
        train_y[i] = word2idx(sentence[-1])
    print('train_x shape:', train_x.shape)
    print('train_y shape:', train_y.shape)

    print('\nTraining LSTM...')
    model = Sequential()
    model.add(
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_size,
                  weights=[pretrained_weights]))
    model.add(LSTM(units=embedding_size))
    model.add(Dense(units=vocab_size))
    model.add(Activation('softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

    def sample(preds, temperature=1.0):
        if temperature <= 0:
            return np.argmax(preds)
        preds = np.asarray(preds).astype('float64')
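        # temperature scaling of the log-probabilities: T < 1 sharpens, T > 1 flattens the distribution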
        preds = np.log(preds) / temperature
        exp_preds = np.exp(preds)
        preds = exp_preds / np.sum(exp_preds)
        probas = np.random.multinomial(1, preds, 1)
        return np.argmax(probas)

    def generate_next(text, num_generated=15):
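        # sos/eos, use_nltk and sp (presumably a SentencePiece processor) are globals defined elsewhere in the script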
        word_idxs = [word2idx(word) for word in text.lower().split()]
        for _ in range(num_generated):
            prediction = model.predict(x=np.array(word_idxs))
            idx = sample(prediction[-1], temperature=0.7)
            word_idxs.append(idx)
            if idx == eos:
                break
        pieces = list(map(lambda idx: idx2word(idx), word_idxs))
        if use_nltk:
            return ' '.join(pieces)
        result = sp.decode_pieces(pieces)
        return result

    def on_epoch_end(epoch, _):
        print('\nGenerating text after epoch: %d' % epoch)
        texts = [sos, sos, sos]
        for text in texts:
            sample = generate_next(text)
            print('%s... -> %s' % (text, sample))

    # Create a callback that saves the model's weights
    cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                  save_weights_only=True,
                                                  verbose=1)

    model.summary()
    model.fit(
        train_x,
        train_y,
        batch_size=128,
        epochs=20,
        callbacks=[cp_callback,
                   LambdaCallback(on_epoch_end=on_epoch_end)])

    return model
Example #47
0
File: draw.py Project: samim23/seya
class DRAW(Recurrent):
    '''DRAW

    Parameters:
    ===========
    h_dim : encoder/decoder dimension
    z_dim : random sample dimension (reparametrization trick output)
    input_shape : (n_channels, rows, cols)
    N_enc : Size of the encoder's filter bank (MNIST default: 2)
    N_dec : Size of the decoder's filter bank (MNIST default: 5)
    n_steps : number of sampling steps (or how long it takes to draw, default 64)
    inner_rnn : str with rnn type ('gru' default)
    truncate_gradient : int (-1 default)
    return_sequences : bool (False default)
    '''
    theano_rng = theano_rng()

    def __init__(self, input_shape, h_dim, z_dim, N_enc=2, N_dec=5, n_steps=64,
                 inner_rnn='gru', truncate_gradient=-1, return_sequences=False,
                 canvas_activation=T.nnet.sigmoid, init='glorot_uniform',
                 inner_init='orthogonal'):
        self.input = T.tensor4()
        self.h_dim = h_dim  # this is 256 for MNIST
        self.z_dim = z_dim  # this is 100 for MNIST
        self.input_shape = input_shape
        self.N_enc = N_enc
        self.N_dec = N_dec
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences
        self.n_steps = n_steps
        self.canvas_activation = canvas_activation

        self.height = input_shape[1]
        self.width = input_shape[2]

        self.inner_rnn = inner_rnn
        if inner_rnn == 'gru':
            self.enc = GRU(input_dim=self.input_shape[0]*2*self.N_enc**2 +
                           h_dim, output_dim=h_dim, init=init,
                           inner_init=inner_init)
            self.dec = GRU(input_dim=z_dim, output_dim=h_dim, init=init,
                           inner_init=inner_init)

        elif inner_rnn == 'lstm':
            self.enc = LSTM(input_dim=self.input_shape[0]*2*self.N_enc**2 + h_dim,
                            output_dim=h_dim, init=init,
                            inner_init=inner_init)
            self.dec = LSTM(input_dim=z_dim, output_dim=h_dim, init=init,
                            inner_init=inner_init)
        else:
            raise ValueError('This type of inner_rnn is not supported')

        self.init_canvas = shared_zeros(input_shape)  # canvas and hidden state
        self.init_h_enc = shared_zeros((h_dim))     # initial values
        self.init_h_dec = shared_zeros((h_dim))     # should be trained
        self.L_enc = self.enc.init((h_dim, 5))  # "read" attention parameters (eq. 21)
        self.L_dec = self.enc.init((h_dim, 5))  # "write" attention parameters (eq. 28)
        self.b_enc = shared_zeros((5))  # "read" attention parameters (eq. 21)
        self.b_dec = shared_zeros((5))  # "write" attention parameters (eq. 28)
        self.W_patch = self.enc.init((h_dim, self.N_dec**2*self.input_shape[0]))
        self.b_patch = shared_zeros((self.N_dec**2*self.input_shape[0]))
        self.W_mean = self.enc.init((h_dim, z_dim))
        self.W_sigma = self.enc.init((h_dim, z_dim))
        self.b_mean = shared_zeros((z_dim))
        self.b_sigma = shared_zeros((z_dim))
        self.params = self.enc.params + self.dec.params + [
            self.L_enc, self.L_dec, self.b_enc, self.b_dec, self.W_patch,
            self.b_patch, self.W_mean, self.W_sigma, self.b_mean, self.b_sigma]
            # self.init_canvas, self.init_h_enc, self.init_h_dec]

    def init_updates(self):
        self.get_output(train=True)  # populate regularizers list

    def _get_attention_params(self, h, L, b, N):
        p = T.dot(h, L) + b
        gx = self.width * (p[:, 0]+1) / 2.
        gy = self.height * (p[:, 1]+1) / 2.
        sigma2 = T.exp(p[:, 2])
        delta = T.exp(p[:, 3]) * (max(self.width, self.height) - 1) / (N - 1.)
        gamma = T.exp(p[:, 4])
        return gx, gy, sigma2, delta, gamma

    def _get_filterbank(self, gx, gy, sigma2, delta, N):
        small = 1e-4
        i = T.arange(N)
        a = T.arange(self.width)
        b = T.arange(self.height)

        mx = gx[:, None] + delta[:, None] * (i - N/2. - .5)
        my = gy[:, None] + delta[:, None] * (i - N/2. - .5)

        Fx = T.exp(-(a - mx[:, :, None])**2 / 2. / sigma2[:, None, None])
        Fx /= (Fx.sum(axis=-1)[:, :, None] + small)
        Fy = T.exp(-(b - my[:, :, None])**2 / 2. / sigma2[:, None, None])
        Fy /= (Fy.sum(axis=-1)[:, :, None] + small)
        return Fx, Fy

    def _read(self, x, gamma, Fx, Fy):
        Fyx = (Fy[:, None, :, :, None] * x[:, :, None, :, :]).sum(axis=3)
        FxT = Fx.dimshuffle(0, 2, 1)
        FyxFx = (Fyx[:, :, :, :, None] * FxT[:, None, None, :, :]).sum(axis=3)
        return gamma[:, None, None, None] * FyxFx

    def _get_patch(self, h):
        write_patch = T.dot(h, self.W_patch) + self.b_patch
        write_patch = write_patch.reshape((h.shape[0], self.input_shape[0],
                                           self.N_dec, self.N_dec))
        return write_patch

    def _write(self, write_patch, gamma, Fx, Fy):
        Fyx = (Fy[:, None, :, :, None] * write_patch[:, :, :, None, :]).sum(axis=2)
        FyxFx = (Fyx[:, :, :, :, None] * Fx[:, None, None, :, :]).sum(axis=3)
        return FyxFx / gamma[:, None, None, None]

    def _get_sample(self, h, eps):
        mean = T.dot(h, self.W_mean) + self.b_mean
        # eps = self.theano_rng.normal(avg=0., std=1., size=mean.shape)
        logsigma = T.dot(h, self.W_sigma) + self.b_sigma
        sigma = T.exp(logsigma)
        if self._train_state:
            sample = mean + eps * sigma
        else:
            sample = mean + 0 * eps * sigma
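        # closed-form KL divergence between N(mean, sigma^2) and the standard normal prior, per latent dimension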
        kl = -.5 - logsigma + .5 * (mean**2 + sigma**2)
        # kl = .5 * (mean**2 + sigma**2 - logsigma - 1)
        return sample, kl.sum(axis=-1)

    def _get_rnn_input(self, x, rnn):
        if self.inner_rnn == 'gru':
            x_z = T.dot(x, rnn.W_z) + rnn.b_z
            x_r = T.dot(x, rnn.W_r) + rnn.b_r
            x_h = T.dot(x, rnn.W_h) + rnn.b_h
            return x_z, x_r, x_h

        elif self.inner_rnn == 'lstm':
            xi = T.dot(x, rnn.W_i) + rnn.b_i
            xf = T.dot(x, rnn.W_f) + rnn.b_f
            xc = T.dot(x, rnn.W_c) + rnn.b_c
            xo = T.dot(x, rnn.W_o) + rnn.b_o
            return xi, xf, xc, xo

    def _get_rnn_state(self, rnn, *args):
        mask = 1.  # no masking
        if self.inner_rnn == 'gru':
            x_z, x_r, x_h, h_tm1 = args
            h = rnn._step(x_z, x_r, x_h, mask, h_tm1,
                          rnn.U_z, rnn.U_r, rnn.U_h)
            return h
        elif self.inner_rnn == 'lstm':
            xi, xf, xc, xo, h_tm1, cell_tm1 = args
            h, cell = rnn._step(xi, xf, xo, xc, mask,
                                h_tm1, cell_tm1,
                                rnn.U_i, rnn.U_f, rnn.U_o, rnn.U_c)
            return h, cell

    def _get_initial_states(self, X):
        if self.inner_rnn == 'gru':
            batch_size = X.shape[0]
            canvas = self.init_canvas.dimshuffle('x', 0, 1, 2).repeat(batch_size,
                                                                      axis=0)
            init_enc = self.init_h_enc.dimshuffle('x', 0).repeat(batch_size, axis=0)
            init_dec = self.init_h_dec.dimshuffle('x', 0).repeat(batch_size, axis=0)
        else:
            canvas = alloc_zeros_matrix(*X.shape)  # + self.init_canvas[None, :, :, :]
            init_enc = alloc_zeros_matrix(X.shape[0], self.h_dim)  # + self.init_h_enc[None, :]
            init_dec = alloc_zeros_matrix(X.shape[0], self.h_dim)  # + self.init_h_dec[None, :]
        return canvas, init_enc, init_dec

    def _step(self, eps, canvas, h_enc, h_dec, x, *args):
        x_hat = x - self.canvas_activation(canvas)
        gx, gy, sigma2, delta, gamma = self._get_attention_params(
            h_dec, self.L_enc, self.b_enc, self.N_enc)
        Fx, Fy = self._get_filterbank(gx, gy, sigma2, delta, self.N_enc)
        read_x = self._read(x, gamma, Fx, Fy).flatten(ndim=2)
        read_x_hat = self._read(x_hat, gamma, Fx, Fy).flatten(ndim=2)
        enc_input = T.concatenate([read_x, read_x_hat, h_dec], axis=-1)

        x_enc_z, x_enc_r, x_enc_h = self._get_rnn_input(enc_input, self.enc)
        new_h_enc = self._get_rnn_state(self.enc, x_enc_z, x_enc_r, x_enc_h,
                                        h_enc)
        sample, kl = self._get_sample(new_h_enc, eps)

        x_dec_z, x_dec_r, x_dec_h = self._get_rnn_input(sample, self.dec)
        new_h_dec = self._get_rnn_state(self.dec, x_dec_z, x_dec_r, x_dec_h,
                                        h_dec)

        gx_w, gy_w, sigma2_w, delta_w, gamma_w = self._get_attention_params(
            new_h_dec, self.L_dec, self.b_dec, self.N_dec)
        Fx_w, Fy_w = self._get_filterbank(gx_w, gy_w, sigma2_w, delta_w,
                                          self.N_dec)
        write_patch = self._get_patch(new_h_dec)
        new_canvas = canvas + self._write(write_patch, gamma_w, Fx_w, Fy_w)
        return new_canvas, new_h_enc, new_h_dec, kl

    def _step_lstm(self, eps, canvas, h_enc, cell_enc,
                   h_dec, cell_dec, x, *args):
        x_hat = x - self.canvas_activation(canvas)
        gx, gy, sigma2, delta, gamma = self._get_attention_params(
            h_dec, self.L_enc, self.b_enc, self.N_enc)
        Fx, Fy = self._get_filterbank(gx, gy, sigma2, delta, self.N_enc)
        read_x = self._read(x, gamma, Fx, Fy).flatten(ndim=2)
        read_x_hat = self._read(x_hat, gamma, Fx, Fy).flatten(ndim=2)
        enc_input = T.concatenate([read_x, read_x_hat, h_dec.flatten(ndim=2)], axis=1)

        x_enc_i, x_enc_f, x_enc_c, x_enc_o = self._get_rnn_input(enc_input,
                                                                 self.enc)
        new_h_enc, new_cell_enc = self._get_rnn_state(
            self.enc, x_enc_i, x_enc_f, x_enc_c, x_enc_o, h_enc, cell_enc)
        sample, kl = self._get_sample(new_h_enc, eps)

        x_dec_i, x_dec_f, x_dec_c, x_dec_o = self._get_rnn_input(sample,
                                                                 self.dec)
        new_h_dec, new_cell_dec = self._get_rnn_state(
            self.dec, x_dec_i, x_dec_f, x_dec_c, x_dec_o, h_dec, cell_dec)

        gx_w, gy_w, sigma2_w, delta_w, gamma_w = self._get_attention_params(
            new_h_dec, self.L_dec, self.b_dec, self.N_dec)
        Fx_w, Fy_w = self._get_filterbank(gx_w, gy_w, sigma2_w, delta_w,
                                          self.N_dec)
        write_patch = self._get_patch(new_h_dec)
        new_canvas = canvas + self._write(write_patch, gamma_w, Fx_w, Fy_w)
        return new_canvas, new_h_enc, new_cell_enc, new_h_dec, new_cell_dec, kl

    def get_output(self, train=False):
        self._train_state = train
        X, eps = self.get_input(train).values()
        eps = eps.dimshuffle(1, 0, 2)
        canvas, init_enc, init_dec = self._get_initial_states(X)

        if self.inner_rnn == 'gru':
            outputs, updates = scan(self._step,
                                    sequences=eps,
                                    outputs_info=[canvas, init_enc, init_dec, None],
                                    non_sequences=[X, ] + self.params,
                                    # n_steps=self.n_steps,
                                    truncate_gradient=self.truncate_gradient)

        elif self.inner_rnn == 'lstm':
            outputs, updates = scan(self._step_lstm,
                                    sequences=eps,
                                    outputs_info=[0*canvas, 0*init_enc, 0*init_enc,
                                                  0*init_dec, 0*init_dec, None],
                                    non_sequences=[X, ] + self.params,
                                    truncate_gradient=self.truncate_gradient)

        kl = outputs[-1].sum(axis=0).mean()
        if train:
            # self.updates = updates
            self.regularizers = [SimpleCost(kl), ]
        if self.return_sequences:
            return [outputs[0].dimshuffle(1, 0, 2, 3, 4), kl]
        else:
            return [outputs[0][-1], kl]
Example #48
0
print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
                                                      test_split=0.2)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print("Pad sequences (samples x time)")
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen, dropout=0.5))
model.add(LSTM(128, dropout_W=0.5,
               dropout_U=0.5))  # try using a GRU instead, for fun
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              class_mode="binary")

print("Train...")
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          nb_epoch=15,
          validation_data=(X_test, y_test))
Example #49
0
class NeuralTuringMachine(Recurrent):
    """ Neural Turing Machines
    
    Parameters:
    -----------
    shift_range: int, number of available shifts, ex. if 3, available shifts are
                 (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location
    inner_rnn: str, supported values are 'gru' and 'lstm'
    output_dim: hidden state size (RNN controller output_dim)

    Known issues and TODO:
    ----------------------
    Theano may complain when n_slots == 1.
    Add multiple reading and writing heads.

    """
    def __init__(self, output_dim, n_slots, m_length, shift_range=3,
                 inner_rnn='gru', truncate_gradient=-1, return_sequences=False,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn
        self.return_sequences = return_sequences
        self.truncate_gradient = truncate_gradient

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001*np.ones((1,)).astype(floatX)))
        self.init_h = shared_zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = shared_zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = shared_zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length, ))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9 in Graves et. al 2014
        self.b_c_read = shared_zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = shared_zeros((self.shift_range)) 

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length, ))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = shared_zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = shared_zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.params = self.rnn.params + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = shared_zeros((self.output_dim))
            self.params = self.params + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None]*M).sum(axis=1)

    def _write(self, w, e, a, M, mask):
        Mtilda = M * (1 - w[:, :, None]*e[:, None, :])
        Mout = Mtilda + w[:, :, None]*a[:, None, :]
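        # wherever the mask is zero (padded timestep) the previous memory is kept unchanged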
        return mask[:, None, None]*Mout + (1-mask[:, None, None])*M

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1, mask):
        wg = g[:, None] * wc + (1-g[:, None])*w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return mask[:, None] * wout + (1-mask[:, None])*w_tm1

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-6
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def _get_initial_states(self, batch_size):
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
           batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
               self.m_length, axis=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h, init_c
        else:
            return init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h

    def _step(self, x, mask, M_tm1, wr_tm1, ww_tm1, *args):
        # read
        if self.inner_rnn == 'lstm':
            h_tm1 = args[0:2][::-1]  # (cell_tm1, h_tm1)
        else:
            h_tm1 = args[0:1]  # (h_tm1, )
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[-1], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1, mask)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read, mask)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[-1], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1, mask)
        e = T.nnet.sigmoid(T.dot(h_t[-1], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[-1], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1, mask)

        return (M_t, wr_t, ww_t) + h_t

    def get_output(self, train=False):
        outputs = self.get_full_output(train)

        if self.return_sequences:
            return outputs[-1]
        else:
            return outputs[-1][:, -1]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as:
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as:
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)[:, :, 0]
        X = X.dimshuffle((1, 0, 2))

        init_states = self._get_initial_states(X.shape[1])
        outputs, updates = theano.scan(self._step,
                                       sequences=[X, padded_mask],
                                       outputs_info=init_states,
                                       non_sequences=self.params,
                                       truncate_gradient=self.truncate_gradient)

        out = [outputs[0].dimshuffle((1, 0, 2, 3)),
               outputs[1].dimshuffle(1, 0, 2),
               outputs[2].dimshuffle((1, 0, 2)),
               outputs[3].dimshuffle((1, 0, 2))]
        if self.inner_rnn == 'lstm':
            out = out + [outputs[4].dimshuffle((1, 0, 2))]
        return out
Example #50
0
                     MAX_SEQLEN,
                     make_categorical=True)
print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape)

# define network
EMBED_SIZE = 32
HIDDEN_SIZE = 32

BATCH_SIZE = 32
NUM_EPOCHS = 5

model = Sequential()
model.add(
    Embedding(len(s_word2id), EMBED_SIZE, input_length=MAX_SEQLEN,
              dropout=0.2))
model.add(LSTM(HIDDEN_SIZE, dropout_W=0.2, dropout_U=0.2))
#model.add(GRU(HIDDEN_SIZE, dropout_W=0.2, dropout_U=0.2))
#model.add(Bidirectional(LSTM(HIDDEN_SIZE, dropout_W=0.2, dropout_U=0.2)))
model.add(RepeatVector(MAX_SEQLEN))
model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
#model.add(GRU(HIDDEN_SIZE, return_sequences=True))
#model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
model.add(TimeDistributed(Dense(len(t_pos2id))))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

history = model.fit(Xtrain,
                    Ytrain,
Example #51
0
class NeuralTuringMachine(Recurrent):
    print(7)
    """ Neural Turing Machines
    Non-obvious parameters:
    -----------------------
    shift_range: int, number of available shifts, e.g. if 3, available shifts are
                 (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location
    Known issues:
    -------------
    Theano may complain when n_slots == 1.
    """

    def __init__(self, output_dim, n_slots, m_length, shift_range=3,
                 inner_rnn='lstm',
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=4, input_length=5, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        print(self.M)
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length,))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length,))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.output_dim))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def _write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return Mout

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return wout

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
            self.m_length, axis=2)
        init_M = init_M.flatten(ndim=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h, init_c]
        else:
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim


    def step(self, x, states):
        '''print(self.input_shape)
        print(self.n_slots)
        print(self.m_length)'''
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)
        print(h_t[0], [M_t, wr_t, ww_t] + h_t)


        return h_t[0], [M_t, wr_t, ww_t] + h_t
Example #52
0
class Stack(Recurrent):
    """ Stack and queue network

    output_dim: output dimension
    n_slots: number of memory slots
    m_length: dimension of each memory slot
    rnn_size: output size of the memory controller
    inner_rnn: "lstm" (only lstm is supported)
    stack: True to create a neural stack, False to create a neural queue

    from Learning to Transduce with Unbounded Memory
    [[http://arxiv.org/pdf/1506.02516.pdf]]
    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm',rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != "lstm":
            raise ValueError("Only lstm is supported as inner_rnn")
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()


        self.init_h = K.zeros((self.rnn_size))

        self.W_d = self.rnn.init((self.rnn_size,1))
        self.W_u = self.rnn.init((self.rnn_size,1))

        self.W_v = self.rnn.init((self.rnn_size,self.m_length))
        self.W_o = self.rnn.init((self.rnn_size,self.output_dim))

        self.b_d = K.zeros((1,),name="b_d")
        self.b_u = K.zeros((1,),name="b_u")
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        
        self.trainable_weights = self.rnn.trainable_weights + [
           self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u,  self.b_u,
            self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
        #self.trainable_weights =[self.W_d]
       

    def get_initial_states(self, X):
        batch_size = X.shape[0]

        init_r = K.zeros((self.m_length)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_V = K.zeros((self.n_slots, self.m_length)).dimshuffle('x', 0, 1).repeat(batch_size, axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1,), dtype=np.int32)

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        # controller reads the input together with the previous read vector
        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)

        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)  # push strength
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)  # pop strength
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)     # value to push
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)     # layer output

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::, 0],
                                             u_t[::, 0], v_t, time[0],
                                             stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
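As with the NTM above, this layer would typically sit inside an old-style Keras Sequential model; the following is only an assumed usage sketch (sizes and setup are not from the original file):

# Assumed usage sketch, not part of the original file; all sizes are arbitrary.
from keras.models import Sequential
from keras.layers.core import Activation

stack_model = Sequential()
stack_model.add(Stack(output_dim=8, n_slots=32, m_length=10, rnn_size=64,
                      inner_rnn='lstm', stack=True,
                      input_dim=8, input_length=20))
stack_model.add(Activation('sigmoid'))
stack_model.compile(loss='binary_crossentropy', optimizer='adam')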