Ejemplo n.º 1
0
    def _define_topology(self, network):
        """Create the model's layers, or adopt an already-built layer list.

        network - list of layer objects to reuse, or None. When a list is
                  given it becomes ``self.network`` and ``self.params`` is
                  filled from each layer's ``params``. When None, a fresh
                  embedding -> encoder -> decoder -> softmax stack is built
                  under the variable scope ``self.scope_name``.
        """
        with tf.variable_scope(self.scope_name):

            self.network, self.params = [], []

            if network is not None:
                # Reuse the caller's layers and gather their parameters.
                self.network = network
                for existing in self.network:
                    self.params.extend(existing.params)
                return

            self.embed_layer = EMBED_Layer(self.D, self.num_hids[1],
                                           'embedding_layer')
            emb_dim = self.num_hids[1]
            hid_dim = self.num_hids[2]

            # TODO: encoder and decoder depth is hard-coded to two layers
            # each; make it configurable when deeper stacks are required.
            enc_l1 = BiGRU_LN_Layer(emb_dim, hid_dim, 'char_encoder_l1')
            enc_l2 = BiGRU_LN_Layer(hid_dim, hid_dim, 'char_encoder_l2')
            self.encoder = [enc_l1, enc_l2]

            dec_l1 = GRU_LN_Layer(hid_dim, hid_dim, 'char_decoder_l1')
            dec_l2 = GRU_LN_Layer(hid_dim, emb_dim, 'char_decoder_l2')
            self.decoder = [dec_l1, dec_l2]

            self.softmax_layer = Layer(emb_dim, self.D, 'softmax',
                                       'l' + str(self.num_layers))

            # Layers in forward order.
            stack = [self.embed_layer]
            stack = stack + self.encoder
            stack = stack + self.decoder
            stack = stack + [self.softmax_layer]

            # Parameters are concatenated in the same order as the layers.
            gathered = self.embed_layer.params
            for layer in stack[1:]:
                gathered = gathered + layer.params

            self.params = gathered
            self.network = stack
Ejemplo n.º 2
0
    def __init__(self, model_params, config, network=None):
        """Build a network mixing convolutional and fully connected layers.

        model_params - dict read for the keys 'num_hids', 'num_stides',
                       'scope_name' and 'atypes'
        config       - forwarded to the base-class constructor and kept as
                       ``self.config``
        network      - optional list of pre-built layers; when given it is
                       used as-is and no new layers are created

        Every entry of ``num_hids`` is treated as a sequence: length 1 means
        a fully connected layer, length 3 means a convolutional layer
        (num_kern, filter_sz_x, filter_sz_y).

        Raises ValueError for a hidden-layer entry of any other length.
        """
        super(Convnet, self).__init__(config)

        self.config = config
        self.num_hids = model_params['num_hids']
        self.Xdim = self.num_hids[0]
        self.O = self.num_hids[-1]
        # The key is spelled 'num_stides' (sic); it is kept unchanged because
        # it is part of the dict layout callers already supply.
        self.strides = model_params['num_stides']
        self.scope_name = model_params['scope_name']
        self.atypes = model_params['atypes']

        #Defining Model Topology
        with tf.variable_scope(self.scope_name):

            self.network, self.params = [], []
            if network is not None:
                self.network = network
                for layer in self.network:
                    self.params += layer.params

            else:

                layer_dim = self.Xdim
                # Only the hidden entries are visited; the first (input) and
                # last (output) entries of num_hids get no layer here.
                for i in xrange(1, len(self.num_hids) - 1):

                    spec = self.num_hids[i]

                    # Fully connected layer
                    if len(spec) == 1:

                        prev_spec = self.num_hids[i - 1]
                        layer_dim = prev_spec

                        # Case when previous layer was convolutional layer:
                        # collapse its 3-entry spec into one input size.
                        # NOTE(review): the original tested `!= 3` against
                        # index -1, which contradicted this comment; confirm
                        # the intended flattening rule.
                        if len(prev_spec) == 3:
                            layer_dim = np.prod(prev_spec) / \
                                        np.prod(self.strides[i - 1])

                        layer_i = Layer(layer_dim,
                                        spec,
                                        self.atypes[i],
                                        'l' + str(i))

                    # Convolutional layer:
                    # 3 Dim. corresponds to (num_kern, filter_sz_x, filter_sz_y)
                    elif len(spec) == 3:

                        #TODO layer_dim is not updated between stacked
                        #convolutional layers
                        layer_i = Conv_Layer(layer_dim,
                                             spec[0],
                                             spec[1:],
                                             self.atypes[i],
                                             self.strides[i],
                                             'conv_l' + str(i))

                    else:
                        raise ValueError(
                            'num_hids[%d] must have 1 or 3 entries, got %d'
                            % (i, len(spec)))

                    self.network.append(layer_i)
                    # Use the layer object directly: the loop index starts
                    # at 1, so it does not line up with self.network.
                    self.params += layer_i.params
Ejemplo n.º 3
0
    def __init__(self, model_params, config, network=None):
        """Build a stack of fully connected layers.

        model_params - dict read for the keys 'num_hids', 'scope_name' and
                       'atypes'
        config       - forwarded to the base-class constructor and kept as
                       ``self.config``
        network      - optional list of pre-built layers; when given it is
                       used as-is and no new layers are created
        """
        super(MLP, self).__init__(config)

        self.config = config

        layer_sizes = model_params['num_hids']
        self.D = layer_sizes[0]
        self.num_hids = layer_sizes
        self.O = layer_sizes[-1]
        self.scope_name = model_params['scope_name']
        self.atypes = model_params['atypes']

        #Defining Model Topology
        with tf.variable_scope(self.scope_name):

            self.network, self.params = [], []

            if network is not None:
                # Adopt the supplied layers and collect their parameters.
                self.network = network
                for layer in self.network:
                    self.params.extend(layer.params)
                return

            # One layer per consecutive pair of sizes in num_hids.
            n_layers = len(self.num_hids) - 1
            idx = 0
            while idx < n_layers:
                fc_layer = Layer(self.num_hids[idx],
                                 self.num_hids[idx + 1],
                                 self.atypes[idx],
                                 'l' + str(idx))
                self.network.append(fc_layer)
                self.params.extend(self.network[idx].params)
                idx += 1
Ejemplo n.º 4
0
    def __init__(self, model_config, network=None, summary_writer=None):
        """Build the character-level recurrent model.

        model_config   - dict read for the keys 'vocab_sz', 'num_hids',
                         'seq_length', 'scope_name', 'rnn_type', 'atype',
                         'batch_sz', 'ind2voc' and 'voc2ind'
        network        - optional list of pre-built layers; when given it is
                         used as-is and no new layers are created
        summary_writer - stored on the instance as ``self.summary_writer``

        Raises ValueError when a recurrent layer has to be created and
        'rnn_type' is not one of 'rnn', 'irnn' or 'lstm'.
        """
        super(CHAR_RNN, self).__init__(model_config)

        self.model_config = model_config
        self.vocab_sz = model_config['vocab_sz']
        self.num_hids = model_config['num_hids']
        self.D = model_config['num_hids'][0]
        self.O = model_config['num_hids'][-1]
        self.TT = model_config['seq_length']
        self.scope_name = model_config['scope_name'] or 'char_rnn'
        self.rnn_type = model_config['rnn_type']
        self.atype = model_config['atype']
        self.batch_sz = model_config['batch_sz']
        self.num_layers = len(self.num_hids) - 1
        self.summary_writer = summary_writer
        self.ind2voc = model_config['ind2voc']
        self.voc2ind = model_config['voc2ind']

        #Defining Model Topology
        with tf.variable_scope(self.scope_name):

            self.network, self.params = [], []
            if network is not None:
                self.network = network
                for layer in self.network:
                    self.params += layer.params

            else:

                self.embed_layer = EMBED_Layer(self.D, self.num_hids[1],
                                               'embedding_layer')
                self.network.append(self.embed_layer)
                self.params += self.embed_layer.params

                for i in xrange(1, self.num_layers - 1):
                    if self.rnn_type == 'rnn' or self.rnn_type == 'irnn':

                        ## TODO
                        # The RNN implementation has not been tested at all.
                        self.rnn_layer_i = RNNLayer(self.num_hids[i],
                                                    self.num_hids[i + 1],
                                                    self.atype,
                                                    'rnn_ly' + str(i))

                    elif self.rnn_type == 'lstm':
                        self.rnn_layer_i = LSTMLayer(self.num_hids[i],
                                                     self.num_hids[i + 1],
                                                     'rnn_ly' + str(i))

                    else:
                        ## TODO GRU not available yet
                        # Fail loudly instead of reusing a stale layer or
                        # hitting an AttributeError on self.rnn_layer_i.
                        raise ValueError('unsupported rnn_type: %r'
                                         % (self.rnn_type, ))

                    self.network.append(self.rnn_layer_i)
                    self.params += self.rnn_layer_i.params

                self.softmax_layer = Layer(self.num_hids[-2], self.num_hids[-1],\
                                     'softmax', 'l'+str(self.num_layers))

                # Extend rather than reassign: reassigning self.params here
                # used to throw away the recurrent layers' parameters that
                # the loop above had collected.
                self.params += self.softmax_layer.params

                self.network.append(self.softmax_layer)
Ejemplo n.º 5
0
class CHAR_RNN(BaseModel):
    """Character-level recurrent language model.

    The layer stack is: embedding layer -> recurrent layers (RNNLayer or
    LSTMLayer, selected by ``rnn_type``) -> softmax layer.
    """

    def __init__(self, model_config, network=None, summary_writer=None):
        """Build the character-level recurrent model.

        model_config   - dict read for the keys 'vocab_sz', 'num_hids',
                         'seq_length', 'scope_name', 'rnn_type', 'atype',
                         'batch_sz', 'ind2voc' and 'voc2ind'
        network        - optional list of pre-built layers; when given it is
                         used as-is and no new layers are created
        summary_writer - stored on the instance as ``self.summary_writer``

        Raises ValueError when a recurrent layer has to be created and
        'rnn_type' is not one of 'rnn', 'irnn' or 'lstm'.
        """
        super(CHAR_RNN, self).__init__(model_config)

        self.model_config = model_config
        self.vocab_sz = model_config['vocab_sz']
        self.num_hids = model_config['num_hids']
        self.D = model_config['num_hids'][0]
        self.O = model_config['num_hids'][-1]
        self.TT = model_config['seq_length']
        self.scope_name = model_config['scope_name'] or 'char_rnn'
        self.rnn_type = model_config['rnn_type']
        self.atype = model_config['atype']
        self.batch_sz = model_config['batch_sz']
        self.num_layers = len(self.num_hids) - 1
        self.summary_writer = summary_writer
        self.ind2voc = model_config['ind2voc']
        self.voc2ind = model_config['voc2ind']

        #Defining Model Topology
        with tf.variable_scope(self.scope_name):

            self.network, self.params = [], []
            if network is not None:
                self.network = network
                for layer in self.network:
                    self.params += layer.params

            else:

                self.embed_layer = EMBED_Layer(self.D, self.num_hids[1],
                                               'embedding_layer')
                self.network.append(self.embed_layer)
                self.params += self.embed_layer.params

                for i in xrange(1, self.num_layers - 1):
                    if self.rnn_type == 'rnn' or self.rnn_type == 'irnn':

                        ## TODO
                        # The RNN implementation has not been tested at all.
                        self.rnn_layer_i = RNNLayer(self.num_hids[i],
                                                    self.num_hids[i + 1],
                                                    self.atype,
                                                    'rnn_ly' + str(i))

                    elif self.rnn_type == 'lstm':
                        self.rnn_layer_i = LSTMLayer(self.num_hids[i],
                                                     self.num_hids[i + 1],
                                                     'rnn_ly' + str(i))

                    else:
                        ## TODO GRU not available yet
                        # Fail loudly instead of reusing a stale layer or
                        # hitting an AttributeError on self.rnn_layer_i.
                        raise ValueError('unsupported rnn_type: %r'
                                         % (self.rnn_type, ))

                    self.network.append(self.rnn_layer_i)
                    self.params += self.rnn_layer_i.params

                self.softmax_layer = Layer(self.num_hids[-2], self.num_hids[-1],\
                                     'softmax', 'l'+str(self.num_layers))

                # Extend rather than reassign: reassigning self.params here
                # used to throw away the recurrent layers' parameters that
                # the loop above had collected.
                self.params += self.softmax_layer.params

                self.network.append(self.softmax_layer)

    def __call__(self, X, Hts, Ots=None):
        """Run the forward pass; see ``fp`` for arguments and return value."""

        # NOTE(review): self.scope is not assigned anywhere in this class
        # (only self.scope_name is) -- presumably BaseModel provides it;
        # confirm.
        with tf.variable_scope(self.scope):

            #TODO: consider outputting error
            return self.fp(X, Hts, Ots)

    def fp(self, X, Hts, Ots=None, num_steps=None):
        """ Forward pass

            X         - input indices; column t (``X[:, t]``) is fed at step t
            Hts, Ots  - recurrent state containers handed to ``step``
            num_steps - number of steps to unroll (defaults to self.TT)

            Returns (preds, [logits], Hts, Ots), where preds is the softmax
            of logits. """

        if num_steps is None: num_steps = self.TT
        with tf.variable_scope(self.scope_name):

            ## Intermediate Layers: embed one column at a time and push it
            ## through the recurrent stack.
            Yts = []
            for t in xrange(num_steps):
                Xt = X[:, t]
                Et = self.embed_layer.fp(Xt)
                Ot, Hts, Ots = self.step(Et, Hts, Ots)
                Yts.append(Ot)

            ## Softmax Layer
            YYY = tf.reshape(tf.concat(1, Yts), [-1, self.num_hids[-2]])
            logits = self.softmax_layer.get_logit(YYY)
            preds = tf.nn.softmax(logits)

            return preds, [logits], Hts, Ots

    def cost(self,
             X,
             Y,
             Hts_init=None,
             Ots_init=None,
             batch_sz=None,
             lam=0.0005,
             TT=None):
        """ Returns the training loss (sequence loss plus L2 penalty)

            X, Y      - source and target indices
            Hts_init,
            Ots_init  - initial recurrent states; built by ``_init_states``
                        when Hts_init is None
            batch_sz  - defaults to self.batch_sz
            lam       - weight of the L2 penalty over all trainable variables
            TT        - number of unrolled steps (defaults to self.TT) """

        if batch_sz is None: batch_sz = self.batch_sz
        if Hts_init is None: Hts_init, Ots_init = self._init_states(Ots_init)
        # Resolve TT once so that the unrolled length, the loss weights and
        # the normaliser all agree when a caller overrides it.
        if TT is None: TT = self.TT

        preds, logits, _, _ = self.fp(X, Hts_init, Ots_init, num_steps=TT)

        ## Measured based on perplexity - measures how surprised the network
        ## is to see the next character in a sequence.
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            logits, [tf.reshape(Y, [-1])], [tf.ones([batch_sz * TT])],
            self.vocab_sz)

        cost = tf.reduce_sum(loss) / tf.to_float(batch_sz) / tf.to_float(TT)
        l2_loss = tf.add_n([tf.nn.l2_loss(v) \
                        for v in tf.trainable_variables()])

        with tf.variable_scope('summary'):
            tf.histogram_summary("prediction_error", preds)
            tf.scalar_summary("Cost", cost)
            self.summarize = tf.merge_all_summaries()

        return cost + lam * l2_loss

    def step(self, Xt, Hts, Ots=None):
        """ Push one embedded time step through the recurrent stack

            Xt  - embedded input for this step
            Hts - deque of per-layer hidden states; each state is popped
                  from the left and its update appended on the right, so
                  the layer order is preserved after a full pass
            Ots - passed through unchanged

            Returns (Ot, Hts, Ots), Ot being the top layer's output. """

        Ot = Xt
        for i in xrange(1, self.num_layers - 1):

            Ht = Hts.popleft()
            if self.rnn_type == 'lstm':

                Ot, Ht = self.network[i].fp(Ot, Ht)
            else:
                # Plain RNN layers return only the hidden state, which is
                # also the layer output: feed the running output (not the
                # raw embedding) and carry the result upwards.
                Ht = self.network[i].fp(Ot, Ht)
                Ot = Ht
            Hts.append(Ht)

        return Ot, Hts, Ots

    def get_context(self, X, Hts_init=None, Ots_init=None, num_steps=None):
        """
        Runs the forward pass over X = {x_1, ..., x_T} and returns the
        symbol predicted at index ``num_steps - 1`` together with the final
        states Hts and Ots.

        Note that an lstm layer returns two items, outputs Ots and hiddens
        Hts, whereas an RNN returns one item, hiddens Hts (outputs and
        hiddens are the same). Hence, argument Ots is optional and only
        relevant when lstm layers are used.
        """

        if num_steps is None: num_steps = self.TT
        if Hts_init is None: Hts_init, Ots_init = self._init_states(Ots_init)

        preds, logits, Hts, Ots  = self.fp(X,   Hts=Hts_init, \
                                                Ots=Ots_init, \
                                                num_steps=num_steps)
        symbols = tf.stop_gradient(tf.argmax(preds, 1))

        return tf.reshape(symbols[num_steps - 1], [1]), Hts, Ots

    def sample(self, Ct, num_steps, Hts=None, Ots=None):
        """
        Generates a sample of text

        Ct        - index of the seed character
        num_steps - number of characters to generate
        Hts, Ots  - recurrent states; fresh zero states are created when
                    Hts is None

        Returns the list of character indices, starting with the seed.

        Note that an lstm layer returns two items, outputs Ots and hiddens
        Hts, whereas an RNN returns one item, hiddens Hts (outputs and
        hiddens are the same). Hence, argument Ots is optional and only
        relevant when lstm layers are used.
        """

        # Hts defaults to None, which ``step`` cannot pop from.
        if Hts is None: Hts, Ots = self._init_states(Ots)

        char_inds = [Ct]

        #TODO : At test time, the prediction is currently done iteratively
        #character by character in a greedy fashion, but eventually needs to be
        #implemented more sophisticated methods (e.g. beam search).
        with tf.variable_scope(self.scope_name):

            for t in xrange(num_steps):
                Xt = self.embed_layer.fp(Ct)
                Ht, Hts, Ots = self.step(Xt, Hts, Ots)
                Ht = tf.reshape([Ht], [-1, self.num_hids[-2]])
                Ct = self.get_character(Ht, stype='argmax')
                char_inds.append(Ct)

            return char_inds

    def get_character(self, Ht, stype='multinomial'):
        """stype - sample type, either 'argmax' | 'multinomial'

           Returns the chosen character index for each row of Ht, with the
           gradient stopped. """

        if stype == 'multinomial':
            # tf.multinomial draws from unnormalised log-probabilities and
            # already returns indices, one column per draw. Taking argmax
            # over that single column always gave 0, so the draw itself is
            # returned (flattened to one index per row).
            logits = self.softmax_layer.get_logit(Ht)
            draws = tf.multinomial(logits, 1, seed=1234, name=None)
            symbol = tf.reshape(draws, [-1])
        else:
            #Get prediction and take the most likely character
            pred = self.softmax_layer.fp(Ht)
            symbol = tf.argmax(pred, 1)

        return tf.stop_gradient(symbol)

    def clone(self, new_scope_name=None):
        """Not implemented yet; returns None."""

        #TODO
        pass

    def _init_states(self, Ots_init=None):
        """ Create zero initial states

            Returns (Hts_init, Ots_init). Hts_init is a deque with one
            [1, num_hids[i]] zero tensor for i in 2 .. num_layers-1.
            Ots_init is rebuilt as a deque of zero tensors when rnn_type is
            'lstm'; otherwise the argument is returned unchanged. """
        Hts_init = deque()
        for i in xrange(2, self.num_layers):
            Hts_init.append(tf.zeros([1, self.num_hids[i]], tf.float32))

        if self.rnn_type == 'lstm':
            Ots_init = deque()
            for i in xrange(2, self.num_layers - 1):
                Ots_init.append(tf.zeros([1, self.num_hids[i]], tf.float32))

        return Hts_init, Ots_init
Ejemplo n.º 6
0
class CHAR_SEQ2SEQ_CHAR2WORD_SKIP(BaseModel):
    """Character-level encoder/decoder model with an additional "skip"
    decoder.

    One embedding layer and a two-layer BiGRU_LN encoder feed two separate
    two-layer GRU_LN decoders (``decoder`` and ``skip_decoder``), each with
    its own softmax layer.
    """

    def __init__(self, model_config, network=None, summary_writer=None):
        """ model_config   - dict read for the keys 'vocab_sz', 'num_hids',
                             'max_steps', 'scope_name', 'rnn_type',
                             'batch_sz', 'ind2voc' and 'voc2ind'
            network        - optional list of pre-built layers to reuse
            summary_writer - stored as ``self.summary_writer`` """

        # NOTE(review): BaseModel.__init__ is never invoked here -- confirm
        # that this is intentional.
        self.model_config = model_config
        self.vocab_sz = model_config['vocab_sz']
        self.num_hids = model_config['num_hids']
        self.D = model_config['num_hids'][0]
        self.O = model_config['num_hids'][-1]
        self.TT = model_config['max_steps']
        self.scope_name = model_config['scope_name'] or 'char_rnn'
        self.rnn_type = model_config['rnn_type']
        self.batch_sz = model_config['batch_sz']
        self.num_layers = len(self.num_hids) - 1
        self.summary_writer = summary_writer
        self.ind2voc = model_config['ind2voc']
        self.voc2ind = model_config['voc2ind']

        #Defining Model Topology
        self._define_topology(network)

    def _define_topology(self, network):
        """ Create all layers under ``self.scope_name``, or adopt `network`

            network - list of pre-built layers, or None. When given, it
                      becomes ``self.network`` and ``self.params`` collects
                      each layer's ``params``; no other layer attribute
                      (embed_layer, encoder, ...) is set in that case. """

        with tf.variable_scope(self.scope_name):

            self.network, self.params = [], []
            if network is not None:
                self.network = network
                for layer in self.network:
                    self.params += layer.params

            else:

                self.embed_layer = EMBED_Layer(self.D, self.num_hids[1],
                                               'embedding_layer')
                #TODO :
                # Encoder and decoder depth is hard-coded to two layers
                # each; should be flexible enough to have deeper layers
                # when they are required
                self.encoder     = [BiGRU_LN_Layer(self.num_hids[1], \
                                                   self.num_hids[2],   \
                                                   'char_encoder_l1'), \
                                    BiGRU_LN_Layer(self.num_hids[2], \
                                                   self.num_hids[2],   \
                                                   'char_encoder_l2')]

                self.decoder     = [GRU_LN_Layer(self.num_hids[2], \
                                                 self.num_hids[2], \
                                                 'char_decoder_l1'), \
                                    GRU_LN_Layer(self.num_hids[2],
                                                 self.num_hids[1],
                                                 'char_decoder_l2')]

                self.skip_decoder = [GRU_LN_Layer(self.num_hids[2], \
                                                  self.num_hids[2], \
                                                  'char_skip_decoder_l1'), \
                                     GRU_LN_Layer(self.num_hids[2],\
                                                  self.num_hids[1],
                                                  'char_skip_decoder_l2')]


                self.softmax_layer = Layer(self.num_hids[1], self.D,\
                                     'softmax', 'l'+str(self.num_layers))

                self.skip_softmax_layer = Layer(self.num_hids[1], \
                                                self.num_hids[-1],\
                                                'softmax',
                                                'skip_l'+str(self.num_layers))


                self.params  = self.embed_layer.params  + \
                               self.encoder[0].params   + self.encoder[1].params +\
                               self.decoder[0].params   + self.decoder[1].params +\
                               self.softmax_layer.params   + \
                               self.skip_decoder[0].params + self.skip_decoder[1].params + \
                               self.skip_softmax_layer.params

                self.network = [self.embed_layer] + \
                                self.encoder + \
                                self.decoder + \
                               [self.softmax_layer] + \
                                self.skip_decoder + \
                               [self.skip_softmax_layer]

    def __call__(self, X):
        """Intended shortcut for the forward pass."""

        # NOTE(review): this call cannot succeed as written -- fp() also
        # requires XXM, batch_sz, num_esteps and num_dsteps, and self.scope
        # is not assigned in this class. The signature is left untouched
        # because there is no sensible default for XXM.
        with tf.variable_scope(self.scope):

            #TODO: consider outputting error
            return self.fp(X)

    def load_params(self, np_param_dict):
        """ Build assign ops that copy numpy values into ``self.params``

            np_param_dict - mapping whose values expose ``.shape``; it is
                            paired with ``self.params`` positionally through
                            its iteration order
                            # NOTE(review): positional pairing is only
                            # meaningful for an ordered mapping -- confirm

            Returns the list of assign ops for the entries whose shapes
            match; mismatches are reported on stdout and skipped. """

        assign_ops = []
        for param, np_param_key in zip(self.params, np_param_dict):
            if param.get_shape() == np_param_dict[np_param_key].shape:
                assign_ops.append(tf.assign(param,
                                            np_param_dict[np_param_key]))
            else:
                # Single-argument print(...) behaves the same under the
                # Python 2 print statement and the Python 3 function.
                print('LOAD FAIL: paramter mis-match')
                print(param.name)
                print(np_param_key)
        return assign_ops

    def cost(self, X, Y, XXM, YYM, batch_sz=None, num_steps=None, lam=0.0005):
        ''' Returns loss
                X   - source indice
                Y   - target indice
                XXM - source mask, handed to the encoder
                YYM - target mask; masked-out positions add nothing
                lam - currently unused (the L2 penalty is disabled)

                The loss is exp(length-normalised masked negative
                log-likelihood) per sequence, averaged over the batch.

                Note that number of batch size is not fixed per update'''

        if batch_sz is None: batch_sz = tf.shape(Y)[0]
        if num_steps is None: num_steps = self.TT

        # fp returns (pred, pred_skip); only the main decoder's output is
        # scored here.
        # NOTE(review): confirm whether the skip decoder should contribute
        # to the loss as well.
        preds, _ = self.fp(X, XXM, batch_sz, \
                    num_esteps=num_steps, num_dsteps=num_steps)
        # assumes preds is (steps, batch, D) before this transpose -- TODO
        # confirm against softmax_layer.propagate
        preds = tf.transpose(preds, perm=[1, 0, 2])

        ## Measured based on perplexity - measures how surprised the network
        ## is to see the next character in a sequence.

        py = tf.reshape(preds, [-1, self.D])
        mask = tf.cast(YYM, 'float32')
        Y_len = tf.reduce_sum(mask, 1)

        # Pick p(target) for every (sequence, step) row: row r, column Y[r]
        # of py sits at offset r * D + Y[r] in the flattened matrix.
        flat_idx = tf.range(0, batch_sz * num_steps) * self.D + \
                   tf.reshape(tf.cast(Y, 'int32'), [-1])
        p_target = tf.gather(tf.reshape(py, [-1]), flat_idx)

        cost = -tf.log(p_target) * tf.reshape(mask, [-1])
        cost = tf.reshape(cost, [-1, num_steps]) / tf.expand_dims(Y_len, 1)
        cost = tf.exp(tf.reduce_sum(cost, 1))
        cost = tf.reduce_sum(cost) / tf.cast(batch_sz, 'float32')

        # The L2 penalty over tf.trainable_variables() is disabled.
        with tf.variable_scope('summary'):
            tf.histogram_summary("prediction_error", preds)
            tf.scalar_summary("Cost", cost)
            self.summarize = tf.merge_all_summaries()

        return cost  #+ lam * l2_loss

    def encoder_fp(self, X, XXM, batch_sz, num_steps=None):
        """ Encode X and return the encoder output selected by
            ``extract_last_relevant`` from the per-sequence length
            (the row sums of the mask XXM). """

        if num_steps is None: num_steps = self.TT

        #Encoder pass
        E = self.embed_layer.fp(X)
        Hts_enc = self._init_states(self.encoder, batch_sz)
        Hts_enc = self.estep(E, Hts_enc, num_steps)

        #Decoder hidden initialization
        last_indice = tf.cast(tf.reduce_sum(XXM, 1) - 1, 'int32')

        content = extract_last_relevant(Hts_enc, last_indice + 1)

        return content

    def estep(self, E, Hs, num_esteps=None):
        """ Run the embedded sequence E through every encoder layer

            E          - embedded input; its first two axes are swapped
                         before it enters the first layer
            Hs         - initial state for each encoder layer, by index
            num_esteps - number of steps (defaults to self.TT)

            Returns the output of the last encoder layer. """

        if num_esteps is None: num_esteps = self.TT

        Oi = tf.transpose(E, perm=[1, 0, 2])
        with tf.variable_scope(self.scope_name):
            for i in xrange(len(self.encoder)):

                Hi = Hs[i]
                Oi = self.encoder[i].propagate(Oi, h0=Hi,\
                                     n_steps=tf.cast(num_esteps, 'int32'))
            return Oi

    def decoder_fp(self,
                   Ct,
                   Hts,
                   decoder,
                   softmax_layer,
                   num_steps=None,
                   scanF=False):
        """ Decoder RNN
            Ct            - initial character indices
            Hts           - pair of initial states, one per decoder layer
            decoder       - the two decoder layers to run
            softmax_layer - output layer applied after the second layer
            num_steps     - number of total steps (defaults to self.TT)
            scanF         - take the scan-based branch instead of the
                            unrolled Python loop

            Returns the stacked per-step predictions. """

        if num_steps is None: num_steps = self.TT

        with tf.variable_scope(self.scope_name):

            if scanF:
                # NOTE(review): this branch cannot run as written -- C0 is
                # undefined in this scope and tf.scan is called with
                # Theano-style keywords (outputs_info, n_steps). Kept as-is
                # pending a proper port.
                O10, O20 = Hts

                def decode_t(ctm1, h10, h20):

                    Et = self.embed_layer.fp(ctm1)
                    O1t = decoder[0].propagate(Et, n_steps=1, h0=h10)
                    O2t = decoder[1].propagate(O1t, n_steps=1, h0=h20)
                    pred = softmax_layer.propagate(O2t, atype='softmax')
                    ct = self.get_character(pred, stype='argmax')
                    return ct, O1t, O2t, pred

                [Cts, Hts1, Hts2,
                 preds], updates = tf.scan(decode_t,
                                           outputs_info=[C0, O10, O20, None],
                                           n_steps=tf.cast(num_steps,
                                                           dtype='int32'))
            else:
                O1t, O2t = Hts
                preds = []
                for t in xrange(num_steps):

                    # Greedy decoding: the character chosen at one step is
                    # the input of the next.
                    Et = self.embed_layer.fp(Ct)
                    O1t = decoder[0].propagate(Et, n_steps=1, h0=O1t)
                    O2t = decoder[1].propagate(O1t, n_steps=1, h0=O2t)
                    pred = softmax_layer.propagate(O2t, atype='softmax')
                    Ct = self.get_character(pred, stype='argmax')
                    preds.append(pred)
                preds = tf.pack(preds)
            return preds

    def fp(self, X, XXM, batch_sz, num_esteps, num_dsteps):
        """ Forward pass: encode X, then run both decoders

            X, XXM     - source indices and source mask
            batch_sz   - number of sequences (Python int or scalar tensor)
            num_esteps - encoder steps (None -> self.TT)
            num_dsteps - decoder steps (None -> self.TT)

            Returns (pred, pred_skip): the predictions of the main decoder
            and of the skip decoder. """

        if num_esteps is None: num_esteps = self.TT
        if num_dsteps is None: num_dsteps = self.TT

        #Encoder pass
        Hts_dec_init = self.encoder_fp(X, XXM, batch_sz, num_steps=num_esteps)
        Hts_dec = self._init_states(self.decoder, batch_sz)
        # The first decoder layer starts from the encoder summary.
        Hts_dec.pop(0)
        Hts_dec.insert(0, Hts_dec_init)

        #Decoder pass
        # Start symbol: index vocab_sz for every sequence in the batch.
        # Built from this method's own batch_sz and self.vocab_sz; the
        # previous code read the module globals `tr_config` and `model`,
        # which are not defined alongside this class.
        C0 = tf.fill(tf.reshape(batch_sz, [1]), self.vocab_sz)
        pred = self.decoder_fp(C0, Hts_dec, self.decoder, \
                                            self.softmax_layer, \
                                            num_steps=num_dsteps)

        pred_skip = self.decoder_fp(C0, Hts_dec, self.skip_decoder, \
                                            self.skip_softmax_layer, \
                                            num_steps=num_dsteps)

        return pred, pred_skip

    def get_character(self, preds, stype='argmax'):
        """stype - sample type, either 'argmax' | 'multinomial'

           Returns the argmax over the last axis of preds (axis 1 for 2-D
           input, axis 2 otherwise), with the gradient stopped. """

        #Sample a character
        if stype == 'multinomial':
            # NOTE(review): the draw below is never used, so 'multinomial'
            # currently yields the same argmax result -- confirm the
            # intended sampling behaviour before wiring it in.
            pred = tf.multinomial(preds, 1, seed=1234, name=None)

        if len(preds.get_shape()) == 2:
            sample_char = tf.argmax(preds, dimension=1)
        else:
            sample_char = tf.argmax(preds, dimension=2)
        symbol = tf.stop_gradient(sample_char)

        return symbol

    def get_fp_embedding(self, X, _):
        """ Embed the most likely symbol the softmax layer predicts for X

            The second argument is ignored. """

        pred = self.softmax_layer.fp(X)
        symbol = tf.stop_gradient(tf.argmax(pred, 1))

        return self.embed_layer.fp(symbol)

    def get_context(self, X, Hts_init=None, Ots_init=None, num_steps=None):
        """
        Intended to return the first decoder character C0 together with the
        encoder states for input X = {x_1, ..., x_T}.

        NOTE(review): this method does not match the rest of the class --
        _init_states takes (submodel, batch_sz) and returns one list, and
        encoder_fp takes (X, XXM, batch_sz, num_steps) and returns a single
        value -- so the calls below cannot succeed as written. Left
        unchanged pending a decision on the intended behaviour.
        """

        if num_steps is None: num_steps = self.TT
        if Hts_init is None: Hts_init, Ots_init = self._init_states('bilstm')

        Ht, Hts, Ots = self.encoder_fp(X,
                                       Hts_init,
                                       Ots=Ots_init,
                                       num_steps=num_steps)
        C0 = self.get_character(Ht, stype='argmax')
        return C0, Hts, Ots

    def get_samples(self, X_prior, XXM, num_esteps, num_dsteps, stype='rec'):
        """Samples refers to reconstruction (only in this case)
            X_prior     - source indices; X_prior.shape[0] is used as the
                          batch size
            XXM         - source mask
            num_esteps  - number of encoder steps
            num_dsteps  - number of decoder steps
            stype       - 'skip' selects the skip decoder's output; any
                          other value selects the main decoder's

            Returns (characters, predictions) for the selected decoder. """

        #TODO : At test time, the prediction is currently done iteratively
        #character by character in a greedy fashion, but eventually needs to be
        #implemented more sophisticated methods (e.g. beam search).

        # fp's keyword names are num_esteps / num_dsteps.
        preds, preds_skip  = self.fp(X_prior, XXM, X_prior.shape[0], \
                                       num_esteps=num_esteps, \
                                       num_dsteps=num_dsteps)

        if stype == 'skip':
            ct = self.get_character(preds_skip)
            return ct, preds_skip

        ct = self.get_character(preds)
        return ct, preds

    def _init_states(self, submodel, batch_sz):
        """ Return a list with one (batch_sz, rnn.M) float32 zero tensor
            for every BiGRU_LN_Layer / GRU_LN_Layer in `submodel`; layers
            of any other type are skipped. """

        Hts = []

        for rnn in submodel:
            if isinstance(rnn, (BiGRU_LN_Layer, GRU_LN_Layer)):
                Hts.append(tf.zeros((batch_sz, rnn.M), dtype='float32'))

        return Hts

    def clone(self, new_scope_name=None):
        """Not implemented yet; returns None."""

        #TODO
        pass
Ejemplo n.º 7
0
class CHAR_SEQ2SEQ(BaseModel):
    def __init__(self, model_config, network=None, summary_writer=None):
        """ Store the configuration and build the layer stack

            model_config   - dict read for the keys 'vocab_sz', 'num_hids',
                             'seq_length', 'scope_name', 'rnn_type',
                             'atype', 'batch_sz', 'ind2voc' and 'voc2ind'
            network        - optional list of pre-built layers, forwarded
                             to ``_define_topology``
            summary_writer - stored as ``self.summary_writer`` """
        super(CHAR_SEQ2SEQ, self).__init__(model_config)

        cfg = model_config
        self.model_config = cfg

        # Sizes: first entry of num_hids is the input size, last the output.
        self.vocab_sz = cfg['vocab_sz']
        self.num_hids = cfg['num_hids']
        self.D = cfg['num_hids'][0]
        self.O = cfg['num_hids'][-1]
        self.TT = cfg['seq_length']

        # Fall back to 'char_rnn' when the configured scope name is falsy.
        configured_scope = cfg['scope_name']
        self.scope_name = configured_scope if configured_scope else 'char_rnn'

        self.rnn_type = cfg['rnn_type']
        self.atype = cfg['atype']
        self.batch_sz = cfg['batch_sz']
        self.num_layers = len(self.num_hids) - 1
        self.summary_writer = summary_writer

        # Vocabulary lookup tables.
        self.ind2voc = cfg['ind2voc']
        self.voc2ind = cfg['voc2ind']

        #Defining Model Topology
        self._define_topology(network)

    def _define_topology(self, network):
        """ Create all layers under ``self.scope_name``, or adopt `network`

            network - list of pre-built layers, or None. When given, it
                      becomes ``self.network`` and ``self.params`` collects
                      each layer's ``params``. When None, an embedding
                      layer, a two-layer LSTM encoder, a two-layer LSTM
                      decoder and a softmax layer are created. """

        with tf.variable_scope(self.scope_name):

            self.network, self.params = [], []
            if network is not None:
                self.network = network
                for layer in self.network:
                    self.params += layer.params

            else:

                self.embed_layer = EMBED_Layer(self.D, self.num_hids[1],
                                               'embedding_layer')
                #TODO :
                # Encoder and decoder depth is hard-coded to two layers
                # each; should be flexible enough to have deeper layers
                # when they are required
                self.encoder     = [LSTMLayer(  self.num_hids[1], \
                                                self.num_hids[2], \
                                                'char_encoder_l1') , \
                                    LSTMLayer(  self.num_hids[2],
                                                self.num_hids[3],
                                                'char_encoder_l2')]

                self.decoder     = [LSTMLayer(  self.num_hids[1], \
                                                self.num_hids[2], \
                                                'char_decoder_l1') , \
                                    LSTMLayer(  self.num_hids[2],
                                                self.num_hids[3],
                                                'char_decoder_l2')]

                self.softmax_layer = Layer(self.num_hids[3], self.num_hids[-1],\
                                     'softmax', 'l'+str(self.num_layers))

                self.network = [self.embed_layer] + \
                                self.encoder + \
                                self.decoder + \
                               [self.softmax_layer]

                # Collect the parameters of every layer in network order.
                # The second encoder and decoder layers used to be left out
                # of self.params.
                self.params  = self.embed_layer.params  + \
                               self.encoder[0].params   + self.encoder[1].params +\
                               self.decoder[0].params   + self.decoder[1].params +\
                               self.softmax_layer.params

    def __call__(self, X, batch_sz=None, num_esteps=None, num_dsteps=None):
        """ Run the forward pass on X

            batch_sz   - defaults to self.batch_sz
            num_esteps,
            num_dsteps - forwarded to ``fp``, which replaces None with
                         self.TT

            The extra parameters are optional so that the one-argument
            call keeps working; fp() requires all four arguments. """

        if batch_sz is None: batch_sz = self.batch_sz

        # NOTE(review): self.scope is not assigned in the visible part of
        # this class (only self.scope_name is) -- presumably BaseModel
        # provides it; confirm.
        with tf.variable_scope(self.scope):

            #TODO: consider outputting error
            return self.fp(X, batch_sz, num_esteps, num_dsteps)

    def encoder(self, X, batch_sz, num_steps=None):
        """ Embed X and run it through the encoder layers

            X         - input indices handed to the embedding layer
            batch_sz  - batch size used to build the initial states
            num_steps - number of encoder steps (defaults to self.TT)

            Returns whatever ``estep`` returns for the embedded input. """

        steps = self.TT if num_steps is None else num_steps

        #Encoder pass
        embedded = self.embed_layer.fp(X)
        initial_states = self._init_states(self.encoder, batch_sz)
        return self.estep(embedded, initial_states, steps)

    def estep(self, E, Hs, num_esteps=None):
        """ Run the embedded sequence E through every encoder layer

            E          - embedded input; its first two axes are swapped
                         before it enters the first layer
            Hs         - initial state for each encoder layer, by index
            num_esteps - number of steps (defaults to self.TT)

            Returns the output of the last encoder layer. """

        if num_esteps is None:
            num_esteps = self.TT

        seq = tf.transpose(E, perm=[1, 0, 2])
        with tf.variable_scope(self.scope_name):
            # Each layer consumes the previous layer's output.
            for idx, enc_layer in enumerate(self.encoder):
                seq = enc_layer.propagate(
                    seq,
                    h0=Hs[idx],
                    n_steps=tf.cast(num_esteps, 'int32'))
        return seq

    def decoder(self, predt, Hts, Ots=None, num_steps=None, scanF=False):
        """ Decoder RNN
            predt     - decoder input, embedded at every step
            Hts       - pair of initial states, one per decoder layer
            Ots       - unused
            num_steps - number of total steps (defaults to self.TT)
            scanF     - take the scan-based branch instead of the unrolled
                        Python loop

            Returns the stacked per-step softmax outputs. """

        if num_steps is None: num_steps = self.TT

        with tf.variable_scope(self.scope_name):

            if scanF:
                # NOTE(review): this branch cannot run as written -- C0 is
                # undefined in this scope, self.pred_layer is not created
                # in the visible part of this class, and tf.scan is called
                # with Theano-style keywords (outputs_info, n_steps). Kept
                # as-is pending a proper port.
                O10, O20 = Hts

                def decode_t(ctm1, h10, h20):

                    Et = self.embed_layer.fp(ctm1)
                    O1t = self.decoder[0].propagate(Et, n_steps=1, h0=h10)
                    O2t = self.decoder[1].propagate(O1t, n_steps=1, h0=h20)
                    predt = self.pred_layer.propagate(O2t, atype='softmax')
                    return predt, O1t, O2t

                [preds, Hts1,
                 Hts2], updates = tf.scan(decode_t,
                                          outputs_info=[C0, O10, O20],
                                          n_steps=tf.cast(num_steps,
                                                          dtype='int32'))
            else:
                O1t, O2t = Hts
                preds = []
                for t in xrange(num_steps):

                    # NOTE(review): the same `predt` is embedded at every
                    # step; the step's prediction is not fed back -- confirm
                    # whether greedy feedback is intended.
                    Et = self.embed_layer.fp(predt)
                    O1t = self.decoder[0].propagate(Et, n_steps=1, h0=O1t)
                    O2t = self.decoder[1].propagate(O1t, n_steps=1, h0=O2t)
                    pred = self.softmax_layer.propagate(O2t, atype='softmax')
                    # Collect this step's prediction; appending `predt`
                    # returned the unchanged input num_steps times.
                    preds.append(pred)
                preds = tf.pack(preds)
            return preds

    def fp(self, X, batch_sz, num_esteps, num_dsteps):
        """ Forward pass: encode X, then decode

            X          - source indices
            batch_sz   - number of sequences (Python int or scalar tensor)
            num_esteps - encoder steps (None -> self.TT)
            num_dsteps - decoder steps (None -> self.TT)

            Returns the decoder's stacked predictions. """

        if num_esteps is None: num_esteps = self.TT
        if num_dsteps is None: num_dsteps = self.TT

        # _define_topology stores the layer lists as instance attributes
        # named `encoder` and `decoder`, which hide the methods of the same
        # name on `self`. Fetch the methods from the class so the calls do
        # not land on a list.
        run_encoder = type(self).encoder
        run_decoder = type(self).decoder

        #Encoder pass
        Hts_dec_init = run_encoder(self, X, batch_sz, num_steps=num_esteps)
        Hts_dec = self._init_states(self.decoder, batch_sz)
        # The first decoder layer starts from the encoder result.
        Hts_dec.pop(0)
        Hts_dec.insert(0, Hts_dec_init)

        #Decoder pass
        # Initial decoder input: index 1 for every sequence. Sized from
        # this method's batch_sz; the previous code read the module global
        # `tr_config`, which is not defined alongside this class.
        pred0 = tf.fill(tf.reshape(batch_sz, [1]), 1)
        pred = run_decoder(self, pred0, Hts_dec, num_steps=num_dsteps)

        return pred