Code Example #1
import theano

# th_floatX: repo-local helper that casts a scalar to theano.config.floatX

def ada_updates(params, grads, rho=0.95, eps=1e-6):
    '''
    AdaDelta algorithm
    reference: http://www.cnblogs.com/neopenx/p/4768388.html
    '''
    # initialize running averages, all starting at zero:
    #   running_gr    : cached gradients
    #   running_dp_sqr: running average of (delta params) ** 2
    #   running_gr_sqr: running average of gradient ** 2
    running_gr = [theano.shared(p.get_value() * th_floatX(0.)) for p in params]
    running_dp_sqr = [
        theano.shared(p.get_value() * th_floatX(0.)) for p in params
    ]
    running_gr_sqr = [
        theano.shared(p.get_value() * th_floatX(0.)) for p in params
    ]
    # update gr
    gr_updates = [(gr_i, new_gr_i)
                  for gr_i, new_gr_i in zip(running_gr, grads)]
    # update gr_sqr
    gr_sqr_updates = [(gr_sqr_i, rho * gr_sqr_i + (1 - rho) * gr_i**2)
                      for gr_sqr_i, gr_i in zip(running_gr_sqr, running_gr)]
    # calculate (delta params) by RMS
    # NOTE: dp_sqr here is from the previous step, because the current dp has not been calculated yet!
    dp = [
        -gr_i * (dp_sqr_i + eps)**0.5 / (gr_sqr_i + eps)**0.5
        for gr_i, dp_sqr_i, gr_sqr_i in zip(running_gr, running_dp_sqr, running_gr_sqr)
    ]
    # update dp_sqr
    dp_sqr_updates = [(dp_sqr_i, rho * dp_sqr_i + (1 - rho) * dp_i**2)
                      for dp_sqr_i, dp_i in zip(running_dp_sqr, dp)]
    # update params
    param_updates = [(param_i, param_i + dp_i)
                     for param_i, dp_i in zip(params, dp)]

    return gr_updates, gr_sqr_updates, dp_sqr_updates, param_updates
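
The four update lists are returned separately on purpose: Theano evaluates every update in a single compiled function against the old values of the shared variables, so the groups must be applied in successive calls for dp to see the freshly updated gr_sqr but the previous step's dp_sqr, as in the AdaDelta recurrences E[g^2]_t = rho * E[g^2]_{t-1} + (1 - rho) * g_t^2 and dx_t = -RMS[dx]_{t-1} / RMS[g]_t * g_t. A minimal usage sketch on a toy quadratic cost (the toy cost and all variable names below are assumptions for illustration, not from the original repo):

import numpy as np
import theano
import theano.tensor as T

# toy problem (assumption): minimize (w - 3)^2 over one shared parameter
w = theano.shared(np.asarray(0., dtype=theano.config.floatX), name='w')
cost = (w - 3.) ** 2
grads = T.grad(cost, [w])

gr_up, gr_sqr_up, dp_sqr_up, param_up = ada_updates([w], grads)
# three compiled functions, called in order, so that each group reads
# the values written by the previous group within the same step
f_cache_gr = theano.function([], cost, updates=gr_up)
f_accum = theano.function([], [], updates=gr_sqr_up)
f_apply = theano.function([], [], updates=dp_sqr_up + param_up)

for _ in range(1000):
    f_cache_gr()
    f_accum()
    f_apply()
print(w.get_value())  # drifts toward 3.0 (AdaDelta ramps up slowly from the zero-initialized averages)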
Code Example #2
    def __init__(self,
                 corpus,
                 n_emb,
                 n_hidden,
                 batch_size,
                 conv_size,
                 pooling,
                 rng=None,
                 th_rng=None,
                 load_from=None,
                 gensim_w2v=None):
        '''
        n_hidden : number of convolution output feature maps (output stack size)
        conv_size: convolution filter height (the filter spans the full embedding width n_emb)
        '''
        self.corpus = corpus
        self.n_emb = n_emb
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.conv_size = conv_size
        self.pooling = pooling
        assert pooling in ('mean', 'max')

        if rng is None:
            rng = np.random.RandomState(1226)
        if th_rng is None:
            th_rng = RandomStreams(1226)

        # x/mask: (batch size, nsteps)
        x = T.matrix('x', dtype='int32')
        mask = T.matrix('mask', dtype=theano.config.floatX)
        y = T.vector('y', dtype='int32')
        batch_idx_seq = T.vector('index', dtype='int32')
        use_noise = theano.shared(th_floatX(0.))
        self.x, self.mask, self.y, self.batch_idx_seq, self.use_noise = x, mask, y, batch_idx_seq, use_noise

        # no need to transpose x/mask for the CNN (unlike the RNN model below)
        n_samples, n_steps = x.shape
        # transpose mask-matrix to be consistent with pooling-layer-inputs
        trans_mask = mask.T
        # truncate mask-matrix to be consistent with conv-outputs
        trunc_mask = trans_mask[(conv_size - 1):]

        # list of model layers
        model_layers = []
        model_layers.append(
            EmbLayer(x,
                     load_from=load_from,
                     rand_init_params=(rng, (corpus.dic.size, n_emb)),
                     gensim_w2v=gensim_w2v,
                     dic=corpus.dic))
        # emb-out: (batch size, n_words/steps, emb_dim)
        # conv-in: (batch size, 1(input stack size), n_words/steps, emb_dim)
        # conv-out: (batch size, n_hidden(output stack size), output feature map height, 1(output feature map width))
        # pooling-in: (output feature map height, batch size, output stack size)
        conv_in = model_layers[-1].outputs[:, None, :, :]
        model_layers.append(
            ConvLayer(conv_in,
                      image_shape=(batch_size, 1, corpus.maxlen, n_emb),
                      load_from=load_from,
                      rand_init_params=(rng, (n_hidden, 1, conv_size, n_emb))))
        pooling_in = T.transpose(model_layers[-1].outputs.flatten(3),
                                 axes=(2, 0, 1))
        if pooling == 'mean':
            model_layers.append(MeanPoolingLayer(pooling_in, trunc_mask))
        else:
            model_layers.append(MaxPoolingLayer(pooling_in, trunc_mask))
        model_layers.append(
            DropOutLayer(model_layers[-1].outputs, use_noise, th_rng))
        model_layers.append(
            HiddenLayer(model_layers[-1].outputs,
                        activation=T.nnet.softmax,
                        load_from=load_from,
                        rand_init_params=(rng, (n_hidden, corpus.n_type))))
        self.model_layers = model_layers

        model_params = []
        for layer in model_layers:
            model_params += layer.params

        self.pred_prob = model_layers[-1].outputs
        self.pred = T.argmax(self.pred_prob, axis=1)
        off = 1e-8
        self.cost = -T.mean(
            T.log(self.pred_prob[T.arange(n_samples), y] + off))

        # attributes with the `_func` suffix are compiled functions
        self.predict_func = theano.function(inputs=[x, mask],
                                            outputs=self.pred)
        self.predict_prob_func = theano.function(inputs=[x, mask],
                                                 outputs=self.pred_prob)

        grads = T.grad(self.cost, model_params)
        self.gr_updates, self.gr_sqr_updates, self.dp_sqr_updates, self.param_updates = ada_updates(
            model_params, grads)
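
Both the CNN model above and the RNN model below consume an int32 index matrix x of shape (batch size, nsteps) plus a floatX mask that is 1. on real tokens and 0. on padding. A sketch of how such a batch could be assembled (pad_batch is a hypothetical helper for illustration, not the repo's batching code):

import numpy as np

def pad_batch(seqs, maxlen):
    # right-pad index sequences with 0 and mark real tokens in the mask
    x = np.zeros((len(seqs), maxlen), dtype='int32')
    mask = np.zeros((len(seqs), maxlen), dtype='float32')
    for i, s in enumerate(seqs):
        x[i, :len(s)] = s
        mask[i, :len(s)] = 1.
    return x, mask

x, mask = pad_batch([[4, 8, 15], [16, 23]], maxlen=5)
# x    = [[ 4  8 15  0  0], [16 23  0  0  0]]
# mask = [[1. 1. 1. 0. 0.], [1. 1. 0. 0. 0.]]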
Code Example #3
    def __init__(self,
                 corpus,
                 n_emb,
                 n_hidden,
                 pooling,
                 rng=None,
                 th_rng=None,
                 load_from=None,
                 gensim_w2v=None):
        self.corpus = corpus
        self.n_emb = n_emb
        self.n_hidden = n_hidden
        self.pooling = pooling
        assert pooling in ('mean', 'max')

        if rng is None:
            rng = np.random.RandomState(1226)
        if th_rng is None:
            th_rng = RandomStreams(1226)

        # x/mask: (batch size, nsteps)
        x = T.matrix('x', dtype='int32')
        mask = T.matrix('mask', dtype=theano.config.floatX)
        y = T.vector('y', dtype='int32')
        batch_idx_seq = T.vector('index', dtype='int32')
        use_noise = theano.shared(th_floatX(0.))
        self.x, self.mask, self.y, self.batch_idx_seq, self.use_noise = x, mask, y, batch_idx_seq, use_noise

        # transpose the axes: (batch size, nsteps) -> (nsteps, batch size)
        trans_x, trans_mask = x.T, mask.T
        # truncate the useless (all-padding) steps
        trunc_x, trunc_mask = RNNModel.trunc_inputs_mask(trans_x, trans_mask)
        n_steps, n_samples = trunc_x.shape

        # list of model layers
        model_layers = []
        model_layers.append(
            EmbLayer(trunc_x,
                     load_from=load_from,
                     rand_init_params=(rng, (corpus.dic.size, n_emb)),
                     gensim_w2v=gensim_w2v,
                     dic=corpus.dic))
        model_layers.append(
            RNNLayer(model_layers[-1].outputs,
                     trunc_mask,
                     load_from=load_from,
                     rand_init_params=(rng, (n_emb, n_hidden))))
        if pooling == 'mean':
            model_layers.append(
                MeanPoolingLayer(model_layers[-1].outputs, trunc_mask))
        else:
            model_layers.append(
                MaxPoolingLayer(model_layers[-1].outputs, trunc_mask))
        model_layers.append(
            DropOutLayer(model_layers[-1].outputs, use_noise, th_rng))
        model_layers.append(
            HiddenLayer(model_layers[-1].outputs,
                        activation=T.nnet.softmax,
                        load_from=load_from,
                        rand_init_params=(rng, (n_hidden, corpus.n_type))))
        self.model_layers = model_layers

        model_params = []
        for layer in model_layers:
            model_params += layer.params

        self.pred_prob = model_layers[-1].outputs
        self.pred = T.argmax(self.pred_prob, axis=1)
        off = 1e-8
        self.cost = -T.mean(
            T.log(self.pred_prob[T.arange(n_samples), y] + off))

        # attributes with the `_func` suffix are compiled functions
        self.predict_func = theano.function(inputs=[x, mask],
                                            outputs=self.pred)
        self.predict_prob_func = theano.function(inputs=[x, mask],
                                                 outputs=self.pred_prob)

        grads = T.grad(self.cost, model_params)
        self.gr_updates, self.gr_sqr_updates, self.dp_sqr_updates, self.param_updates = ada_updates(
            model_params, grads)
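
RNNModel.trunc_inputs_mask itself is not shown in this listing; judging from the call site, it drops trailing time steps that are padding for every sample in the batch, so later layers do no useless work on all-padding steps. A hypothetical symbolic equivalent (an assumption, not the repo's actual implementation):

import theano.tensor as T

def trunc_inputs_mask(trans_x, trans_mask):
    # trans_x/trans_mask: (nsteps, batch size); mask is 1. on real tokens.
    # keep only the first n_steps rows, where n_steps is the length of the
    # longest sequence in the batch (hypothetical re-implementation)
    n_steps = T.cast(T.max(T.sum(trans_mask, axis=0)), 'int32')
    return trans_x[:n_steps], trans_mask[:n_steps]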