Example #1
File: rnn.py Project: windweller/Parable
 def __init__(self,
              hs,
              x,
              mask,
              seqmask,
              x_dim,
              outputs_info,
              args,
              suffix=''):
     self.recdrop = args.recdrop
     self.W_concat, self.b_concat = _linear_params(args.rnn_dim * 2,
                                                   args.rnn_dim,
                                                   'concat%s' % suffix)
     self.W_att1, self.b_att1 = _linear_params(args.rnn_dim, args.rnn_dim,
                                               'att1%s' % suffix)
     self.W_att2, self.b_att2 = _linear_params(args.rnn_dim, args.rnn_dim,
                                               'att2%s' % suffix)
     self.hs = hs  # encoder hidden states to attend over
     # precompute the first attention projection over all encoder states
     self.phi_hs = T.tanh(T.dot(self.hs, self.W_att1) + self.b_att1)
     super(GRULayerAttention, self).__init__(x,
                                             mask,
                                             seqmask,
                                             x_dim,
                                             outputs_info,
                                             args,
                                             suffix=suffix)
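Note: every example on this page calls a `_linear_params` helper defined elsewhere in rnn.py. Its source isn't shown here; the following is a minimal sketch consistent with the call sites (a shared weight matrix plus a shared bias unless `bias=False`; the `act` keyword presumably only steers the init scale), not the project's actual implementation.

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

def _linear_params(n_in, n_out, name, act=T.tanh, bias=True):
    # Hypothetical reconstruction: Glorot-style uniform init for W,
    # zeros for b; sigmoid units get the conventional 4x wider interval.
    scale = np.sqrt(6.0 / (n_in + n_out))
    if act is T.nnet.sigmoid:
        scale *= 4.0
    W = theano.shared(
        np.random.uniform(-scale, scale, (n_in, n_out)).astype(floatX),
        name='W_%s' % name)
    if not bias:
        return W
    b = theano.shared(np.zeros((n_out,), dtype=floatX), name='b_%s' % name)
    return W, b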
Example #2
File: rnn.py Project: windweller/Parable
    def __init__(self, x, mask, seqmask, x_dim, outputs_info, args, suffix=''):
        # NOTE: to stack layers, x_dim should equal hdim
        self.xdim = x_dim
        self.hdim = args.rnn_dim
        self.recdrop = args.recdrop
        self.stocdrop = args.stocdrop
        self.batch_norm = args.batch_norm

        if args.ortho:
            # stack the four gate blocks along the column axis
            W = np.concatenate([norm_init(self.xdim, self.hdim, scale=0.01)] * 4,
                               axis=1)
            U = np.concatenate([ortho_init(self.hdim, self.hdim, scale=0.05)] * 4,
                               axis=1)
            b = np.zeros((4 * self.hdim,)).astype(floatX)

            self.W = theano.shared(W, name='W%s' % suffix)
            self.b = theano.shared(b, name='b%s' % suffix)
            self.U = theano.shared(U, name='U%s' % suffix)
        else:
            self.W, self.b = _linear_params(self.xdim, 4 * self.hdim,
                                            'W%s' % suffix)
            self.U = _linear_params(self.hdim,
                                    4 * self.hdim,
                                    'U%s' % suffix,
                                    bias=False)

        self.params = [self.W, self.b, self.U]

        initial_gamma = 0.1

        if self.batch_norm:
            self.gamma_inputs = theano.shared(
                initial_gamma * np.ones(4 * self.hdim).astype('float32'))
            self.gamma_hiddens = theano.shared(
                initial_gamma * np.ones(4 * self.hdim).astype('float32'))
            self.gamma_outputs = theano.shared(
                initial_gamma * np.ones(self.hdim).astype('float32'))
            self.beta_outputs = theano.shared(
                np.zeros(self.hdim).astype('float32'))

            self.params += [
                self.gamma_inputs, self.gamma_hiddens, self.gamma_outputs,
                self.beta_outputs
            ]

        rval, updates = theano.scan(self._step,
                                    sequences=[x, seqmask],
                                    outputs_info=outputs_info)

        # out should be of dim (sequence length, batch size, hidden size)
        self.out = rval[0] * mask[:, :, None]
        self.cell = rval[1] * mask[:, :, None]
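Note: the `_step` callback driven by `theano.scan` above is defined elsewhere in the class. Given the 4 * hdim stacking of W, U, and b and the (h, c) pair returned through `rval`, a plausible LSTM step looks like the sketch below; the (i, f, o, g) gate order is an assumption, and the recurrent-dropout and batch-norm branches are omitted.

def _step(self, x_t, seqmask_t, h_prev, c_prev):
    # one LSTM step over a (batch, dim) slice; slices pick out the four gates
    pre = T.dot(x_t, self.W) + T.dot(h_prev, self.U) + self.b
    i = T.nnet.sigmoid(pre[:, 0 * self.hdim:1 * self.hdim])  # input gate
    f = T.nnet.sigmoid(pre[:, 1 * self.hdim:2 * self.hdim])  # forget gate
    o = T.nnet.sigmoid(pre[:, 2 * self.hdim:3 * self.hdim])  # output gate
    g = T.tanh(pre[:, 3 * self.hdim:4 * self.hdim])          # candidate
    c_t = f * c_prev + i * g
    h_t = o * T.tanh(c_t)
    # seqmask_t carries the previous state through padded positions
    c_t = seqmask_t[:, None] * c_t + (1.0 - seqmask_t[:, None]) * c_prev
    h_t = seqmask_t[:, None] * h_t + (1.0 - seqmask_t[:, None]) * h_prev
    return h_t, c_t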
Example #3
File: rnn.py Project: windweller/Parable
 def __init__(self, inp, n_in, n_out):
     # initialize w/ zeros
     self.W, self.b = _linear_params(n_in, n_out, 'sm')
     E = T.dot(inp, self.W) + self.b
     # E has shape (time, batch, categories); subtracting the row max
     # before exp keeps the softmax numerically stable
     E = T.exp(E - T.max(E, axis=2, keepdims=True))
     pmf = E / T.sum(E, axis=2, keepdims=True)
     self.p_y_given_x = pmf
     # argmax over the category axis (axis=1 would select over the batch)
     self.y_pred = T.argmax(self.p_y_given_x, axis=-1)
     self.out = self.p_y_given_x
     # parameters of the model
     self.params = [self.W, self.b]
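Note: subtracting the per-row maximum before `T.exp` leaves the softmax value unchanged but prevents overflow for large logits. A quick NumPy check of the identity (values are illustrative, not from the project):

import numpy as np

logits = np.array([1000.0, 1001.0, 1002.0])  # naive np.exp(logits) overflows
shifted = logits - logits.max()
softmax = np.exp(shifted) / np.exp(shifted).sum()
print(softmax)  # [0.09003057 0.24472847 0.66524096]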
Example #4
File: rnn.py Project: windweller/Parable
    def __init__(self, x, dim, suffix=""):
        # NOTE: to stack layers, the input dim should equal hdim

        self.W, self.b = _linear_params(dim * 2, dim, "ds%s" % suffix)

        # x.shape = [seq_len, batch_size, hdim]
        # x1.shape = [batch_size, seq_len / 2, hdim * 2]
        # floor division keeps the reshape shape integral (seq_len must be even)
        x1 = x.dimshuffle([1, 0, 2]).reshape([x.shape[1], x.shape[0] // 2, x.shape[2] * 2])

        # x2.shape = [batch_size, seq_len / 2, hdim]
        x2 = x1.dot(self.W) + self.b

        # x3.shape = [seq_len / 2, batch_size, hdim]
        x3 = x2.dimshuffle([1, 0, 2])

        self.out = T.tanh(x3)

        self.params = [self.W, self.b]
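Note: this layer halves the time axis by concatenating adjacent timestep pairs and projecting the doubled features back down to `dim`. A hedged usage sketch; the class name `DownsampleLayer` and the dummy shapes are assumptions:

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

x = T.tensor3('x')                    # (seq_len, batch, dim)
layer = DownsampleLayer(x, dim=4)     # assumed name for the class above
f = theano.function([x], layer.out)
print(f(np.zeros((6, 2, 4), dtype=floatX)).shape)  # -> (3, 2, 4)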
Example #5
File: rnn.py Project: windweller/Parable
 def __init__(self, x, mask, seqmask, x_dim, outputs_info, args, suffix="", backwards=False):
     # NOTE: to stack layers, x_dim should equal hdim
     self.xdim = x_dim
     self.hdim = args.rnn_dim
     self.backwards = backwards
     self.recdrop = args.recdrop
     self.stocdrop = args.stocdrop
     # initialize parameters
     # TODO maybe try initialization here: https://github.com/kyunghyuncho/dl4mt-material/blob/master/session1/nmt.py, helps for memorizing long sequences
     self.W_z, self.b_wz = _linear_params(self.xdim, self.hdim, "wz%s" % suffix, act=T.nnet.sigmoid)
     self.U_z, self.b_uz = _linear_params(self.hdim, self.hdim, "uz%s" % suffix, act=T.nnet.sigmoid)
     self.W_r, self.b_wr = _linear_params(self.xdim, self.hdim, "wr%s" % suffix, act=T.nnet.sigmoid)
     self.U_r, self.b_ur = _linear_params(self.hdim, self.hdim, "ur%s" % suffix, act=T.nnet.sigmoid)
     self.W_h, self.b_wh = _linear_params(self.xdim, self.hdim, "wh%s" % suffix)
     self.U_h, self.b_uh = _linear_params(self.hdim, self.hdim, "uh%s" % suffix)
     self.setup(x, mask, seqmask, outputs_info)
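Note: `setup` (defined elsewhere in rnn.py) presumably wires these parameters into a `theano.scan`. The W_*/U_* names imply the standard GRU update; a per-timestep sketch, with the dropout options omitted:

def _gru_step(self, x_t, seqmask_t, h_prev):
    # sketch of the standard GRU equations implied by the parameters above
    z = T.nnet.sigmoid(T.dot(x_t, self.W_z) + self.b_wz +
                       T.dot(h_prev, self.U_z) + self.b_uz)    # update gate
    r = T.nnet.sigmoid(T.dot(x_t, self.W_r) + self.b_wr +
                       T.dot(h_prev, self.U_r) + self.b_ur)    # reset gate
    h_tilde = T.tanh(T.dot(x_t, self.W_h) + self.b_wh +
                     T.dot(r * h_prev, self.U_h) + self.b_uh)  # candidate
    h_t = (1.0 - z) * h_prev + z * h_tilde
    # carry the previous state through padded positions
    return seqmask_t[:, None] * h_t + (1.0 - seqmask_t[:, None]) * h_prev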