Example 1
	def __init__(self,hid_dim,bptt_truncate = -1):
		self.hidden_dim = hid_dim
		self.bptt_truncate = bptt_truncate
		
		# input lstm parameters
		self.Ui = normal_param(std=0.008, shape=(self.hidden_dim, word_vector_size))
		self.Uf = normal_param(std=0.008, shape=(self.hidden_dim, word_vector_size))
		self.Uo = normal_param(std=0.008, shape=(self.hidden_dim, word_vector_size))
		self.Ug = normal_param(std=0.008, shape=(self.hidden_dim, word_vector_size))
		self.Wi = normal_param(std=0.02, shape=(self.hidden_dim, self.hidden_dim))
		self.Wf = normal_param(std=0.02, shape=(self.hidden_dim, self.hidden_dim))
		self.Wo = normal_param(std=0.02, shape=(self.hidden_dim, self.hidden_dim))
		self.Wg = normal_param(std=0.02, shape=(self.hidden_dim, self.hidden_dim))
		self.bi = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.bf = constant_param(value=1.5, shape=(self.hidden_dim,))
		self.bo = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.bg = constant_param(value=0.0, shape=(self.hidden_dim,))	
		"""
		#gru sentence parameters
		self.U0_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.U1_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.U2_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.W0_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.W1_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.W2_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))	
		"""
	
		q = T.matrix()
		a = T.matrix()
		t = T.scalar()

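		# run the LSTM over each sentence word-by-word; outputs_info seeds the hidden and cell states with zeros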
		s_a,a_updates = theano.scan(self.lstm_inp_next_state,sequences=a,outputs_info=[T.zeros_like(self.bi),T.zeros_like(self.bi)])
		s_q,q_updates = theano.scan(self.lstm_inp_next_state,sequences=q,outputs_info=[T.zeros_like(self.bi),T.zeros_like(self.bi)])

		q_q = s_q[1][-1]
		a_a = s_a[1][-1]	

		scr,scr_updates = theano.scan(self.l1,sequences = [q_q,a_a],outputs_info = None)
		self.score = T.exp(T.sum(scr))	

		self.loss = (t-self.score)*(t-self.score)

		self.params = [self.Ui,self.Wi,self.bi,
				self.Uf,self.Wf,self.bf,
				self.Uo,self.Wo,self.bo,
				self.Ug,self.Wg,self.bg]

		#self.loss = self.loss + 0.00003*l2_reg(self.params)
		updts = upd.adam(self.loss,self.params)

		self.train_fn = theano.function(inputs = [q,a,t], outputs = [self.score,self.loss], updates = updts)
		self.test_fn = theano.function(inputs = [q,a], outputs = self.score)
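A minimal usage sketch for the compiled functions above (the class name PairLSTM is hypothetical, since the listing omits it; q and a are (sentence_length, word_vector_size) matrices in theano.config.floatX and t is the target similarity score):

import numpy as np
import theano

model = PairLSTM(hid_dim=100)                                    # hypothetical class name
q = np.random.randn(12, word_vector_size).astype(theano.config.floatX)
a = np.random.randn(15, word_vector_size).astype(theano.config.floatX)
score, loss = model.train_fn(q, a, 1.0)                          # one adam step on (t - score)^2
score = model.test_fn(q, a)                                      # score = exp(sum(l1(...))), a similarity in (0, 1]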
Example 2
	def __init__(self,hid_dim,bptt_truncate = -1):
		self.hidden_dim = hid_dim
		self.bptt_truncate = bptt_truncate
		
		#gru sentence parameters
		self.U0_i = normal_param(std=0.01, shape=(self.hidden_dim, word_vector_size))
		self.U1_i = normal_param(std=0.01, shape=(self.hidden_dim, word_vector_size))
		self.U2_i = normal_param(std=0.01, shape=(self.hidden_dim, word_vector_size))
		self.W0_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.W1_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.W2_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		
		#attention mechanism parameters
		self.W1 = normal_param(std=0.0033, shape=(2*self.hidden_dim, (4*self.hidden_dim)+1))
		self.W2 = normal_param(std=0.01, shape=(2,2*self.hidden_dim))
		self.b1 = constant_param(value=0.0, shape=(2*self.hidden_dim,))
		self.b2 = constant_param(value=0.0, shape=(2,))
		self.Wb = normal_param(std=0.01, shape=(self.hidden_dim,self.hidden_dim))		

		q = T.matrix()
		a = T.matrix()
		t = T.iscalar()
		
		# keep the raw matrices as the compiled functions' inputs;
		# apply dropout to renamed copies so q and a stay root variables
		q_drop = dropout(q,0.08)
		a_drop = dropout(a,0.16)

		s_a,a_updates = theano.scan(self.input_next_state,sequences=a_drop,outputs_info=T.zeros_like(self.b2_i))
		s_q,q_updates = theano.scan(self.input_next_state,sequences=q_drop,outputs_info=T.zeros_like(self.b2_i))

		q_q = s_q[-1]
		a_a = s_a[-1]	

		self.pred = self.attn_step(a_a,q_q)	

		self.loss = T.mean(T.nnet.categorical_crossentropy(self.pred,T.stack([t])))

		self.params = [self.U0_i,self.W0_i,self.b0_i,
				self.U1_i,self.W1_i,self.b1_i,
				self.U2_i,self.W2_i,self.b2_i,
				self.W1,self.W2,self.b1,self.b2,self.Wb]

		self.loss = self.loss + 0.00007*l2_reg(self.params)
		updts = upd.adam(self.loss,self.params)

		self.train_fn = theano.function(inputs = [q,a,t], outputs = [self.pred,self.loss], updates = updts)
		self.test_fn = theano.function(inputs = [q,a], outputs = self.pred)
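As above, a hedged usage sketch (the class name AttnGRU is an assumption); here t is an integer class label (T.iscalar) and pred is presumably the two-way softmax produced by attn_step:

import numpy as np
import theano

model = AttnGRU(hid_dim=100)                                     # hypothetical class name
q = np.random.randn(12, word_vector_size).astype(theano.config.floatX)
a = np.random.randn(15, word_vector_size).astype(theano.config.floatX)
pred, loss = model.train_fn(q, a, 1)                             # cross-entropy + L2 penalty, one adam step
pred = model.test_fn(q, a)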
Example 3
	def __init__(self,f_match,f_decomp,hidden_dim):
		fm = f_match.split("-")
		self.f_match = fm[0]
		self.fm_win = 0
		if len(fm) != 1:
			self.fm_win = int(fm[1])
		self.f_decomp = f_decomp
		self.hidden_dim = hidden_dim
		
		#gru sentence parameters
		self.U0_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.U1_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.U2_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.W0_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.W1_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.W2_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))

		spl = T.matrix()
		smin = T.matrix()
		tpl = T.matrix()
		tmin = T.matrix()
		scr = T.scalar()
		
		s_plus,spl_updates = theano.scan(self.input_next_state,sequences=spl,outputs_info=T.zeros_like(self.b0_i))
		s_minus,smin_updates = theano.scan(self.input_next_state,sequences=smin,outputs_info=T.zeros_like(self.b0_i))
		t_plus,tpl_updates = theano.scan(self.input_next_state,sequences=tpl,outputs_info=T.zeros_like(self.b0_i))
		t_minus,tmin_updates = theano.scan(self.input_next_state,sequences=tmin,outputs_info=T.zeros_like(self.b0_i))

		s = T.concatenate([s_plus,s_minus],axis = 0)
		t = T.concatenate([t_plus,t_minus],axis=0)

		sc,sc_updates = theano.scan(self.l1,sequences = [s,t],outputs_info = None)
		self.score = T.exp(T.sum(sc))	

		self.loss = (scr-self.score)*(scr-self.score)

		self.params = [self.U0_i,self.W0_i,self.b0_i,
				self.U1_i,self.W1_i,self.b1_i,
				self.U2_i,self.W2_i,self.b2_i]

		#self.loss = self.loss + 0.00003*l2_reg(self.params)
		updts = upd.adam(self.loss,self.params)

		self.train_fn = theano.function(inputs = [spl,smin,tpl,tmin,scr], outputs = [self.score,self.loss], updates = updts)
		self.test_fn = theano.function(inputs = [spl,smin,tpl,tmin], outputs = self.score)
Example 4
    def __init__(self, hid_dim, bptt_truncate=-1):
        self.hidden_dim = hid_dim
        self.bptt_truncate = bptt_truncate

        #gru sentence parameters
        self.U0_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, word_vector_size))
        self.U1_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, word_vector_size))
        self.U2_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, word_vector_size))
        self.W0_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.W1_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.W2_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim, ))
        self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim, ))
        self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim, ))

        #attention mechanism parameters
        self.W1 = normal_param(std=0.0033,
                               shape=(2 * self.hidden_dim,
                                      (4 * self.hidden_dim) + 1))
        self.W2 = normal_param(std=0.01, shape=(2, 2 * self.hidden_dim))
        self.b1 = constant_param(value=0.0, shape=(2 * self.hidden_dim, ))
        self.b2 = constant_param(value=0.0, shape=(2, ))
        self.Wb = normal_param(std=0.01,
                               shape=(self.hidden_dim, self.hidden_dim))

        q = T.matrix()
        a = T.matrix()
        t = T.iscalar()

        # keep the raw matrices as the compiled functions' inputs;
        # apply dropout to renamed copies so q and a stay root variables
        q_drop = dropout(q, 0.08)
        a_drop = dropout(a, 0.16)

        s_a, a_updates = theano.scan(self.input_next_state,
                                     sequences=a_drop,
                                     outputs_info=T.zeros_like(self.b2_i))
        s_q, q_updates = theano.scan(self.input_next_state,
                                     sequences=q_drop,
                                     outputs_info=T.zeros_like(self.b2_i))

        q_q = s_q[-1]
        a_a = s_a[-1]

        self.pred = self.attn_step(a_a, q_q)

        self.loss = T.mean(
            T.nnet.categorical_crossentropy(self.pred, T.stack([t])))

        self.params = [
            self.U0_i, self.W0_i, self.b0_i, self.U1_i, self.W1_i, self.b1_i,
            self.U2_i, self.W2_i, self.b2_i, self.W1, self.W2, self.b1,
            self.b2, self.Wb
        ]

        self.loss = self.loss + 0.00007 * l2_reg(self.params)
        updts = upd.adam(self.loss, self.params)

        self.train_fn = theano.function(inputs=[q, a, t],
                                        outputs=[self.pred, self.loss],
                                        updates=updts)
        self.test_fn = theano.function(inputs=[q, a], outputs=self.pred)
Example 5
    def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
        ####################################### SETTINGS ###################################
        self.x_train = x_train
        self.x_test = x_test
        self.diff = diff
        self.batch_size = 100.0
        self.learning_rate = theano.shared(np.float32(0.0008))
        self.momentum = 0.3
        self.performance = {"train": [], "test": []}
        self.inpt = T.ftensor4(name="input")
        self.df = T.fmatrix(name="differential")
        self.dim_z = dim_z
        self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
        self.activation = relu
        self.generative = False
        self.out_distribution = False
        # self.y = T.matrix(name="y")
        self.in_filters = [64, 64, 64]
        self.filter_lengths = [10.0, 10.0, 10.0]
        self.params = []
        # magic = 73888.
        self.magic = magic

        self.dropout_symbolic = T.fscalar()
        self.dropout_prob = theano.shared(np.float32(0.0))
        ####################################### LAYERS ######################################
        # LAYER 1 ##############################
        self.conv1 = one_d_conv_layer(
            self.inpt, self.in_filters[0], 1, self.filter_lengths[0], param_names=["W1", "b1"]
        )
        self.params += self.conv1.params
        self.bn1 = batchnorm(self.conv1.output)
        self.nl1 = self.activation(self.bn1.X)
        self.maxpool1 = pool_2d(self.nl1, [3, 1], stride=[2, 1], mode="average_exc_pad").astype(theano.config.floatX)
        self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)
        # self.layer1_out = self.maxpool1
        # LAYER2 ################################
        self.flattened = T.flatten(self.layer1_out, outdim=2)
        # Variational Layer #####################
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z)
        self.params += self.latent_layer.params
        self.latent_out = self.latent_layer.output
        # Hidden Layer #########################
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
        self.params += self.hidden_layer.params
        self.hid_out = dropout(
            self.activation(self.hidden_layer.output).reshape(
                (self.inpt.shape[0], self.in_filters[-1], int(self.magic / self.in_filters[-1]), 1)
            ),
            self.dropout_symbolic,
        )
        # Deconvolutional 1 ######################
        self.deconv1 = one_d_deconv_layer(
            self.hid_out,
            1,
            self.in_filters[2],
            self.filter_lengths[2],
            pool=2.0,
            param_names=["W3", "b3"],
            distribution=False,
        )
        self.params += self.deconv1.params
        # self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
        self.tanh_out = self.deconv1.output
        self.last_layer = self.deconv1

        if self.out_distribution:
            self.trunk_sigma = self.last_layer.log_sigma[:, :, : self.inpt.shape[2], :]
        self.trunc_output = self.tanh_out[:, :, : self.inpt.shape[2], :]

        ################################### FUNCTIONS ######################################################
        self.get_latent_states = theano.function(
            [self.inpt], self.latent_out, givens=[[self.dropout_symbolic, self.dropout_prob]]
        )
        # self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
        # self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
        # self.convolve1 = theano.function([self.inpt],self.layer1_out)
        # self.convolve2 = theano.function([self.inpt],self.layer2_out)
        self.output = theano.function(
            [self.inpt], self.trunc_output, givens=[[self.dropout_symbolic, self.dropout_prob]]
        )
        self.get_flattened = theano.function(
            [self.inpt], self.flattened, givens=[[self.dropout_symbolic, self.dropout_prob]]
        )
        # self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
        # self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
        # self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
        # self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
        self.generate_from_z = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob], [self.latent_out, self.generative_z]],
        )

        self.cost = self.MSE()
        self.mse = self.MSE()
        # self.likelihood = self.log_px_z()
        # self.get_cost = theano.function([self.inpt],[self.cost,self.mse])

        # self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
        self.derivatives = T.grad(self.cost, self.params)
        # self.get_gradients = theano.function([self.inpt],self.derivatives)
        self.updates = adam(self.params, self.derivatives, self.learning_rate)
        # self.updates =momentum_update(self.params,self.derivatives,self.learning_rate,self.momentum)
        self.train_model = theano.function(
            inputs=[self.inpt, self.df],
            outputs=self.cost,
            updates=self.updates,
            givens=[[self.dropout_symbolic, self.dropout_prob]],
        )
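Because generate_from_z substitutes self.generative_z for latent_out via givens, decoding from an arbitrary latent vector only requires setting that shared variable first; an input tensor is still needed because trunc_output slices to inpt.shape[2]. A sketch under those assumptions (the class name ConvVAE and the input shape are hypothetical):

import numpy as np

model = ConvVAE(dim_z=10, x_train=x_train, x_test=x_test)    # hypothetical class name
z = np.random.randn(1, 10).astype(np.float32)
model.generative_z.set_value(z)                              # swapped in for latent_out by givens
x_shape_ref = np.zeros((1, 1, 100, 1), dtype=np.float32)     # only its shape drives the output slice
sample = model.generate_from_z(x_shape_ref)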
Example 6
    def __init__(self, f_match, f_decomp, filter_no, att_hid_dim):
        fm = f_match.split("-")
        self.f_match = fm[0]
        self.fm_win = 0
        if len(fm) != 1:
            self.fm_win = int(fm[1])
        self.f_decomp = f_decomp
        self.fn = filter_no
        self.att_hid_dim = att_hid_dim

        s = T.tensor4()
        t = T.tensor4()
        scr = T.vector()
        """
		#  1 attention mechanism parameters
		self.W1 = normal_param(std=0.0033, shape=(self.hidden_dim, (4*self.hidden_dim)+1))
		self.W2 = normal_param(std=0.01, shape=(res,self.hidden_dim))
		self.b1 = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2 = constant_param(value=0.0, shape=(res,))
		self.Wb = normal_param(std=0.008, shape=(self.hidden_dim,self.hidden_dim))
		"""
        #  2 attention mechanism parameters
        self.Wx = normal_param(std=(2.0 / (self.att_hid_dim + (3 * self.fn))),
                               shape=(self.att_hid_dim, 3 * self.fn))
        self.Wd = normal_param(std=(2.0 / (self.att_hid_dim + (3 * self.fn))),
                               shape=(self.att_hid_dim, 3 * self.fn))
        self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim, ))
        self.b_p = constant_param(value=0.0, shape=(res, ))
        self.Wp = normal_param(std=(2.0 / (res + self.att_hid_dim)),
                               shape=(res, self.att_hid_dim))

        w_shp1 = (self.fn, 2, 3, word_vector_size)
        w_shp2 = (self.fn, 2, 2, word_vector_size)
        w_shp3 = (self.fn, 2, 1, word_vector_size)
        w_bound1 = 2 * 3 * word_vector_size
        w_bound2 = 2 * 2 * word_vector_size
        w_bound3 = 2 * 1 * word_vector_size
        b_shp = (self.fn, )
        self.W1 = theano.shared(np.asarray(np.random.uniform(
            low=-1.0 / w_bound1, high=1.0 / w_bound1, size=w_shp1),
                                           dtype=s.dtype),
                                name='W1')
        self.W2 = theano.shared(np.asarray(np.random.uniform(
            low=-1.0 / w_bound2, high=1.0 / w_bound2, size=w_shp2),
                                           dtype=s.dtype),
                                name='W2')
        self.W3 = theano.shared(np.asarray(np.random.uniform(
            low=-1.0 / w_bound3, high=1.0 / w_bound3, size=w_shp3),
                                           dtype=s.dtype),
                                name='W3')
        self.b1 = theano.shared(np.asarray(np.random.uniform(low=-.5,
                                                             high=.5,
                                                             size=b_shp),
                                           dtype=s.dtype),
                                name='b1')
        self.b2 = theano.shared(np.asarray(np.random.uniform(low=-.5,
                                                             high=.5,
                                                             size=b_shp),
                                           dtype=s.dtype),
                                name='b2')
        self.b3 = theano.shared(np.asarray(np.random.uniform(low=-.5,
                                                             high=.5,
                                                             size=b_shp),
                                           dtype=s.dtype),
                                name='b3')

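        # convolve the source sentence with three filter widths (3, 2, 1 words), apply tanh, then max-pool per filter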
        conv_out_s1 = conv2d(s, self.W1)
        output_s1 = T.tanh(conv_out_s1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
        output_s1 = output_s1.reshape((output_s1.shape[1], output_s1.shape[2]))
        o_s1, os1_updates = theano.scan(self.max_pool,
                                        sequences=output_s1,
                                        outputs_info=None)
        conv_out_s2 = conv2d(s, self.W2)
        output_s2 = T.tanh(conv_out_s2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
        output_s2 = output_s2.reshape((output_s2.shape[1], output_s2.shape[2]))
        o_s2, os2_updates = theano.scan(self.max_pool,
                                        sequences=output_s2,
                                        outputs_info=None)
        conv_out_s3 = conv2d(s, self.W3)
        output_s3 = T.tanh(conv_out_s3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
        output_s3 = output_s3.reshape((output_s3.shape[1], output_s3.shape[2]))
        o_s3, os3_updates = theano.scan(self.max_pool,
                                        sequences=output_s3,
                                        outputs_info=None)
        self.o_s = T.concatenate([o_s1, o_s2, o_s3], axis=0)

        conv_out_t1 = conv2d(t, self.W1)
        output_t1 = T.tanh(conv_out_t1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
        output_t1 = output_t1.reshape((output_t1.shape[1], output_t1.shape[2]))
        o_t1, ot1_updates = theano.scan(self.max_pool,
                                        sequences=output_t1,
                                        outputs_info=None)
        conv_out_t2 = conv2d(t, self.W2)
        output_t2 = T.tanh(conv_out_t2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
        output_t2 = output_t2.reshape((output_t2.shape[1], output_t2.shape[2]))
        o_t2, ot2_updates = theano.scan(self.max_pool,
                                        sequences=output_t2,
                                        outputs_info=None)
        conv_out_t3 = conv2d(t, self.W3)
        output_t3 = T.tanh(conv_out_t3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
        output_t3 = output_t3.reshape((output_t3.shape[1], output_t3.shape[2]))
        o_t3, ot3_updates = theano.scan(self.max_pool,
                                        sequences=output_t3,
                                        outputs_info=None)
        self.o_t = T.concatenate([o_t1, o_t2, o_t3], axis=0)

        self.pred = self.attn_step_2(self.o_s, self.o_t)
        self.loss = self.kl_div(scr, self.pred)

        self.params = [
            self.Wx, self.Wd, self.b_h, self.b_p, self.Wp, self.W1, self.W2,
            self.W3, self.b1, self.b2, self.b3
        ]

        #self.loss = self.loss + 0.00003*l2_reg(self.params)
        updts = upd.adam(self.loss, self.params)

        self.train_fn = theano.function(inputs=[s, t, scr],
                                        outputs=[self.pred, self.loss],
                                        updates=updts)
        self.test_fn = theano.function(inputs=[s, t], outputs=self.pred)
        self.f = theano.function([s, t], [self.o_s, self.o_t])
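self.max_pool is referenced but not defined in the listing; each scan above iterates over the rows of a (filter_no, length) feature map, so a plausible max-over-time helper, offered only as an assumption, is:

def max_pool(self, fmap_row):
    # assumes the module-level `import theano.tensor as T` used throughout;
    # fmap_row is one filter's responses over all positions: keep the strongest
    return T.max(fmap_row)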
Example 7
File: demo_ui.py Project: pyx123/QA
    def __init__(self, f_match, f_decomp, hidden_dim, att_hid_dim):
        fm = f_match.split("-")
        self.f_match = fm[0]
        self.fm_win = 0
        if len(fm) != 1:
            self.fm_win = int(fm[1])
        self.f_decomp = f_decomp
        self.hidden_dim = hidden_dim
        self.att_hid_dim = att_hid_dim

        #gru sentence parameters
        self.U0_i = normal_param(std=(2.0 /
                                      (self.hidden_dim + word_vector_size)),
                                 shape=(self.hidden_dim, word_vector_size))
        self.U1_i = normal_param(std=(2.0 /
                                      (self.hidden_dim + word_vector_size)),
                                 shape=(self.hidden_dim, word_vector_size))
        self.U2_i = normal_param(std=(2.0 /
                                      (self.hidden_dim + word_vector_size)),
                                 shape=(self.hidden_dim, word_vector_size))
        self.W0_i = normal_param(std=(2.0 /
                                      (self.hidden_dim + self.hidden_dim)),
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.W1_i = normal_param(std=(2.0 /
                                      (self.hidden_dim + self.hidden_dim)),
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.W2_i = normal_param(std=(2.0 /
                                      (self.hidden_dim + self.hidden_dim)),
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim, ))
        self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim, ))
        self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim, ))
        #  1 attention mechanism parameters
        self.W1 = normal_param(
            std=(2.0 / (self.hidden_dim + (4 * self.hidden_dim))),
            shape=(self.hidden_dim, (4 * self.hidden_dim) + 1))
        self.W2 = normal_param(std=(2.0 / (res + self.hidden_dim)),
                               shape=(res, self.hidden_dim))
        self.b1 = constant_param(value=0.0, shape=(self.hidden_dim, ))
        self.b2 = constant_param(value=0.0, shape=(res, ))
        self.Wb = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                               shape=(self.hidden_dim, self.hidden_dim))
        #  2 attention mechanism parameters
        self.Wx = normal_param(std=(2.0 / (self.att_hid_dim +
                                           (2 * self.hidden_dim))),
                               shape=(self.att_hid_dim, 2 * self.hidden_dim))
        self.Wd = normal_param(std=(2.0 / (self.att_hid_dim +
                                           (2 * self.hidden_dim))),
                               shape=(self.att_hid_dim, 2 * self.hidden_dim))
        self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim, ))
        self.b_p = constant_param(value=0.0, shape=(res, ))
        self.Wp = normal_param(std=(2.0 / (res + self.att_hid_dim)),
                               shape=(res, self.att_hid_dim))

        spl = T.matrix()
        smin = T.matrix()
        tpl = T.matrix()
        tmin = T.matrix()
        scr = T.scalar()

        s_p, spl_updates = theano.scan(self.input_next_state,
                                       sequences=spl,
                                       outputs_info=T.zeros_like(self.b0_i))
        s_m, smin_updates = theano.scan(self.input_next_state,
                                        sequences=smin,
                                        outputs_info=T.zeros_like(self.b0_i))
        t_p, tpl_updates = theano.scan(self.input_next_state,
                                       sequences=tpl,
                                       outputs_info=T.zeros_like(self.b0_i))
        t_m, tmin_updates = theano.scan(self.input_next_state,
                                        sequences=tmin,
                                        outputs_info=T.zeros_like(self.b0_i))

        s_plus = s_p[-1]
        s_minus = s_m[-1]
        t_plus = t_p[-1]
        t_minus = t_m[-1]

        s = T.concatenate([s_plus, s_minus], axis=0)
        t = T.concatenate([t_plus, t_minus], axis=0)

        self.pred = self.attn_step_2(s, t)

        self.loss = (scr - self.pred) * (scr - self.pred)
        #self.loss = -(scr*T.log(self.pred)) - ((1-scr)*T.log(1-self.pred))	   #for binary class for QA

        self.params = [
            self.U0_i, self.W0_i, self.b0_i, self.U1_i, self.W1_i, self.b1_i,
            self.U2_i, self.W2_i, self.b2_i, self.Wx, self.Wd, self.b_h,
            self.b_p, self.Wp
        ]

        #self.loss = self.loss + 0.00003*l2_reg(self.params)
        updts = upd.adam(self.loss, self.params)

        self.train_fn = theano.function(inputs=[spl, smin, tpl, tmin, scr],
                                        outputs=[self.pred, self.loss],
                                        updates=updts)
        self.test_fn = theano.function(inputs=[spl, smin, tpl, tmin],
                                       outputs=self.pred)
Example 8
    def build(self):
        print('building rnn cell...')
        hidden_layer = None
        input = self.x
        self.params = []
        print(list(range(1, len(self.nlayers) - 1)))
        for i in range(1, len(self.nlayers) - 1):
            if self.type == 'sigmoid':
                hidden_layer = sigmoid_layer(input,
                                             self.nlayers[i - 1],
                                             self.nlayers[i],
                                             prefix='hid_' + str(i))
            elif self.type == 'relu':
                hidden_layer = relu_layer(input,
                                          self.nlayers[i - 1],
                                          self.nlayers[i],
                                          prefix='hid_' + str(i))
            elif self.type == 'selu':
                hidden_layer = selu_layer(input,
                                          self.nlayers[i - 1],
                                          self.nlayers[i],
                                          prefix='hid_' + str(i))
            # Dropout
            if self.p > 0:
                drop_mask = self.rng.binomial(
                    n=1,
                    p=1 - self.p,
                    size=hidden_layer.activation.shape,
                    dtype=theano.config.floatX)
                input = T.switch(self.is_train,
                                 hidden_layer.activation * drop_mask,
                                 hidden_layer.activation * (1 - self.p))
            else:
                input = hidden_layer.activation
            self.params += hidden_layer.params

        print('building softmax output layer...')
        output_layer = softmax(input, self.nlayers[-2], self.nlayers[-1])
        self.params += output_layer.params
        cost = T.sum(
            T.nnet.categorical_crossentropy(output_layer.activation, self.y))
        acc = T.sum(T.eq(output_layer.predict,
                         T.argmax(self.y, axis=-1)))  # compare class indices, not max values

        lr = T.scalar("lr")
        gparams = [T.clip(T.grad(cost, p), -3, 3) for p in self.params]
        updates = None
        if self.optimizer == 'sgd':
            updates = sgd(self.params, gparams, lr)
        elif self.optimizer == 'adam':
            updates = adam(self.params, gparams, lr)
        elif self.optimizer == 'rmsprop':
            updates = rmsprop(params=self.params,
                              grads=gparams,
                              learning_rate=lr)

        self.train = theano.function(
            inputs=[self.x, self.y, lr],
            outputs=[cost, acc],
            updates=updates,
            givens={self.is_train: np.cast['int32'](1)})

        self.test = theano.function(
            inputs=[self.x],
            outputs=output_layer.predict,
            givens={self.is_train: np.cast['int32'](0)})
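A hedged training-loop sketch for the compiled functions above (model, n_epochs, batches, and the learning-rate value are placeholders, not from the listing):

for epoch in range(n_epochs):
    for x_batch, y_batch in batches:                      # placeholder mini-batch iterator
        cost, acc = model.train(x_batch, y_batch, 0.001)  # third argument feeds the T.scalar("lr") input
preds = model.test(x_batch)                               # class indices from output_layer.predict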
Example 9
    def __init__(self, hid_dim, att_hid_dim, bptt_truncate=-1):
        self.hidden_dim = hid_dim
        self.bptt_truncate = bptt_truncate
        self.att_hid_dim = att_hid_dim
        """
		# input lstm parameters
		self.Ui = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.Uf = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.Uo = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.Ug = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.Wi = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.Wf = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.Wo = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.Wg = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.bi = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.bf = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.bo = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.bg = constant_param(value=0.0, shape=(self.hidden_dim,))	
		"""
        #gru sentence parameters
        self.U0_i = normal_param(std=0.006,
                                 shape=(self.hidden_dim, word_vector_size))
        self.U1_i = normal_param(std=0.006,
                                 shape=(self.hidden_dim, word_vector_size))
        self.U2_i = normal_param(std=0.006,
                                 shape=(self.hidden_dim, word_vector_size))
        self.W0_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.W1_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.W2_i = normal_param(std=0.01,
                                 shape=(self.hidden_dim, self.hidden_dim))
        self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim, ))
        self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim, ))
        self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim, ))
        #  1 attention mechanism parameters
        self.W1 = normal_param(std=0.0033,
                               shape=(self.hidden_dim,
                                      (4 * self.hidden_dim) + 1))
        self.W2 = normal_param(std=0.01, shape=(res, self.hidden_dim))
        self.b1 = constant_param(value=0.0, shape=(self.hidden_dim, ))
        self.b2 = constant_param(value=0.0, shape=(res, ))
        self.Wb = normal_param(std=0.008,
                               shape=(self.hidden_dim, self.hidden_dim))
        #  2 attention mechanism parameters
        self.Wx = normal_param(std=0.015,
                               shape=(self.att_hid_dim, self.hidden_dim))
        self.Wd = normal_param(std=0.015,
                               shape=(self.att_hid_dim, self.hidden_dim))
        self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim, ))
        self.b_p = constant_param(value=0.0, shape=(res, ))
        self.Wp = normal_param(std=0.05, shape=(res, self.att_hid_dim))

        q = T.matrix()
        a = T.matrix()
        t = T.vector()

        s_a, a_updates = theano.scan(self.input_next_state,
                                     sequences=a,
                                     outputs_info=T.zeros_like(self.b0_i))
        s_q, q_updates = theano.scan(self.input_next_state,
                                     sequences=q,
                                     outputs_info=T.zeros_like(self.b0_i))

        q_q = s_q[-1]
        a_a = s_a[-1]

        self.pred = self.attn_step_2(a_a, q_q)

        self.loss = self.kl_div(t, self.pred)

        self.params = [
            self.U0_i, self.W0_i, self.b0_i, self.U1_i, self.W1_i, self.b1_i,
            self.U2_i, self.W2_i, self.b2_i, self.Wx, self.Wd, self.b_h,
            self.b_p, self.Wp
        ]

        #self.loss = self.loss + 0.00003*l2_reg(self.params)
        updts = upd.adam(self.loss, self.params)

        self.train_fn = theano.function(inputs=[q, a, t],
                                        outputs=[self.pred, self.loss],
                                        updates=updts)
        self.test_fn = theano.function(inputs=[q, a], outputs=self.pred)
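self.kl_div is not shown in the listing; given that t is a target distribution (T.vector) and self.pred a softmax output, a standard KL-divergence loss consistent with this call, offered as an assumption rather than the author's exact definition, is:

def kl_div(self, t, pred, eps=1e-7):
    # assumes the module-level `import theano.tensor as T` used throughout;
    # KL(t || pred) = sum_i t_i * (log t_i - log pred_i); eps guards log(0)
    return T.sum(t * (T.log(t + eps) - T.log(pred + eps)))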
Example 10
    def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
        ####################################### SETTINGS ###################################
        self.x_train = x_train
        self.x_test = x_test
        self.diff = diff
        self.batch_size = 100.
        self.learning_rate = theano.shared(np.float32(0.0008))
        self.momentum = 0.3
        self.performance = {"train": [], "test": []}
        self.inpt = T.ftensor4(name='input')
        self.df = T.fmatrix(name='differential')
        self.dim_z = dim_z
        self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
        self.activation = relu
        self.generative = False
        self.out_distribution = False
        #self.y = T.matrix(name="y")
        self.in_filters = [5, 5, 5]
        self.filter_lengths = [10., 10., 10.]
        self.params = []
        #magic = 73888.
        self.magic = magic

        self.dropout_symbolic = T.fscalar()
        self.dropout_prob = theano.shared(np.float32(0.0))
        ####################################### LAYERS ######################################
        # LAYER 1 ##############################
        self.conv1 = one_d_conv_layer(self.inpt,
                                      self.in_filters[0],
                                      1,
                                      self.filter_lengths[0],
                                      param_names=["W1", 'b1'])
        self.params += self.conv1.params
        self.bn1 = batchnorm(self.conv1.output)
        self.nl1 = self.activation(self.bn1.X)
        self.maxpool1 = ds.max_pool_2d(self.nl1, [3, 1],
                                       st=[2, 1],
                                       ignore_border=False).astype(
                                           theano.config.floatX)
        self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)
        #self.layer1_out = self.maxpool1
        # LAYER2 ################################
        self.flattened = T.flatten(self.layer1_out, outdim=2)
        # Variational Layer #####################
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic,
                                                    dim_z)
        self.params += self.latent_layer.params
        self.latent_out = self.latent_layer.output
        # Hidden Layer #########################
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
        self.params += self.hidden_layer.params
        self.hid_out = dropout(
            self.activation(self.hidden_layer.output).reshape(
                (self.inpt.shape[0], self.in_filters[-1],
                 int(self.magic / self.in_filters[-1]), 1)),
            self.dropout_symbolic)
        # Deconvolutional 1 ######################
        self.deconv1 = one_d_deconv_layer(self.hid_out,
                                          1,
                                          self.in_filters[2],
                                          self.filter_lengths[2],
                                          pool=2.,
                                          param_names=["W3", 'b3'],
                                          distribution=False)
        self.params += self.deconv1.params
        #self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
        self.tanh_out = self.deconv1.output
        self.last_layer = self.deconv1

        if self.out_distribution:
            self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.shape[2], :]
        self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

        ################################### FUNCTIONS ######################################################
        self.get_latent_states = theano.function(
            [self.inpt],
            self.latent_out,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        #self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
        #self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
        #self.convolve1 = theano.function([self.inpt],self.layer1_out)
        #self.convolve2 = theano.function([self.inpt],self.layer2_out)
        self.output = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        self.get_flattened = theano.function(
            [self.inpt],
            self.flattened,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        #self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
        #self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
        #self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
        #self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
        self.generate_from_z = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob],
                    [self.latent_out, self.generative_z]])

        self.cost = self.MSE()
        self.mse = self.MSE()
        #self.likelihood = self.log_px_z()
        #self.get_cost = theano.function([self.inpt],[self.cost,self.mse])

        #self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
        self.derivatives = T.grad(self.cost, self.params)
        #self.get_gradients = theano.function([self.inpt],self.derivatives)
        self.updates = adam(self.params, self.derivatives, self.learning_rate)
        #self.updates =momentum_update(self.params,self.derivatives,self.learning_rate,self.momentum)
        self.train_model = theano.function(
            inputs=[self.inpt, self.df],
            outputs=self.cost,
            updates=self.updates,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
Example 11
	def __init__(self,x_train,dim_z=10,batch_size = 10,filter_no = [5.,5.,5.],filter_l = [10.,10.,10.],
		pooling_d=3,pooling_s=2,learning_rate = 0.0008,dim_y=None,y_train=None,diff=None,magic=5000):
		####################################### SETTINGS ###################################
		self.x_train = x_train
		self.y_train = y_train
		if y_train is not None:
			self.dim_y = dim_y
		self.diff=diff
		self.batch_size = batch_size
		self.learning_rate = theano.shared(np.float32(learning_rate))
		self.performance = {"train":[]}
		self.inpt = T.ftensor4(name='input')
		self.Y = T.fcol(name='label')
		self.df = T.fmatrix(name='differential')
		self.dim_z = dim_z
		self.magic = magic
		self.pooling_d = pooling_d
		self.pooling_s = pooling_s
		self.generative_z = theano.shared(np.float32(np.zeros([1,dim_z])))
		self.generative_hid = theano.shared(np.float32(np.zeros([1,magic])))
		self.activation = relu
		self.out_distribution = False
		self.in_filters = filter_no	# number of filters per layer
		self.filter_lengths = filter_l	# per-layer filter lengths
		self.params = []


		self.d_o_prob = theano.shared(np.float32(0.0))
		####################################### LAYERS ######################################
		# LAYER 1 ##############################
		self.conv1 = one_d_conv_layer(self.inpt,self.in_filters[0],1,self.filter_lengths[0],param_names = ["W1",'b1']) 
		self.params+=self.conv1.params
		self.bn1 = batchnorm(self.conv1.output)
		self.nl1 = self.activation(self.bn1.X)
		self.maxpool1 = ds.max_pool_2d(self.nl1,[self.pooling_d,1],st=[self.pooling_s,1],ignore_border = False).astype(theano.config.floatX)
		self.layer1_out = dropout(self.maxpool1,self.d_o_prob)
		self.flattened = T.flatten(self.layer1_out,outdim = 2)
		# Conditional + variational layer #####################
		if y_train is not None:
			self.c_enc = hidden_layer(self.Y,1,self.dim_y)
			self.c_dec = hidden_layer(self.Y,1,self.dim_y,param_names = ["W10",'b10'])
			self.params+=self.c_enc.params
			self.params+=self.c_dec.params
			self.c_nl = self.activation(self.c_enc.output)
			self.c_nl_dec = self.activation(self.c_dec.output)
			self.concatenated = T.concatenate((self.flattened,self.c_nl),axis = 1)
			self.latent_layer = variational_gauss_layer(self.concatenated,self.magic+self.dim_y,dim_z)
		else:
			self.latent_layer = variational_gauss_layer(self.flattened,self.magic,dim_z)
		self.params+=self.latent_layer.params
		self.latent_out = self.latent_layer.output
		# Hidden Layer #########################
		if y_train is not None:
			self.dec_concat = T.concatenate((self.latent_out,self.c_nl_dec),axis = 1)
			self.hidden_layer = hidden_layer(self.dec_concat,self.dim_z+self.dim_y,self.magic)
		else:
			self.hidden_layer = hidden_layer(self.latent_out,dim_z,self.magic)
		self.params+=self.hidden_layer.params
		self.hid_out = dropout(self.activation(self.hidden_layer.output).reshape((self.inpt.shape[0],self.in_filters[-1],int(self.magic/self.in_filters[-1]),1)),self.d_o_prob)
		# Deconvolutional 1 ######################
		self.deconv1 = one_d_deconv_layer(self.hid_out,1,self.in_filters[2],self.filter_lengths[2],pool=self.pooling_d,param_names = ["W3",'b3'],distribution=False)
		self.params+=self.deconv1.params
		#self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
		self.tanh_out = self.deconv1.output
		self.last_layer = self.deconv1

		if self.out_distribution:
			self.trunk_sigma =  self.last_layer.log_sigma[:,:,:self.inpt.shape[2],:]
		self.trunc_output = self.tanh_out[:,:,:self.inpt.shape[2],:]
		self.cost = self.MSE()
		self.mse = self.MSE()
		#self.likelihood = self.log_px_z()
		#self.get_cost = theano.function([self.inpt],[self.cost,self.mse])

		#self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
		self.derivatives = T.grad(self.cost,self.params)
		#self.get_gradients = theano.function([self.inpt],self.derivatives)
		self.updates =adam(self.params,self.derivatives,self.learning_rate)
		
		################################### FUNCTIONS ######################################################
		#self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
		#self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
		#self.convolve1 = theano.function([self.inpt],self.layer1_out)
		#self.convolve2 = theano.function([self.inpt],self.layer2_out)
		#self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
		#self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
		#self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
		#self.output = theano.function([self.inpt],self.trunc_output,givens=[[self.dropout_symbolic,self.dropout_prob]])
		#self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
		#self.get_cost = theano.function([self.inpt],[self.cost,self.mse])
		#self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
		#self.get_gradients = theano.function([self.inpt],self.derivatives)

		self.generate_from_hid = theano.function([self.inpt],self.trunc_output,givens = [[self.hidden_layer.output,self.generative_hid]])
		self.get_flattened = theano.function([self.inpt],self.flattened)
		if self.y_train is not None:
			self.generate_from_z = theano.function([self.inpt,self.Y],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
			self.train_model = theano.function(inputs = [self.inpt,self.df,self.Y],outputs = self.cost,updates = self.updates)
			self.get_latent_states = theano.function([self.inpt,self.Y],self.latent_out)
			self.get_c_enc = theano.function([self.Y],self.c_enc.output)
			self.output = theano.function([self.inpt,self.Y],self.trunc_output)
			self.get_concat = theano.function([self.inpt,self.Y],self.concatenated)
		else:
			self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
			self.train_model = theano.function(inputs = [self.inpt,self.df],outputs = self.cost,updates = self.updates)
			self.output = theano.function([self.inpt],self.trunc_output)
			self.get_latent_states = theano.function([self.inpt],self.latent_out)
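The constructor compiles different function signatures depending on whether labels are supplied; a hedged calling sketch (the class name CondVAE and the batch arrays are placeholders; note self.Y is a float column, T.fcol):

model = CondVAE(x_train, dim_z=10, dim_y=4, y_train=y_train)  # conditional branch
cost = model.train_model(x_batch, df_batch, y_batch)          # y_batch: float32 column of shape (n, 1)

model = CondVAE(x_train, dim_z=10)                            # unconditional branch
cost = model.train_model(x_batch, df_batch)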
Example 12
	def __init__(self,hid_dim,att_hid_dim,bptt_truncate = -1):
		self.hidden_dim = hid_dim
		self.bptt_truncate = bptt_truncate
		self.att_hid_dim = att_hid_dim
		"""
		# input lstm parameters
		self.Ui = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.Uf = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.Uo = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.Ug = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.Wi = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.Wf = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.Wo = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.Wg = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.bi = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.bf = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.bo = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.bg = constant_param(value=0.0, shape=(self.hidden_dim,))	
		"""
		#gru sentence parameters
		self.U0_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.U1_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.U2_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
		self.W0_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.W1_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.W2_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
		self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))	
		#  1 attention mechanism parameters
		self.W1 = normal_param(std=0.0033, shape=(self.hidden_dim, (4*self.hidden_dim)+1))
		self.W2 = normal_param(std=0.01, shape=(res,self.hidden_dim))
		self.b1 = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2 = constant_param(value=0.0, shape=(res,))
		self.Wb = normal_param(std=0.008, shape=(self.hidden_dim,self.hidden_dim))
		#  2 attention mechanism parameters
		self.Wx = normal_param(std=0.015, shape=(self.att_hid_dim, self.hidden_dim))
		self.Wd = normal_param(std=0.015, shape=(self.att_hid_dim, self.hidden_dim))
		self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim,))
		self.b_p = constant_param(value=0.0, shape=(res,))
		self.Wp = normal_param(std=0.05, shape=(res,self.att_hid_dim))

		q = T.matrix()
		a = T.matrix()
		t = T.vector()

		s_a,a_updates = theano.scan(self.input_next_state,sequences=a,outputs_info=T.zeros_like(self.b0_i))
		s_q,q_updates = theano.scan(self.input_next_state,sequences=q,outputs_info=T.zeros_like(self.b0_i))

		q_q = s_q[-1]
		a_a = s_a[-1]	

		self.pred = self.attn_step_2(a_a,q_q)	

		self.loss = self.kl_div(t,self.pred)

		self.params = [self.U0_i,self.W0_i,self.b0_i,
				self.U1_i,self.W1_i,self.b1_i,
				self.U2_i,self.W2_i,self.b2_i,
				self.Wx,self.Wd,self.b_h,self.b_p,self.Wp]

		#self.loss = self.loss + 0.00003*l2_reg(self.params)
		updts = upd.adam(self.loss,self.params)

		self.train_fn = theano.function(inputs = [q,a,t], outputs = [self.pred,self.loss], updates = updts)
		self.test_fn = theano.function(inputs = [q,a], outputs = self.pred)
Example 13
	def __init__(self,f_match,f_decomp,filter_no):
		fm = f_match.split("-")
		self.f_match = fm[0]
		self.fm_win = 0
		if len(fm) != 1:
			self.fm_win = int(fm[1])
		self.f_decomp = f_decomp
		self.fn = filter_no

		s = T.tensor4()
		t = T.tensor4()
		scr = T.scalar()

		w_shp1 = (self.fn,2,3,word_vector_size)
		w_shp2 = (self.fn,2,2,word_vector_size)
		w_shp3 = (self.fn,2,1,word_vector_size)
		w_bound1 = 2*3*word_vector_size
		w_bound2 = 2*2*word_vector_size
		w_bound3 = 2*1*word_vector_size
		b_shp = (self.fn,)
		self.W1 = theano.shared( np.asarray(np.random.uniform(
							low=-1.0 / w_bound1,
							high=1.0 / w_bound1,
							size=w_shp1),
						    dtype=s.dtype), name ='W1')
		self.W2 = theano.shared( np.asarray(np.random.uniform(
							low=-1.0 / w_bound2,
							high=1.0 / w_bound2,
							size=w_shp2),
						    dtype=s.dtype), name ='W2')
		self.W3 = theano.shared( np.asarray(np.random.uniform(
							low=-1.0 / w_bound3,
							high=1.0 / w_bound3,
							size=w_shp3),
						    dtype=s.dtype), name ='W3')
		self.b1 = theano.shared(np.asarray(
						    np.random.uniform(low=-.5, high=.5, size=b_shp),
						    dtype=s.dtype), name ='b1')
		self.b2 = theano.shared(np.asarray(
						    np.random.uniform(low=-.5, high=.5, size=b_shp),
						    dtype=s.dtype), name ='b2')
		self.b3 = theano.shared(np.asarray(
						    np.random.uniform(low=-.5, high=.5, size=b_shp),
						    dtype=s.dtype), name ='b3')

		conv_out_s1 = conv2d(s,self.W1)
		output_s1 = T.tanh(conv_out_s1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
		output_s1 = output_s1.reshape((output_s1.shape[1],output_s1.shape[2]))
		o_s1,os1_updates = theano.scan(self.max_pool,sequences = output_s1,outputs_info = None)
		conv_out_s2 = conv2d(s,self.W2)
		output_s2 = T.tanh(conv_out_s2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
		output_s2 = output_s2.reshape((output_s2.shape[1],output_s2.shape[2]))
		o_s2,os2_updates = theano.scan(self.max_pool,sequences = output_s2,outputs_info = None)
		conv_out_s3 = conv2d(s,self.W3)
		output_s3 = T.tanh(conv_out_s3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
		output_s3 = output_s3.reshape((output_s3.shape[1],output_s3.shape[2]))
		o_s3,os3_updates = theano.scan(self.max_pool,sequences = output_s3,outputs_info = None)
		self.o_s = T.concatenate([o_s1,o_s2,o_s3],axis=0)

		conv_out_t1 = conv2d(t,self.W1)
		output_t1 = T.tanh(conv_out_t1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
		output_t1 = output_t1.reshape((output_t1.shape[1],output_t1.shape[2]))
		o_t1,ot1_updates = theano.scan(self.max_pool,sequences = output_t1,outputs_info = None)
		conv_out_t2 = conv2d(t,self.W2)
		output_t2 = T.tanh(conv_out_t2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
		output_t2 = output_t2.reshape((output_t2.shape[1],output_t2.shape[2]))
		o_t2,ot2_updates = theano.scan(self.max_pool,sequences = output_t2,outputs_info = None)
		conv_out_t3 = conv2d(t,self.W3)
		output_t3 = T.tanh(conv_out_t3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
		output_t3 = output_t3.reshape((output_t3.shape[1],output_t3.shape[2]))
		o_t3,ot3_updates = theano.scan(self.max_pool,sequences = output_t3,outputs_info = None)
		self.o_t = T.concatenate([o_t1,o_t2,o_t3],axis=0)

		sc,sc_updates = theano.scan(self.l1,sequences = [self.o_s,self.o_t],outputs_info = None)
		self.score = T.exp(T.sum(sc))	

		self.loss = (scr-self.score)*(scr-self.score)

		self.params = [self.W1,self.W2,self.W3,self.b1,self.b2,self.b3]

		#self.loss = self.loss + 0.00003*l2_reg(self.params)
		updts = upd.adam(self.loss,self.params)

		self.train_fn = theano.function(inputs = [s,t,scr], outputs = [self.score,self.loss], updates = updts)
		self.test_fn = theano.function(inputs = [s,t], outputs = self.score)
		self.f = theano.function([s,t], [self.o_s,self.o_t])
Example 14
	def __init__(self,f_match,f_decomp,hidden_dim,att_hid_dim):
		fm = f_match.split("-")
		self.f_match = fm[0]
		self.fm_win = 0
		if len(fm) != 1:
			self.fm_win = int(fm[1])
		self.f_decomp = f_decomp
		self.hidden_dim = hidden_dim
		self.att_hid_dim = att_hid_dim
		
		#gru sentence parameters
		self.U0_i = normal_param(std=(2.0/(self.hidden_dim+word_vector_size)), shape=(self.hidden_dim, word_vector_size))
		self.U1_i = normal_param(std=(2.0/(self.hidden_dim+word_vector_size)), shape=(self.hidden_dim, word_vector_size))
		self.U2_i = normal_param(std=(2.0/(self.hidden_dim+word_vector_size)), shape=(self.hidden_dim, word_vector_size))
		self.W0_i = normal_param(std=(2.0/(self.hidden_dim+self.hidden_dim)), shape=(self.hidden_dim, self.hidden_dim))
		self.W1_i = normal_param(std=(2.0/(self.hidden_dim+self.hidden_dim)), shape=(self.hidden_dim, self.hidden_dim))
		self.W2_i = normal_param(std=(2.0/(self.hidden_dim+self.hidden_dim)), shape=(self.hidden_dim, self.hidden_dim))
		self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))
		#  1 attention mechanism parameters
		self.W1 = normal_param(std=(2.0/(self.hidden_dim+(4*self.hidden_dim))), shape=(self.hidden_dim, (4*self.hidden_dim)+1))
		self.W2 = normal_param(std=(2.0/(res+self.hidden_dim)), shape=(res,self.hidden_dim))
		self.b1 = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2 = constant_param(value=0.0, shape=(res,))
		self.Wb = normal_param(std=(2.0/(self.hidden_dim+self.hidden_dim)), shape=(self.hidden_dim,self.hidden_dim))
		#  2 attention mechanism parameters
		self.Wx = normal_param(std=(2.0/(self.att_hid_dim+(2*self.hidden_dim))), shape=(self.att_hid_dim, 2*self.hidden_dim))
		self.Wd = normal_param(std=(2.0/(self.att_hid_dim+(2*self.hidden_dim))), shape=(self.att_hid_dim, 2*self.hidden_dim))
		self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim,))
		self.b_p = constant_param(value=0.0, shape=(res,))
		self.Wp = normal_param(std=(2.0/(res+self.att_hid_dim)), shape=(res,self.att_hid_dim))

		spl = T.matrix()
		smin = T.matrix()
		tpl = T.matrix()
		tmin = T.matrix()
		scr = T.vector()
		
		s_p,spl_updates = theano.scan(self.input_next_state,sequences=spl,outputs_info=T.zeros_like(self.b0_i))
		s_m,smin_updates = theano.scan(self.input_next_state,sequences=smin,outputs_info=T.zeros_like(self.b0_i))
		t_p,tpl_updates = theano.scan(self.input_next_state,sequences=tpl,outputs_info=T.zeros_like(self.b0_i))
		t_m,tmin_updates = theano.scan(self.input_next_state,sequences=tmin,outputs_info=T.zeros_like(self.b0_i))

		s_plus = s_p[-1]
		s_minus = s_m[-1]
		t_plus = t_p[-1]
		t_minus = t_m[-1]

		s = T.concatenate([s_plus,s_minus],axis = 0)
		t = T.concatenate([t_plus,t_minus],axis=0)

		self.pred = self.attn_step_2(s,t)	

		self.loss = self.kl_div(scr,self.pred)

		self.params = [self.U0_i,self.W0_i,self.b0_i,
				self.U1_i,self.W1_i,self.b1_i,
				self.U2_i,self.W2_i,self.b2_i,
				self.Wx,self.Wd,self.b_h,self.b_p,self.Wp]

		#self.loss = self.loss + 0.00003*l2_reg(self.params)
		updts = upd.adam(self.loss,self.params)

		self.train_fn = theano.function(inputs = [spl,smin,tpl,tmin,scr], outputs = [self.pred,self.loss], updates = updts)
		self.test_fn = theano.function(inputs = [spl,smin,tpl,tmin], outputs = self.pred)
Example 15
	def __init__(self,f_match,f_decomp,filter_no,att_hid_dim):
		fm = f_match.split("-")
		self.f_match = fm[0]
		self.fm_win = 0
		if len(fm) != 1:
			self.fm_win = int(fm[1])
		self.f_decomp = f_decomp
		self.fn = filter_no
		self.att_hid_dim = att_hid_dim

		s = T.tensor4()
		t = T.tensor4()
		scr = T.vector()
		"""
		#  1 attention mechanism parameters
		self.W1 = normal_param(std=0.0033, shape=(self.hidden_dim, (4*self.hidden_dim)+1))
		self.W2 = normal_param(std=0.01, shape=(res,self.hidden_dim))
		self.b1 = constant_param(value=0.0, shape=(self.hidden_dim,))
		self.b2 = constant_param(value=0.0, shape=(res,))
		self.Wb = normal_param(std=0.008, shape=(self.hidden_dim,self.hidden_dim))
		"""
		#  2 attention mechanism parameters
		self.Wx = normal_param(std=(2.0/(self.att_hid_dim+(3*self.fn))), shape=(self.att_hid_dim, 3*self.fn))
		self.Wd = normal_param(std=(2.0/(self.att_hid_dim+(3*self.fn))), shape=(self.att_hid_dim, 3*self.fn))
		self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim,))
		self.b_p = constant_param(value=0.0, shape=(res,))
		self.Wp = normal_param(std=(2.0/(res+self.att_hid_dim)), shape=(res,self.att_hid_dim))

		w_shp1 = (self.fn,2,3,word_vector_size)
		w_shp2 = (self.fn,2,2,word_vector_size)
		w_shp3 = (self.fn,2,1,word_vector_size)
		w_bound1 = 2*3*word_vector_size
		w_bound2 = 2*2*word_vector_size
		w_bound3 = 2*1*word_vector_size
		b_shp = (self.fn,)
		self.W1 = theano.shared( np.asarray(np.random.uniform(
							low=-1.0 / w_bound1,
							high=1.0 / w_bound1,
							size=w_shp1),
						    dtype=s.dtype), name ='W1')
		self.W2 = theano.shared( np.asarray(np.random.uniform(
							low=-1.0 / w_bound2,
							high=1.0 / w_bound2,
							size=w_shp2),
						    dtype=s.dtype), name ='W2')
		self.W3 = theano.shared( np.asarray(np.random.uniform(
							low=-1.0 / w_bound3,
							high=1.0 / w_bound3,
							size=w_shp3),
						    dtype=s.dtype), name ='W3')
		self.b1 = theano.shared(np.asarray(
						    np.random.uniform(low=-.5, high=.5, size=b_shp),
						    dtype=s.dtype), name ='b1')
		self.b2 = theano.shared(np.asarray(
						    np.random.uniform(low=-.5, high=.5, size=b_shp),
						    dtype=s.dtype), name ='b2')
		self.b3 = theano.shared(np.asarray(
						    np.random.uniform(low=-.5, high=.5, size=b_shp),
						    dtype=s.dtype), name ='b3')

		conv_out_s1 = conv2d(s,self.W1)
		output_s1 = T.tanh(conv_out_s1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
		output_s1 = output_s1.reshape((output_s1.shape[1],output_s1.shape[2]))
		o_s1,os1_updates = theano.scan(self.max_pool,sequences = output_s1,outputs_info = None)
		conv_out_s2 = conv2d(s,self.W2)
		output_s2 = T.tanh(conv_out_s2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
		output_s2 = output_s2.reshape((output_s2.shape[1],output_s2.shape[2]))
		o_s2,os2_updates = theano.scan(self.max_pool,sequences = output_s2,outputs_info = None)
		conv_out_s3 = conv2d(s,self.W3)
		output_s3 = T.tanh(conv_out_s3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
		output_s3 = output_s3.reshape((output_s3.shape[1],output_s3.shape[2]))
		o_s3,os3_updates = theano.scan(self.max_pool,sequences = output_s3,outputs_info = None)
		self.o_s = T.concatenate([o_s1,o_s2,o_s3],axis=0)

		conv_out_t1 = conv2d(t,self.W1)
		output_t1 = T.tanh(conv_out_t1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
		output_t1 = output_t1.reshape((output_t1.shape[1],output_t1.shape[2]))
		o_t1,ot1_updates = theano.scan(self.max_pool,sequences = output_t1,outputs_info = None)
		conv_out_t2 = conv2d(t,self.W2)
		output_t2 = T.tanh(conv_out_t2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
		output_t2 = output_t2.reshape((output_t2.shape[1],output_t2.shape[2]))
		o_t2,ot2_updates = theano.scan(self.max_pool,sequences = output_t2,outputs_info = None)
		conv_out_t3 = conv2d(t,self.W3)
		output_t3 = T.tanh(conv_out_t3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
		output_t3 = output_t3.reshape((output_t3.shape[1],output_t3.shape[2]))
		o_t3,ot3_updates = theano.scan(self.max_pool,sequences = output_t3,outputs_info = None)
		self.o_t = T.concatenate([o_t1,o_t2,o_t3],axis=0)

		self.pred = self.attn_step_2(self.o_s,self.o_t)	
		self.loss = self.kl_div(scr,self.pred)	

		self.params = [self.Wx,self.Wd,self.b_h,self.b_p,self.Wp,
				self.W1,self.W2,self.W3,self.b1,self.b2,self.b3]

		#self.loss = self.loss + 0.00003*l2_reg(self.params)
		updts = upd.adam(self.loss,self.params)

		self.train_fn = theano.function(inputs = [s,t,scr], outputs = [self.pred,self.loss], updates = updts)
		self.test_fn = theano.function(inputs = [s,t], outputs = self.pred)
		self.f = theano.function([s,t], [self.o_s,self.o_t])