def __init__(self, hid_dim, bptt_truncate=-1):
    self.hidden_dim = hid_dim
    self.bptt_truncate = bptt_truncate

    # input lstm parameters
    self.Ui = normal_param(std=0.008, shape=(self.hidden_dim, word_vector_size))
    self.Uf = normal_param(std=0.008, shape=(self.hidden_dim, word_vector_size))
    self.Uo = normal_param(std=0.008, shape=(self.hidden_dim, word_vector_size))
    self.Ug = normal_param(std=0.008, shape=(self.hidden_dim, word_vector_size))
    self.Wi = normal_param(std=0.02, shape=(self.hidden_dim, self.hidden_dim))
    self.Wf = normal_param(std=0.02, shape=(self.hidden_dim, self.hidden_dim))
    self.Wo = normal_param(std=0.02, shape=(self.hidden_dim, self.hidden_dim))
    self.Wg = normal_param(std=0.02, shape=(self.hidden_dim, self.hidden_dim))
    self.bi = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.bf = constant_param(value=1.5, shape=(self.hidden_dim,))
    self.bo = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.bg = constant_param(value=0.0, shape=(self.hidden_dim,))

    """
    #gru sentence parameters
    self.U0_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.U1_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.U2_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.W0_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.W1_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.W2_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    """

    q = T.matrix()
    a = T.matrix()
    t = T.scalar()

    s_a, a_updates = theano.scan(self.lstm_inp_next_state, sequences=a,
                                 outputs_info=[T.zeros_like(self.bi),
                                               T.zeros_like(self.bi)])
    s_q, q_updates = theano.scan(self.lstm_inp_next_state, sequences=q,
                                 outputs_info=[T.zeros_like(self.bi),
                                               T.zeros_like(self.bi)])
    q_q = s_q[1][-1]
    a_a = s_a[1][-1]

    scr, scr_updates = theano.scan(self.l1, sequences=[q_q, a_a], outputs_info=None)
    self.score = T.exp(T.sum(scr))
    self.loss = (t - self.score) * (t - self.score)

    self.params = [self.Ui, self.Wi, self.bi,
                   self.Uf, self.Wf, self.bf,
                   self.Uo, self.Wo, self.bo,
                   self.Ug, self.Wg, self.bg]

    #self.loss = self.loss + 0.00003*l2_reg(self.params)
    updts = upd.adam(self.loss, self.params)

    self.train_fn = theano.function(inputs=[q, a, t],
                                    outputs=[self.score, self.loss],
                                    updates=updts)
    self.test_fn = theano.function(inputs=[q, a], outputs=self.score)
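# --- Hedged sketch (assumption, not part of the source) ----------------------
# The scans above assume a step method `lstm_inp_next_state` taking the current
# word vector plus one value per outputs_info entry. Returning [c_t, h_t] makes
# s_q[1][-1] the final hidden state, matching its use as the sentence
# representation. A standard LSTM step under that assumption:
def lstm_inp_next_state(self, x_t, c_prev, h_prev):
    i = T.nnet.sigmoid(T.dot(self.Ui, x_t) + T.dot(self.Wi, h_prev) + self.bi)
    f = T.nnet.sigmoid(T.dot(self.Uf, x_t) + T.dot(self.Wf, h_prev) + self.bf)
    o = T.nnet.sigmoid(T.dot(self.Uo, x_t) + T.dot(self.Wo, h_prev) + self.bo)
    g = T.tanh(T.dot(self.Ug, x_t) + T.dot(self.Wg, h_prev) + self.bg)
    c_t = f * c_prev + i * g   # new cell state
    h_t = o * T.tanh(c_t)      # new hidden state
    return [c_t, h_t]

# `self.l1` is also undefined here; a negative elementwise L1 distance would
# make score = exp(sum(l1)) = exp(-||q - a||_1), i.e. a similarity in (0, 1]
# (again an assumption):
def l1(self, q_i, a_i):
    return -T.abs_(q_i - a_i)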
def __init__(self, hid_dim, bptt_truncate=-1):
    self.hidden_dim = hid_dim
    self.bptt_truncate = bptt_truncate

    #gru sentence parameters
    self.U0_i = normal_param(std=0.01, shape=(self.hidden_dim, word_vector_size))
    self.U1_i = normal_param(std=0.01, shape=(self.hidden_dim, word_vector_size))
    self.U2_i = normal_param(std=0.01, shape=(self.hidden_dim, word_vector_size))
    self.W0_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.W1_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.W2_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))

    #attention mechanism parameters
    self.W1 = normal_param(std=0.0033, shape=(2 * self.hidden_dim, (4 * self.hidden_dim) + 1))
    self.W2 = normal_param(std=0.01, shape=(2, 2 * self.hidden_dim))
    self.b1 = constant_param(value=0.0, shape=(2 * self.hidden_dim,))
    self.b2 = constant_param(value=0.0, shape=(2,))
    self.Wb = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))

    q = T.matrix()
    a = T.matrix()
    t = T.iscalar()

    # Theano rejects computed variables as function inputs, so keep q/a as the
    # raw input matrices and bind their dropout versions to new names.
    q_drop = dropout(q, 0.08)
    a_drop = dropout(a, 0.16)

    s_a, a_updates = theano.scan(self.input_next_state, sequences=a_drop,
                                 outputs_info=T.zeros_like(self.b2_i))
    s_q, q_updates = theano.scan(self.input_next_state, sequences=q_drop,
                                 outputs_info=T.zeros_like(self.b2_i))
    q_q = s_q[-1]
    a_a = s_a[-1]

    self.pred = self.attn_step(a_a, q_q)
    self.loss = T.mean(T.nnet.categorical_crossentropy(self.pred, T.stack([t])))

    self.params = [self.U0_i, self.W0_i, self.b0_i,
                   self.U1_i, self.W1_i, self.b1_i,
                   self.U2_i, self.W2_i, self.b2_i,
                   self.W1, self.W2, self.b1, self.b2, self.Wb]

    self.loss = self.loss + 0.00007 * l2_reg(self.params)
    updts = upd.adam(self.loss, self.params)

    self.train_fn = theano.function(inputs=[q, a, t],
                                    outputs=[self.pred, self.loss],
                                    updates=updts)
    self.test_fn = theano.function(inputs=[q, a], outputs=self.pred)
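# --- Hedged sketch (assumption, not part of the source) ----------------------
# `input_next_state` is not shown; a standard GRU step is consistent with the
# parameter triplets above (0: update gate, 1: reset gate, 2: candidate):
def input_next_state(self, x_t, h_prev):
    z = T.nnet.sigmoid(T.dot(self.U0_i, x_t) + T.dot(self.W0_i, h_prev) + self.b0_i)
    r = T.nnet.sigmoid(T.dot(self.U1_i, x_t) + T.dot(self.W1_i, h_prev) + self.b1_i)
    h_tilde = T.tanh(T.dot(self.U2_i, x_t) + T.dot(self.W2_i, r * h_prev) + self.b2_i)
    return (1 - z) * h_prev + z * h_tilde

# `attn_step` is also undefined; the shapes of W1 ((2h, 4h+1)), Wb ((h, h)) and
# W2 ((2, 2h)) fit the classic pairwise feature vector
# [a; q; a*q; |a-q|; a^T Wb q] followed by a tanh layer and a 2-way softmax
# (again an assumption):
def attn_step(self, a, q):
    z = T.concatenate([a, q, a * q, T.abs_(a - q),
                       T.dot(a, T.dot(self.Wb, q)).dimshuffle('x')])
    h = T.tanh(T.dot(self.W1, z) + self.b1)
    return T.nnet.softmax(T.dot(self.W2, h) + self.b2)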
def __init__(self, f_match, f_decomp, hidden_dim):
    fm = f_match.split("-")
    self.f_match = fm[0]
    self.fm_win = 0
    if len(fm) != 1:
        self.fm_win = int(fm[1])
    self.f_decomp = f_decomp
    self.hidden_dim = hidden_dim

    #gru sentence parameters
    self.U0_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.U1_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.U2_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.W0_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.W1_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.W2_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))

    spl = T.matrix()
    smin = T.matrix()
    tpl = T.matrix()
    tmin = T.matrix()
    scr = T.scalar()

    s_plus, spl_updates = theano.scan(self.input_next_state, sequences=spl,
                                      outputs_info=T.zeros_like(self.b0_i))
    s_minus, smin_updates = theano.scan(self.input_next_state, sequences=smin,
                                        outputs_info=T.zeros_like(self.b0_i))
    t_plus, tpl_updates = theano.scan(self.input_next_state, sequences=tpl,
                                      outputs_info=T.zeros_like(self.b0_i))
    t_minus, tmin_updates = theano.scan(self.input_next_state, sequences=tmin,
                                        outputs_info=T.zeros_like(self.b0_i))

    s = T.concatenate([s_plus, s_minus], axis=0)
    t = T.concatenate([t_plus, t_minus], axis=0)

    sc, sc_updates = theano.scan(self.l1, sequences=[s, t], outputs_info=None)
    self.score = T.exp(T.sum(sc))
    self.loss = (scr - self.score) * (scr - self.score)

    self.params = [self.U0_i, self.W0_i, self.b0_i,
                   self.U1_i, self.W1_i, self.b1_i,
                   self.U2_i, self.W2_i, self.b2_i]

    #self.loss = self.loss + 0.00003*l2_reg(self.params)
    updts = upd.adam(self.loss, self.params)

    self.train_fn = theano.function(inputs=[spl, smin, tpl, tmin, scr],
                                    outputs=[self.score, self.loss],
                                    updates=updts)
    self.test_fn = theano.function(inputs=[spl, smin, tpl, tmin],
                                   outputs=self.score)
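# --- Hedged usage sketch (names and shapes are illustrative) ------------------
# Each sentence is decomposed into a "plus" (matched) and "minus" (unmatched)
# channel; every argument is a (sentence_length, word_vector_size) float
# matrix and `gold` is the target similarity score:
#
#   model = SentenceSim("max-3", "rigid", hidden_dim=150)   # hypothetical ctor
#   for s_pl, s_mn, t_pl, t_mn, gold in train_pairs:        # hypothetical data
#       score, loss = model.train_fn(s_pl, s_mn, t_pl, t_mn, gold)
#   predicted = model.test_fn(s_pl, s_mn, t_pl, t_mn)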
def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
    ####################################### SETTINGS ###################################
    self.x_train = x_train
    self.x_test = x_test
    self.diff = diff
    self.batch_size = 100.0
    self.learning_rate = theano.shared(np.float32(0.0008))
    self.momentum = 0.3
    self.performance = {"train": [], "test": []}
    self.inpt = T.ftensor4(name="input")
    self.df = T.fmatrix(name="differential")
    self.dim_z = dim_z
    self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
    self.activation = relu
    self.generative = False
    self.out_distribution = False
    # self.y = T.matrix(name="y")
    self.in_filters = [64, 64, 64]
    self.filter_lengths = [10.0, 10.0, 10.0]
    self.params = []
    # magic = 73888.
    self.magic = magic
    self.dropout_symbolic = T.fscalar()
    self.dropout_prob = theano.shared(np.float32(0.0))

    ####################################### LAYERS ######################################
    # LAYER 1 ##############################
    self.conv1 = one_d_conv_layer(self.inpt, self.in_filters[0], 1,
                                  self.filter_lengths[0], param_names=["W1", "b1"])
    self.params += self.conv1.params
    self.bn1 = batchnorm(self.conv1.output)
    self.nl1 = self.activation(self.bn1.X)
    self.maxpool1 = pool_2d(self.nl1, [3, 1], stride=[2, 1],
                            mode="average_exc_pad").astype(theano.config.floatX)
    self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)
    # self.layer1_out = self.maxpool1

    # LAYER2 ################################
    self.flattened = T.flatten(self.layer1_out, outdim=2)

    # Variational Layer #####################
    self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z)
    self.params += self.latent_layer.params
    self.latent_out = self.latent_layer.output

    # Hidden Layer #########################
    self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
    self.params += self.hidden_layer.params
    self.hid_out = dropout(
        self.activation(self.hidden_layer.output).reshape(
            (self.inpt.shape[0], self.in_filters[-1],
             int(self.magic / self.in_filters[-1]), 1)),
        self.dropout_symbolic)

    # Devonvolutional 1 ######################
    self.deconv1 = one_d_deconv_layer(self.hid_out, 1, self.in_filters[2],
                                      self.filter_lengths[2], pool=2.0,
                                      param_names=["W3", "b3"], distribution=False)
    self.params += self.deconv1.params
    # self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
    self.tanh_out = self.deconv1.output
    self.last_layer = self.deconv1
    if self.out_distribution == True:
        self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.shape[2], :]
    self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

    ################################### FUNCTIONS ######################################################
    self.get_latent_states = theano.function(
        [self.inpt], self.latent_out,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    # self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
    # self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
    # self.convolve1 = theano.function([self.inpt],self.layer1_out)
    # self.convolve2 = theano.function([self.inpt],self.layer2_out)
    self.output = theano.function(
        [self.inpt], self.trunc_output,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    self.get_flattened = theano.function(
        [self.inpt], self.flattened,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    # self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
    # self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
    # self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
    # self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
    self.generate_from_z = theano.function(
        [self.inpt], self.trunc_output,
        givens=[[self.dropout_symbolic, self.dropout_prob],
                [self.latent_out, self.generative_z]])

    self.cost = self.MSE()
    self.mse = self.MSE()
    # self.likelihood = self.log_px_z()
    # self.get_cost = theano.function([self.inpt],[self.cost,self.mse])
    # self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
    self.derivatives = T.grad(self.cost, self.params)
    # self.get_gradients = theano.function([self.inpt],self.derivatives)
    self.updates = adam(self.params, self.derivatives, self.learning_rate)
    # self.updates = momentum_update(self.params,self.derivatives,self.learning_rate,self.momentum)
    self.train_model = theano.function(
        inputs=[self.inpt, self.df],
        outputs=self.cost,
        updates=self.updates,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
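# --- Hedged sketch (assumption, not part of the source) ----------------------
# self.MSE() is not shown. Given that train_model feeds self.inpt and the
# target matrix self.df, a plausible definition is the summed squared error
# between the flattened reconstruction and that target:
def MSE(self):
    return T.sum((T.flatten(self.trunc_output, outdim=2) - self.df) ** 2)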
def __init__(self, f_match, f_decomp, filter_no, att_hid_dim):
    fm = f_match.split("-")
    self.f_match = fm[0]
    self.fm_win = 0
    if len(fm) != 1:
        self.fm_win = int(fm[1])
    self.f_decomp = f_decomp
    self.fn = filter_no
    self.att_hid_dim = att_hid_dim

    s = T.tensor4()
    t = T.tensor4()
    scr = T.vector()

    """
    # 1 attention mechanism parameters
    self.W1 = normal_param(std=0.0033, shape=(self.hidden_dim, (4*self.hidden_dim)+1))
    self.W2 = normal_param(std=0.01, shape=(res,self.hidden_dim))
    self.b1 = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2 = constant_param(value=0.0, shape=(res,))
    self.Wb = normal_param(std=0.008, shape=(self.hidden_dim,self.hidden_dim))
    """

    # 2 attention mechanism parameters
    self.Wx = normal_param(std=(2.0 / (self.att_hid_dim + (3 * self.fn))),
                           shape=(self.att_hid_dim, 3 * self.fn))
    self.Wd = normal_param(std=(2.0 / (self.att_hid_dim + (3 * self.fn))),
                           shape=(self.att_hid_dim, 3 * self.fn))
    self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim,))
    self.b_p = constant_param(value=0.0, shape=(res,))
    self.Wp = normal_param(std=(2.0 / (res + self.att_hid_dim)),
                           shape=(res, self.att_hid_dim))

    w_shp1 = (self.fn, 2, 3, word_vector_size)
    w_shp2 = (self.fn, 2, 2, word_vector_size)
    w_shp3 = (self.fn, 2, 1, word_vector_size)
    w_bound1 = 2 * 3 * word_vector_size
    w_bound2 = 2 * 2 * word_vector_size
    w_bound3 = 2 * 1 * word_vector_size
    b_shp = (self.fn,)
    self.W1 = theano.shared(np.asarray(np.random.uniform(
        low=-1.0 / w_bound1, high=1.0 / w_bound1, size=w_shp1), dtype=s.dtype),
        name='W1')
    self.W2 = theano.shared(np.asarray(np.random.uniform(
        low=-1.0 / w_bound2, high=1.0 / w_bound2, size=w_shp2), dtype=s.dtype),
        name='W2')
    self.W3 = theano.shared(np.asarray(np.random.uniform(
        low=-1.0 / w_bound3, high=1.0 / w_bound3, size=w_shp3), dtype=s.dtype),
        name='W3')
    self.b1 = theano.shared(np.asarray(np.random.uniform(
        low=-.5, high=.5, size=b_shp), dtype=s.dtype), name='b1')
    self.b2 = theano.shared(np.asarray(np.random.uniform(
        low=-.5, high=.5, size=b_shp), dtype=s.dtype), name='b2')
    self.b3 = theano.shared(np.asarray(np.random.uniform(
        low=-.5, high=.5, size=b_shp), dtype=s.dtype), name='b3')

    conv_out_s1 = conv2d(s, self.W1)
    output_s1 = T.tanh(conv_out_s1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
    output_s1 = output_s1.reshape((output_s1.shape[1], output_s1.shape[2]))
    o_s1, os1_updates = theano.scan(self.max_pool, sequences=output_s1, outputs_info=None)

    conv_out_s2 = conv2d(s, self.W2)
    output_s2 = T.tanh(conv_out_s2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
    output_s2 = output_s2.reshape((output_s2.shape[1], output_s2.shape[2]))
    o_s2, os2_updates = theano.scan(self.max_pool, sequences=output_s2, outputs_info=None)

    conv_out_s3 = conv2d(s, self.W3)
    output_s3 = T.tanh(conv_out_s3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
    output_s3 = output_s3.reshape((output_s3.shape[1], output_s3.shape[2]))
    o_s3, os3_updates = theano.scan(self.max_pool, sequences=output_s3, outputs_info=None)

    self.o_s = T.concatenate([o_s1, o_s2, o_s3], axis=0)

    conv_out_t1 = conv2d(t, self.W1)
    output_t1 = T.tanh(conv_out_t1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
    output_t1 = output_t1.reshape((output_t1.shape[1], output_t1.shape[2]))
    o_t1, ot1_updates = theano.scan(self.max_pool, sequences=output_t1, outputs_info=None)

    conv_out_t2 = conv2d(t, self.W2)
    output_t2 = T.tanh(conv_out_t2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
    output_t2 = output_t2.reshape((output_t2.shape[1], output_t2.shape[2]))
    o_t2, ot2_updates = theano.scan(self.max_pool, sequences=output_t2, outputs_info=None)

    conv_out_t3 = conv2d(t, self.W3)
    output_t3 = T.tanh(conv_out_t3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
    output_t3 = output_t3.reshape((output_t3.shape[1], output_t3.shape[2]))
    o_t3, ot3_updates = theano.scan(self.max_pool, sequences=output_t3, outputs_info=None)

    self.o_t = T.concatenate([o_t1, o_t2, o_t3], axis=0)

    self.pred = self.attn_step_2(self.o_s, self.o_t)
    self.loss = self.kl_div(scr, self.pred)

    self.params = [self.Wx, self.Wd, self.b_h, self.b_p, self.Wp,
                   self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    #self.loss = self.loss + 0.00003*l2_reg(self.params)
    updts = upd.adam(self.loss, self.params)

    self.train_fn = theano.function(inputs=[s, t, scr],
                                    outputs=[self.pred, self.loss],
                                    updates=updts)
    self.test_fn = theano.function(inputs=[s, t], outputs=self.pred)
    self.f = theano.function([s, t], [self.o_s, self.o_t])
def __init__(self, f_match, f_decomp, hidden_dim, att_hid_dim):
    fm = f_match.split("-")
    self.f_match = fm[0]
    self.fm_win = 0
    if len(fm) != 1:
        self.fm_win = int(fm[1])
    self.f_decomp = f_decomp
    self.hidden_dim = hidden_dim
    self.att_hid_dim = att_hid_dim

    #gru sentence parameters
    self.U0_i = normal_param(std=(2.0 / (self.hidden_dim + word_vector_size)),
                             shape=(self.hidden_dim, word_vector_size))
    self.U1_i = normal_param(std=(2.0 / (self.hidden_dim + word_vector_size)),
                             shape=(self.hidden_dim, word_vector_size))
    self.U2_i = normal_param(std=(2.0 / (self.hidden_dim + word_vector_size)),
                             shape=(self.hidden_dim, word_vector_size))
    self.W0_i = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                             shape=(self.hidden_dim, self.hidden_dim))
    self.W1_i = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                             shape=(self.hidden_dim, self.hidden_dim))
    self.W2_i = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                             shape=(self.hidden_dim, self.hidden_dim))
    self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))

    # 1 attention mechanism parameters
    self.W1 = normal_param(std=(2.0 / (self.hidden_dim + (4 * self.hidden_dim))),
                           shape=(self.hidden_dim, (4 * self.hidden_dim) + 1))
    self.W2 = normal_param(std=(2.0 / (res + self.hidden_dim)),
                           shape=(res, self.hidden_dim))
    self.b1 = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2 = constant_param(value=0.0, shape=(res,))
    self.Wb = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                           shape=(self.hidden_dim, self.hidden_dim))

    # 2 attention mechanism parameters
    self.Wx = normal_param(std=(2.0 / (self.att_hid_dim + (2 * self.hidden_dim))),
                           shape=(self.att_hid_dim, 2 * self.hidden_dim))
    self.Wd = normal_param(std=(2.0 / (self.att_hid_dim + (2 * self.hidden_dim))),
                           shape=(self.att_hid_dim, 2 * self.hidden_dim))
    self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim,))
    self.b_p = constant_param(value=0.0, shape=(res,))
    self.Wp = normal_param(std=(2.0 / (res + self.att_hid_dim)),
                           shape=(res, self.att_hid_dim))

    spl = T.matrix()
    smin = T.matrix()
    tpl = T.matrix()
    tmin = T.matrix()
    scr = T.scalar()

    s_p, spl_updates = theano.scan(self.input_next_state, sequences=spl,
                                   outputs_info=T.zeros_like(self.b0_i))
    s_m, smin_updates = theano.scan(self.input_next_state, sequences=smin,
                                    outputs_info=T.zeros_like(self.b0_i))
    t_p, tpl_updates = theano.scan(self.input_next_state, sequences=tpl,
                                   outputs_info=T.zeros_like(self.b0_i))
    t_m, tmin_updates = theano.scan(self.input_next_state, sequences=tmin,
                                    outputs_info=T.zeros_like(self.b0_i))
    s_plus = s_p[-1]
    s_minus = s_m[-1]
    t_plus = t_p[-1]
    t_minus = t_m[-1]
    s = T.concatenate([s_plus, s_minus], axis=0)
    t = T.concatenate([t_plus, t_minus], axis=0)

    self.pred = self.attn_step_2(s, t)
    self.loss = (scr - self.pred) * (scr - self.pred)
    #self.loss = -(scr*T.log(self.pred)) - ((1-scr)*T.log(1-self.pred)) #for binary class for QA

    self.params = [self.U0_i, self.W0_i, self.b0_i,
                   self.U1_i, self.W1_i, self.b1_i,
                   self.U2_i, self.W2_i, self.b2_i,
                   self.Wx, self.Wd, self.b_h, self.b_p, self.Wp]

    #self.loss = self.loss + 0.00003*l2_reg(self.params)
    updts = upd.adam(self.loss, self.params)

    self.train_fn = theano.function(inputs=[spl, smin, tpl, tmin, scr],
                                    outputs=[self.pred, self.loss],
                                    updates=updts)
    self.test_fn = theano.function(inputs=[spl, smin, tpl, tmin],
                                   outputs=self.pred)
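# --- Hedged sketch (assumption, not part of the source) ----------------------
# `attn_step_2` is undefined here; the shapes of Wx and Wd
# ((att_hid_dim, 2*hidden_dim)) and Wp ((res, att_hid_dim)) suggest a
# one-hidden-layer comparator over the two sentence representations, with
# `res` a module-level global giving the output dimension:
def attn_step_2(self, s, t):
    h = T.tanh(T.dot(self.Wx, s) + T.dot(self.Wd, t) + self.b_h)
    return T.nnet.softmax(T.dot(self.Wp, h) + self.b_p)[0]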
def build(self):
    print 'building rnn cell...'
    hidden_layer = None
    input = self.x
    self.params = []
    print range(1, len(self.nlayers) - 1)
    for i in range(1, len(self.nlayers) - 1):
        if self.type == 'sigmoid':
            hidden_layer = sigmoid_layer(input, self.nlayers[i - 1],
                                         self.nlayers[i], prefix='hid_' + str(i))
        elif self.type == 'relu':
            hidden_layer = relu_layer(input, self.nlayers[i - 1],
                                      self.nlayers[i], prefix='hid_' + str(i))
        elif self.type == 'selu':
            hidden_layer = selu_layer(input, self.nlayers[i - 1],
                                      self.nlayers[i], prefix='hid_' + str(i))
        # Dropout: mask at train time, rescale at test time
        if self.p > 0:
            drop_mask = self.rng.binomial(n=1, p=1 - self.p,
                                          size=hidden_layer.activation.shape,
                                          dtype=theano.config.floatX)
            input = T.switch(self.is_train,
                             hidden_layer.activation * drop_mask,
                             hidden_layer.activation * (1 - self.p))
        else:
            input = T.switch(self.is_train, hidden_layer.activation,
                             hidden_layer.activation)
        self.params += hidden_layer.params

    print 'building softmax output layer...'
    output_layer = softmax(input, self.nlayers[-2], self.nlayers[-1])
    self.params += output_layer.params

    cost = T.sum(T.nnet.categorical_crossentropy(output_layer.activation, self.y))
    # accuracy: predicted class index must match the position of the one-hot
    # peak in y, so compare against argmax rather than the peak value itself
    acc = T.sum(T.eq(output_layer.predict, T.argmax(self.y, axis=-1)))

    lr = T.scalar("lr")
    gparams = [T.clip(T.grad(cost, p), -3, 3) for p in self.params]
    updates = None
    if self.optimizer == 'sgd':
        updates = sgd(self.params, gparams, lr)
    elif self.optimizer == 'adam':
        updates = adam(self.params, gparams, lr)
    elif self.optimizer == 'rmsprop':
        updates = rmsprop(params=self.params, grads=gparams, learning_rate=lr)

    self.train = theano.function(inputs=[self.x, self.y, lr],
                                 outputs=[cost, acc],
                                 updates=updates,
                                 givens={self.is_train: np.cast['int32'](1)})
    self.test = theano.function(inputs=[self.x],
                                outputs=output_layer.predict,
                                givens={self.is_train: np.cast['int32'](0)})
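# --- Hedged usage sketch (names are illustrative) -----------------------------
# x_batch: (batch, nlayers[0]) float matrix, y_batch: matching one-hot targets:
#
#   for epoch in range(n_epochs):                    # hypothetical loop
#       cost, acc = net.train(x_batch, y_batch, 0.001)
#   predictions = net.test(x_batch)                  # class indices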
def __init__(self, hid_dim, att_hid_dim, bptt_truncate=-1):
    self.hidden_dim = hid_dim
    self.bptt_truncate = bptt_truncate
    self.att_hid_dim = att_hid_dim

    """
    # input lstm parameters
    self.Ui = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.Uf = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.Uo = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.Ug = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.Wi = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.Wf = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.Wo = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.Wg = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.bi = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.bf = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.bo = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.bg = constant_param(value=0.0, shape=(self.hidden_dim,))
    """

    #gru sentence parameters
    self.U0_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.U1_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.U2_i = normal_param(std=0.006, shape=(self.hidden_dim, word_vector_size))
    self.W0_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.W1_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.W2_i = normal_param(std=0.01, shape=(self.hidden_dim, self.hidden_dim))
    self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))

    # 1 attention mechanism parameters
    self.W1 = normal_param(std=0.0033, shape=(self.hidden_dim, (4 * self.hidden_dim) + 1))
    self.W2 = normal_param(std=0.01, shape=(res, self.hidden_dim))
    self.b1 = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2 = constant_param(value=0.0, shape=(res,))
    self.Wb = normal_param(std=0.008, shape=(self.hidden_dim, self.hidden_dim))

    # 2 attention mechanism parameters
    self.Wx = normal_param(std=0.015, shape=(self.att_hid_dim, self.hidden_dim))
    self.Wd = normal_param(std=0.015, shape=(self.att_hid_dim, self.hidden_dim))
    self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim,))
    self.b_p = constant_param(value=0.0, shape=(res,))
    self.Wp = normal_param(std=0.05, shape=(res, self.att_hid_dim))

    q = T.matrix()
    a = T.matrix()
    t = T.vector()

    s_a, a_updates = theano.scan(self.input_next_state, sequences=a,
                                 outputs_info=T.zeros_like(self.b0_i))
    s_q, q_updates = theano.scan(self.input_next_state, sequences=q,
                                 outputs_info=T.zeros_like(self.b0_i))
    q_q = s_q[-1]
    a_a = s_a[-1]

    self.pred = self.attn_step_2(a_a, q_q)
    self.loss = self.kl_div(t, self.pred)

    self.params = [self.U0_i, self.W0_i, self.b0_i,
                   self.U1_i, self.W1_i, self.b1_i,
                   self.U2_i, self.W2_i, self.b2_i,
                   self.Wx, self.Wd, self.b_h, self.b_p, self.Wp]

    #self.loss = self.loss + 0.00003*l2_reg(self.params)
    updts = upd.adam(self.loss, self.params)

    self.train_fn = theano.function(inputs=[q, a, t],
                                    outputs=[self.pred, self.loss],
                                    updates=updts)
    self.test_fn = theano.function(inputs=[q, a], outputs=self.pred)
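# --- Hedged sketch (assumption, not part of the source) ----------------------
# `kl_div` is not shown; as the name suggests, a KL divergence from the gold
# distribution t to the prediction, with a small epsilon for stability:
def kl_div(self, t, pred):
    eps = 1e-7
    return T.sum(t * (T.log(t + eps) - T.log(pred + eps)))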
def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
    ####################################### SETTINGS ###################################
    self.x_train = x_train
    self.x_test = x_test
    self.diff = diff
    self.batch_size = 100.
    self.learning_rate = theano.shared(np.float32(0.0008))
    self.momentum = 0.3
    self.performance = {"train": [], "test": []}
    self.inpt = T.ftensor4(name='input')
    self.df = T.fmatrix(name='differential')
    self.dim_z = dim_z
    self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
    self.activation = relu
    self.generative = False
    self.out_distribution = False
    #self.y = T.matrix(name="y")
    self.in_filters = [5, 5, 5]
    self.filter_lengths = [10., 10., 10.]
    self.params = []
    #magic = 73888.
    self.magic = magic
    self.dropout_symbolic = T.fscalar()
    self.dropout_prob = theano.shared(np.float32(0.0))

    ####################################### LAYERS ######################################
    # LAYER 1 ##############################
    self.conv1 = one_d_conv_layer(self.inpt, self.in_filters[0], 1,
                                  self.filter_lengths[0], param_names=["W1", 'b1'])
    self.params += self.conv1.params
    self.bn1 = batchnorm(self.conv1.output)
    self.nl1 = self.activation(self.bn1.X)
    self.maxpool1 = ds.max_pool_2d(self.nl1, [3, 1], st=[2, 1],
                                   ignore_border=False).astype(theano.config.floatX)
    self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)
    #self.layer1_out = self.maxpool1

    # LAYER2 ################################
    self.flattened = T.flatten(self.layer1_out, outdim=2)

    # Variational Layer #####################
    self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z)
    self.params += self.latent_layer.params
    self.latent_out = self.latent_layer.output

    # Hidden Layer #########################
    self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
    self.params += self.hidden_layer.params
    self.hid_out = dropout(
        self.activation(self.hidden_layer.output).reshape(
            (self.inpt.shape[0], self.in_filters[-1],
             int(self.magic / self.in_filters[-1]), 1)),
        self.dropout_symbolic)

    # Devonvolutional 1 ######################
    self.deconv1 = one_d_deconv_layer(self.hid_out, 1, self.in_filters[2],
                                      self.filter_lengths[2], pool=2.,
                                      param_names=["W3", 'b3'], distribution=False)
    self.params += self.deconv1.params
    #self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
    self.tanh_out = self.deconv1.output
    self.last_layer = self.deconv1
    if self.out_distribution == True:
        self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.shape[2], :]
    self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

    ################################### FUNCTIONS ######################################################
    self.get_latent_states = theano.function(
        [self.inpt], self.latent_out,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    #self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
    #self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
    #self.convolve1 = theano.function([self.inpt],self.layer1_out)
    #self.convolve2 = theano.function([self.inpt],self.layer2_out)
    self.output = theano.function(
        [self.inpt], self.trunc_output,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    self.get_flattened = theano.function(
        [self.inpt], self.flattened,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
    #self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
    #self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
    #self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
    #self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
    self.generate_from_z = theano.function(
        [self.inpt], self.trunc_output,
        givens=[[self.dropout_symbolic, self.dropout_prob],
                [self.latent_out, self.generative_z]])

    self.cost = self.MSE()
    self.mse = self.MSE()
    #self.likelihood = self.log_px_z()
    #self.get_cost = theano.function([self.inpt],[self.cost,self.mse])
    #self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
    self.derivatives = T.grad(self.cost, self.params)
    #self.get_gradients = theano.function([self.inpt],self.derivatives)
    self.updates = adam(self.params, self.derivatives, self.learning_rate)
    #self.updates = momentum_update(self.params,self.derivatives,self.learning_rate,self.momentum)
    self.train_model = theano.function(
        inputs=[self.inpt, self.df],
        outputs=self.cost,
        updates=self.updates,
        givens=[[self.dropout_symbolic, self.dropout_prob]])
def __init__(self, x_train, dim_z=10, batch_size=10, filter_no=[5., 5., 5.],
             filter_l=[10., 10., 10.], pooling_d=3, pooling_s=2,
             learning_rate=0.0008, dim_y=None, y_train=None, diff=None,
             magic=5000):
    ####################################### SETTINGS ###################################
    self.x_train = x_train
    self.y_train = y_train
    if y_train is not None:
        self.dim_y = dim_y
    self.diff = diff
    self.batch_size = batch_size
    self.learning_rate = theano.shared(np.float32(learning_rate))
    self.performance = {"train": []}
    self.inpt = T.ftensor4(name='input')
    self.Y = T.fcol(name='label')
    self.df = T.fmatrix(name='differential')
    self.dim_z = dim_z
    self.magic = magic
    self.pooling_d = pooling_d
    self.pooling_s = pooling_s
    self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
    self.generative_hid = theano.shared(np.float32(np.zeros([1, magic])))
    self.activation = relu
    self.out_distribution = False
    # number of filters per layer and per-filter lengths, matching their use
    # in one_d_conv_layer below
    self.in_filters = filter_no
    self.filter_lengths = filter_l
    self.params = []
    self.d_o_prob = theano.shared(np.float32(0.0))

    ####################################### LAYERS ######################################
    # LAYER 1 ##############################
    self.conv1 = one_d_conv_layer(self.inpt, self.in_filters[0], 1,
                                  self.filter_lengths[0], param_names=["W1", 'b1'])
    self.params += self.conv1.params
    self.bn1 = batchnorm(self.conv1.output)
    self.nl1 = self.activation(self.bn1.X)
    self.maxpool1 = ds.max_pool_2d(self.nl1, [self.pooling_d, 1],
                                   st=[self.pooling_s, 1],
                                   ignore_border=False).astype(theano.config.floatX)
    self.layer1_out = dropout(self.maxpool1, self.d_o_prob)
    self.flattened = T.flatten(self.layer1_out, outdim=2)

    # Conditional + variational layer #####################
    if y_train is not None:
        self.c_enc = hidden_layer(self.Y, 1, self.dim_y)
        self.c_dec = hidden_layer(self.Y, 1, self.dim_y, param_names=["W10", 'b10'])
        self.params += self.c_enc.params
        self.params += self.c_dec.params
        self.c_nl = self.activation(self.c_enc.output)
        self.c_nl_dec = self.activation(self.c_dec.output)
        self.concatenated = T.concatenate((self.flattened, self.c_nl), axis=1)
        self.latent_layer = variational_gauss_layer(self.concatenated,
                                                    self.magic + self.dim_y, dim_z)
    else:
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z)
    self.params += self.latent_layer.params
    self.latent_out = self.latent_layer.output

    # Hidden Layer #########################
    if y_train is not None:
        self.dec_concat = T.concatenate((self.latent_out, self.c_nl_dec), axis=1)
        self.hidden_layer = hidden_layer(self.dec_concat,
                                         self.dim_z + self.dim_y, self.magic)
    else:
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
    self.params += self.hidden_layer.params
    self.hid_out = dropout(
        self.activation(self.hidden_layer.output).reshape(
            (self.inpt.shape[0], self.in_filters[-1],
             int(self.magic / self.in_filters[-1]), 1)),
        self.d_o_prob)

    # Devonvolutional 1 ######################
    self.deconv1 = one_d_deconv_layer(self.hid_out, 1, self.in_filters[2],
                                      self.filter_lengths[2], pool=self.pooling_d,
                                      param_names=["W3", 'b3'], distribution=False)
    self.params += self.deconv1.params
    #self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
    self.tanh_out = self.deconv1.output
    self.last_layer = self.deconv1
    if self.out_distribution == True:
        self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.shape[2], :]
    self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

    self.cost = self.MSE()
    self.mse = self.MSE()
    #self.likelihood = self.log_px_z()
    #self.get_cost = theano.function([self.inpt],[self.cost,self.mse])
    #self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
    self.derivatives = T.grad(self.cost, self.params)
    #self.get_gradients = theano.function([self.inpt],self.derivatives)
    self.updates = adam(self.params, self.derivatives, self.learning_rate)

    ################################### FUNCTIONS ######################################################
    #self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
    #self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
    #self.convolve1 = theano.function([self.inpt],self.layer1_out)
    #self.convolve2 = theano.function([self.inpt],self.layer2_out)
    #self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
    #self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
    #self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
    #self.output = theano.function([self.inpt],self.trunc_output,givens=[[self.dropout_symbolic,self.dropout_prob]])
    #self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
    self.generate_from_hid = theano.function(
        [self.inpt], self.trunc_output,
        givens=[[self.hidden_layer.output, self.generative_hid]])
    self.get_flattened = theano.function([self.inpt], self.flattened)
    if self.y_train is not None:
        self.generate_from_z = theano.function(
            [self.inpt, self.Y], self.trunc_output,
            givens=[[self.latent_out, self.generative_z]])
        self.train_model = theano.function(inputs=[self.inpt, self.df, self.Y],
                                           outputs=self.cost, updates=self.updates)
        self.get_latent_states = theano.function([self.inpt, self.Y], self.latent_out)
        self.get_c_enc = theano.function([self.Y], self.c_enc.output)
        self.output = theano.function([self.inpt, self.Y], self.trunc_output)
        self.get_concat = theano.function([self.inpt, self.Y], self.concatenated)
    else:
        self.generate_from_z = theano.function(
            [self.inpt], self.trunc_output,
            givens=[[self.latent_out, self.generative_z]])
        self.train_model = theano.function(inputs=[self.inpt, self.df],
                                           outputs=self.cost, updates=self.updates)
        self.output = theano.function([self.inpt], self.trunc_output)
        self.get_latent_states = theano.function([self.inpt], self.latent_out)
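# --- Hedged usage sketch (names and shapes are illustrative) ------------------
# inpt batches are float32 tensors of shape (batch, 1, length, 1); in the
# conditional branch Y is a float32 column of labels:
#
#   vae = ConvVAE(x_train, dim_z=10, dim_y=1, y_train=y_train)   # hypothetical
#   cost = vae.train_model(x_batch, df_batch, y_batch)
#   z = vae.get_latent_states(x_batch, y_batch)
#   recon = vae.output(x_batch, y_batch)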
def __init__(self, f_match, f_decomp, filter_no):
    fm = f_match.split("-")
    self.f_match = fm[0]
    self.fm_win = 0
    if len(fm) != 1:
        self.fm_win = int(fm[1])
    self.f_decomp = f_decomp
    self.fn = filter_no

    s = T.tensor4()
    t = T.tensor4()
    scr = T.scalar()

    w_shp1 = (self.fn, 2, 3, word_vector_size)
    w_shp2 = (self.fn, 2, 2, word_vector_size)
    w_shp3 = (self.fn, 2, 1, word_vector_size)
    w_bound1 = 2 * 3 * word_vector_size
    w_bound2 = 2 * 2 * word_vector_size
    w_bound3 = 2 * 1 * word_vector_size
    b_shp = (self.fn,)
    self.W1 = theano.shared(np.asarray(np.random.uniform(
        low=-1.0 / w_bound1, high=1.0 / w_bound1, size=w_shp1), dtype=s.dtype),
        name='W1')
    self.W2 = theano.shared(np.asarray(np.random.uniform(
        low=-1.0 / w_bound2, high=1.0 / w_bound2, size=w_shp2), dtype=s.dtype),
        name='W2')
    self.W3 = theano.shared(np.asarray(np.random.uniform(
        low=-1.0 / w_bound3, high=1.0 / w_bound3, size=w_shp3), dtype=s.dtype),
        name='W3')
    self.b1 = theano.shared(np.asarray(np.random.uniform(
        low=-.5, high=.5, size=b_shp), dtype=s.dtype), name='b1')
    self.b2 = theano.shared(np.asarray(np.random.uniform(
        low=-.5, high=.5, size=b_shp), dtype=s.dtype), name='b2')
    self.b3 = theano.shared(np.asarray(np.random.uniform(
        low=-.5, high=.5, size=b_shp), dtype=s.dtype), name='b3')

    conv_out_s1 = conv2d(s, self.W1)
    output_s1 = T.tanh(conv_out_s1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
    output_s1 = output_s1.reshape((output_s1.shape[1], output_s1.shape[2]))
    o_s1, os1_updates = theano.scan(self.max_pool, sequences=output_s1, outputs_info=None)

    conv_out_s2 = conv2d(s, self.W2)
    output_s2 = T.tanh(conv_out_s2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
    output_s2 = output_s2.reshape((output_s2.shape[1], output_s2.shape[2]))
    o_s2, os2_updates = theano.scan(self.max_pool, sequences=output_s2, outputs_info=None)

    conv_out_s3 = conv2d(s, self.W3)
    output_s3 = T.tanh(conv_out_s3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
    output_s3 = output_s3.reshape((output_s3.shape[1], output_s3.shape[2]))
    o_s3, os3_updates = theano.scan(self.max_pool, sequences=output_s3, outputs_info=None)

    self.o_s = T.concatenate([o_s1, o_s2, o_s3], axis=0)

    conv_out_t1 = conv2d(t, self.W1)
    output_t1 = T.tanh(conv_out_t1 + self.b1.dimshuffle('x', 0, 'x', 'x'))
    output_t1 = output_t1.reshape((output_t1.shape[1], output_t1.shape[2]))
    o_t1, ot1_updates = theano.scan(self.max_pool, sequences=output_t1, outputs_info=None)

    conv_out_t2 = conv2d(t, self.W2)
    output_t2 = T.tanh(conv_out_t2 + self.b2.dimshuffle('x', 0, 'x', 'x'))
    output_t2 = output_t2.reshape((output_t2.shape[1], output_t2.shape[2]))
    o_t2, ot2_updates = theano.scan(self.max_pool, sequences=output_t2, outputs_info=None)

    conv_out_t3 = conv2d(t, self.W3)
    output_t3 = T.tanh(conv_out_t3 + self.b3.dimshuffle('x', 0, 'x', 'x'))
    output_t3 = output_t3.reshape((output_t3.shape[1], output_t3.shape[2]))
    o_t3, ot3_updates = theano.scan(self.max_pool, sequences=output_t3, outputs_info=None)

    self.o_t = T.concatenate([o_t1, o_t2, o_t3], axis=0)

    sc, sc_updates = theano.scan(self.l1, sequences=[self.o_s, self.o_t],
                                 outputs_info=None)
    self.score = T.exp(T.sum(sc))
    self.loss = (scr - self.score) * (scr - self.score)

    self.params = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    #self.loss = self.loss + 0.00003*l2_reg(self.params)
    updts = upd.adam(self.loss, self.params)

    self.train_fn = theano.function(inputs=[s, t, scr],
                                    outputs=[self.score, self.loss],
                                    updates=updts)
    self.test_fn = theano.function(inputs=[s, t], outputs=self.score)
    self.f = theano.function([s, t], [self.o_s, self.o_t])
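# --- Hedged sketch (assumption, not part of the source) ----------------------
# After the reshape, each row scanned by `max_pool` holds one filter's tanh
# responses over all window positions, so the step reduces to max-over-time
# pooling:
def max_pool(self, row):
    return T.max(row)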
def __init__(self, f_match, f_decomp, hidden_dim, att_hid_dim):
    fm = f_match.split("-")
    self.f_match = fm[0]
    self.fm_win = 0
    if len(fm) != 1:
        self.fm_win = int(fm[1])
    self.f_decomp = f_decomp
    self.hidden_dim = hidden_dim
    self.att_hid_dim = att_hid_dim

    #gru sentence parameters
    self.U0_i = normal_param(std=(2.0 / (self.hidden_dim + word_vector_size)),
                             shape=(self.hidden_dim, word_vector_size))
    self.U1_i = normal_param(std=(2.0 / (self.hidden_dim + word_vector_size)),
                             shape=(self.hidden_dim, word_vector_size))
    self.U2_i = normal_param(std=(2.0 / (self.hidden_dim + word_vector_size)),
                             shape=(self.hidden_dim, word_vector_size))
    self.W0_i = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                             shape=(self.hidden_dim, self.hidden_dim))
    self.W1_i = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                             shape=(self.hidden_dim, self.hidden_dim))
    self.W2_i = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                             shape=(self.hidden_dim, self.hidden_dim))
    self.b0_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b1_i = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2_i = constant_param(value=0.0, shape=(self.hidden_dim,))

    # 1 attention mechanism parameters
    self.W1 = normal_param(std=(2.0 / (self.hidden_dim + (4 * self.hidden_dim))),
                           shape=(self.hidden_dim, (4 * self.hidden_dim) + 1))
    self.W2 = normal_param(std=(2.0 / (res + self.hidden_dim)),
                           shape=(res, self.hidden_dim))
    self.b1 = constant_param(value=0.0, shape=(self.hidden_dim,))
    self.b2 = constant_param(value=0.0, shape=(res,))
    self.Wb = normal_param(std=(2.0 / (self.hidden_dim + self.hidden_dim)),
                           shape=(self.hidden_dim, self.hidden_dim))

    # 2 attention mechanism parameters
    self.Wx = normal_param(std=(2.0 / (self.att_hid_dim + (2 * self.hidden_dim))),
                           shape=(self.att_hid_dim, 2 * self.hidden_dim))
    self.Wd = normal_param(std=(2.0 / (self.att_hid_dim + (2 * self.hidden_dim))),
                           shape=(self.att_hid_dim, 2 * self.hidden_dim))
    self.b_h = constant_param(value=0.0, shape=(self.att_hid_dim,))
    self.b_p = constant_param(value=0.0, shape=(res,))
    self.Wp = normal_param(std=(2.0 / (res + self.att_hid_dim)),
                           shape=(res, self.att_hid_dim))

    spl = T.matrix()
    smin = T.matrix()
    tpl = T.matrix()
    tmin = T.matrix()
    scr = T.vector()

    s_p, spl_updates = theano.scan(self.input_next_state, sequences=spl,
                                   outputs_info=T.zeros_like(self.b0_i))
    s_m, smin_updates = theano.scan(self.input_next_state, sequences=smin,
                                    outputs_info=T.zeros_like(self.b0_i))
    t_p, tpl_updates = theano.scan(self.input_next_state, sequences=tpl,
                                   outputs_info=T.zeros_like(self.b0_i))
    t_m, tmin_updates = theano.scan(self.input_next_state, sequences=tmin,
                                    outputs_info=T.zeros_like(self.b0_i))
    s_plus = s_p[-1]
    s_minus = s_m[-1]
    t_plus = t_p[-1]
    t_minus = t_m[-1]
    s = T.concatenate([s_plus, s_minus], axis=0)
    t = T.concatenate([t_plus, t_minus], axis=0)

    self.pred = self.attn_step_2(s, t)
    self.loss = self.kl_div(scr, self.pred)

    self.params = [self.U0_i, self.W0_i, self.b0_i,
                   self.U1_i, self.W1_i, self.b1_i,
                   self.U2_i, self.W2_i, self.b2_i,
                   self.Wx, self.Wd, self.b_h, self.b_p, self.Wp]

    #self.loss = self.loss + 0.00003*l2_reg(self.params)
    updts = upd.adam(self.loss, self.params)

    self.train_fn = theano.function(inputs=[spl, smin, tpl, tmin, scr],
                                    outputs=[self.pred, self.loss],
                                    updates=updts)
    self.test_fn = theano.function(inputs=[spl, smin, tpl, tmin],
                                   outputs=self.pred)