def test_specify_shape_inplace(self):
    # test that specify_shape doesn't break inserting inplace op
    dtype = self.dtype
    if dtype is None:
        dtype = theano.config.floatX

    rng = numpy.random.RandomState(utt.fetch_seed())
    a = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
    a = self.cast_value(a)
    a_shared = self.shared_constructor(a)
    b = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
    b = self.cast_value(b)
    b_shared = self.shared_constructor(b)
    s = numpy.zeros((40, 40), dtype=dtype)
    s = self.cast_value(s)
    s_shared = self.shared_constructor(s)

    f = theano.function([], updates={s_shared: theano.dot(a_shared, b_shared) + s_shared})
    topo = f.maker.env.toposort()
    f()
    # [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
    if theano.config.mode != "FAST_COMPILE":
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")
    # There is no inplace gemm for sparse
    # assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "StructuredDot")
    s_shared_specify = tensor.specify_shape(s_shared, s_shared.get_value(borrow=True).shape)

    # now test with the specify shape op in the output
    f = theano.function([], s_shared.shape,
                        updates={s_shared: theano.dot(a_shared, b_shared) + s_shared_specify})
    topo = f.maker.env.toposort()
    shp = f()
    assert numpy.all(shp == (40, 40))
    if theano.config.mode != "FAST_COMPILE":
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")

    # now test with the specify shape op in the inputs and outputs
    a_shared = tensor.specify_shape(a_shared, a_shared.get_value(borrow=True).shape)
    b_shared = tensor.specify_shape(b_shared, b_shared.get_value(borrow=True).shape)
    f = theano.function([], s_shared.shape,
                        updates={s_shared: theano.dot(a_shared, b_shared) + s_shared_specify})
    topo = f.maker.env.toposort()
    shp = f()
    assert numpy.all(shp == (40, 40))
    if theano.config.mode != "FAST_COMPILE":
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")
def encoder(infomatf, infomatb, htm1matf, ctm1matf, htm1matb, ctm1matb,
            Eenf, Eenb, Wenf, Wenb, benf, benb):
    # infomat is a matrix, having
    # batch * D
    dim = Eenf.shape[1]
    #
    xtmatf = theano.dot(infomatf, Eenf)
    xtmatb = theano.dot(infomatb, Eenb)
    #
    pretranf = T.concatenate([xtmatf, htm1matf], axis=1)
    pretranb = T.concatenate([xtmatb, htm1matb], axis=1)
    #
    posttranf = theano.dot(pretranf, Wenf) + benf
    posttranb = theano.dot(pretranb, Wenb) + benb
    #
    itmatf = T.nnet.sigmoid(posttranf[:, 0:dim])
    ftmatf = T.nnet.sigmoid(posttranf[:, dim:(2 * dim)])
    gtmatf = T.tanh(posttranf[:, (2 * dim):(3 * dim)])
    otmatf = T.nnet.sigmoid(posttranf[:, (3 * dim):])
    ctmatf = ftmatf * ctm1matf + itmatf * gtmatf
    #
    htmatf = otmatf * T.tanh(ctmatf)
    #
    itmatb = T.nnet.sigmoid(posttranb[:, 0:dim])
    ftmatb = T.nnet.sigmoid(posttranb[:, dim:(2 * dim)])
    gtmatb = T.tanh(posttranb[:, (2 * dim):(3 * dim)])
    otmatb = T.nnet.sigmoid(posttranb[:, (3 * dim):])
    ctmatb = ftmatb * ctm1matb + itmatb * gtmatb
    #
    htmatb = otmatb * T.tanh(ctmatb)
    #
    return htmatf, ctmatf, htmatb, ctmatb
def decoder(self, lang, h_tm1_dec, c_tm1_dec):
    x_t_lang = theano.dot(lang, self.Emb_dec)
    #
    beta1 = tensor.tensordot(self.scope_att, self.U_att, (2, 0))
    beta2 = theano.dot(h_tm1_dec, self.W_att)
    beta3 = tensor.tanh(beta1 + beta2)
    beta4 = tensor.tensordot(beta3, self.b_att, (2, 0))
    # |-> # lines * # batch
    pre_alpha = tensor.nnet.softmax(tensor.transpose(beta4, axes=(1, 0)))
    #
    pre_alpha *= self.weights_pre_sel
    # Alpha
    alpha = pre_alpha / pre_alpha.sum(axis=1, keepdims=True)
    #
    z_t = tensor.sum(alpha[:, :, None] * tensor.transpose(self.scope_att, axes=(1, 0, 2)), axis=1)
    #
    pre_tran = tensor.concatenate([x_t_lang, h_tm1_dec, z_t], axis=1)
    post_tran = theano.dot(pre_tran, self.W_dec) + self.b_dec
    #
    i_t = tensor.nnet.sigmoid(post_tran[:, :self.dim_model])
    f_t = tensor.nnet.sigmoid(post_tran[:, self.dim_model:2 * self.dim_model])
    g_t = tensor.tanh(post_tran[:, 2 * self.dim_model:3 * self.dim_model])
    o_t = tensor.nnet.sigmoid(post_tran[:, 3 * self.dim_model:])
    c_t_dec = f_t * c_tm1_dec + i_t * g_t
    h_t_dec = o_t * tensor.tanh(c_t_dec)
    #
    pre_y = tensor.concatenate([h_t_dec, z_t], axis=1)
    y_t_0 = theano.dot((x_t_lang + theano.dot(pre_y, self.L)), self.L_0)
    y_t = tensor.nnet.softmax(y_t_0)
    log_y_t = tensor.log(y_t + numpy.float32(1e-8))
    return h_t_dec, c_t_dec, y_t, log_y_t
def compute_output(self):
    label_results = self.process_label_results(
        self.semantic_prediction)  # tensor.round(self.semantic_prediction)
    print(label_results)
    print(tensor.round(self.semantic_prediction))
    label_specific_Ws = tensor.tensordot(label_results, self.Ws, axes=[1, 0])
    label_specific_Vs = tensor.tensordot(label_results, self.Vs, axes=[1, 0])
    label_specific_W = th.dot(label_specific_Ws, self.W)
    label_specific_V = th.dot(label_specific_Vs, self.V)
    # compute output
    self.output = getFunction('softmax')(
        tensor.batched_dot(self.input, label_specific_W) +
        tensor.batched_dot(self.extra_input, label_specific_V) + self.b)
    for i in range(len(self.semantic_label_map.keys()) + 1):
        ho = self.get_output(i)
        self.output_hybrids.append(ho)
def OneStep(vsample):
    hmean = T.nnet.sigmoid(theano.dot(vsample, W) + bhid)
    hsample = trng.binomial(size=hmean.shape, n=1, p=hmean)
    vmean = T.nnet.sigmoid(theano.dot(hsample, W.T) + bvis)
    print hmean
    return trng.binomial(size=vsample.shape, n=1, p=vmean,
                         dtype=theano.config.floatX)
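# --- Hypothetical usage sketch (not part of the original snippet) ---
# OneStep above is a single Gibbs step of an RBM; a minimal way to iterate it is
# theano.scan. `v_init`, `chain` and `gibbs_sample` are illustrative names, and
# W, bhid, bvis, trng are assumed to be the shared variables / RandomStreams the
# function closes over.
v_init = T.matrix('v_init')
chain, scan_updates = theano.scan(OneStep, outputs_info=v_init, n_steps=10)
# the RandomStreams updates produced by scan must be passed to theano.function
gibbs_sample = theano.function([v_init], chain[-1], updates=scan_updates)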
def one_step(self, x_t, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    h_t = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
    y_t = theano.dot(h_t, W_ho) + b_o
    y_t = sigmoid(y_t)
    if self.ignore_zero:
        return [h_t, y_t], theano.scan_module.until(T.eq(T.sum(abs(x_t)), 0))
    return [h_t, y_t]
def _step2(self, x_t, h_tm1, c_tm1, x_w, h_w, c_w, W_co, b_i, b_f, b_c, b_o):
    sigma = lasagne.nonlinearities.sigmoid
    # for the other activation function we use the tanh
    act = T.tanh
    # sequences: x_t
    # prior results: h_tm1, c_tm1
    # non-sequences: W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xy, W_hy, W_cy, b_y
    x_prod = theano.dot(x_t, x_w)
    h_prod = theano.dot(h_tm1, h_w)
    c_prod = theano.dot(c_tm1, c_w)
    i_t = sigma(self._slice(x_prod, 0, self.dim_proj) + self._slice(h_prod, 0, self.dim_proj) +
                self._slice(c_prod, 0, self.dim_proj) + b_i.dimshuffle(('x', 0)))
    f_t = sigma(self._slice(x_prod, 1, self.dim_proj) + self._slice(h_prod, 1, self.dim_proj) +
                self._slice(c_prod, 1, self.dim_proj) + b_f.dimshuffle(('x', 0)))
    c_t = f_t * c_tm1 + i_t * act(self._slice(x_prod, 2, self.dim_proj) +
                                  self._slice(h_prod, 2, self.dim_proj) +
                                  b_c.dimshuffle(('x', 0)))
    o_t = sigma(self._slice(x_prod, 3, self.dim_proj) + self._slice(h_prod, 3, self.dim_proj) +
                theano.dot(c_t, W_co) + b_o.dimshuffle(('x', 0)))
    h_t = o_t * act(c_t)
    return [h_t, c_t]
def state_with_attend(self, h1, attended, x_m=None):
    # attended: (src_sent_len, batch_size, src_nhids*2)
    _az = theano.dot(attended, self.W_cz) + self.b_z2
    _hz = theano.dot(h1, self.W_hz2)
    if self.ln is not False:
        _az = ln(_az, self.g1, self.b1)
        _hz = ln(_hz, self.g2, self.b2)
    z = T.nnet.sigmoid(_az + _hz)
    # z: (batch_size, trg_nhids)

    _ar = theano.dot(attended, self.W_cr) + self.b_r2
    _hr = theano.dot(h1, self.W_hr2)
    if self.ln is not False:
        _ar = ln(_ar, self.g1, self.b1)
        _hr = ln(_hr, self.g2, self.b2)
    r = T.nnet.sigmoid(_ar + _hr)
    # r: (batch_size, trg_nhids)

    # _ah: (batch_size, trg_nhids)
    _ah = theano.dot(attended, self.W_ch)
    _hh = T.dot(h1, self.W_hh2) + self.b_h2
    if self.ln is not False:
        _ah = ln(_ah, self.g3, self.b3)
        _hh = ln(_hh, self.g4, self.b4)
    h2 = T.tanh(_ah + _hh * r)
    h2 = z * h1 + (1. - z) * h2
    if x_m is not None:
        h2 = x_m[:, None] * h2 + (1. - x_m)[:, None] * h1
    # h2: (batch_size, trg_nhids)
    return h2
def test_dtw():
    W = theano.shared(numpy.eye(4, dtype=theano.config.floatX), name='W')
    theano.config.compute_test_value = 'raise'
    x1 = tt.matrix('x1')
    x2 = tt.matrix('x2')
    x1.tag.test_value = numpy.array([[0.1] * 4, [-0.1] * 4],
                                    dtype=theano.config.floatX)
    x2.tag.test_value = numpy.array(
        [[0.1] * 4, [0.1] * 1 + [-0.1] * 3, [-0.1] * 4],
        dtype=theano.config.floatX)
    e1 = theano.dot(x1, W)
    e2 = theano.dot(x2, W)
    y = theano_batch_dtw.dtw.theano_symbolic_dtw(e1, e2,
                                                 tt.constant(2, dtype='int64'),
                                                 tt.constant(3, dtype='int64'),
                                                 normalize=False)
    theano.printing.debugprint(y)
    g = theano.grad(y, W)
    theano.printing.debugprint(g)
    print 'y', y.dtype, y.tag.test_value.shape, '\n', y.tag.test_value
    print 'g', g.dtype, g.tag.test_value.shape, '\n', g.tag.test_value
    path, cost = speech_dtw._dtw.multivariate_dtw(e1.tag.test_value, e2.tag.test_value)
    print cost, list(reversed(path))
def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out):
    x_t = T.tanh(theano.dot(x_tm1, W) +
                 theano.dot(u_t, W_in_1) +
                 theano.dot(u_tm4, W_in_2) +
                 theano.dot(y_tm1, W_feedback))
    y_t = theano.dot(x_tm3, W_out)
    return [x_t, y_t]
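# --- Hypothetical usage sketch (not part of the original snippet) ---
# oneStep matches the multi-tap recurrence used in the Theano scan tutorial; a
# minimal sketch of driving it with theano.scan is shown below, assuming u, x0,
# y0 and the weight matrices are symbolic variables defined elsewhere (x0 must
# have enough rows to provide the -3 and -1 taps).
([x_vals, y_vals], updates) = theano.scan(
    fn=oneStep,
    sequences=dict(input=u, taps=[-4, -0]),
    outputs_info=[dict(initial=x0, taps=[-3, -1]), y0],
    non_sequences=[W, W_in_1, W_in_2, W_feedback, W_out],
    strict=True)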
def apply(self, state_below, mask_below, context, c_mask):
    hiddens, attended = self._forward(state_below, mask_below, context, c_mask)
    # state_below: shape(trg_sent_len-1, batch_size, trgw_embsz)
    # hiddens: shape(trg_sent_len-1, batch_size, trg_nhids)
    # attended: shape(trg_sent_len-1, batch_size, src_nhids*2)
    # note: the scan function will remember all previous states
    combine = T.concatenate([state_below, hiddens, attended], axis=2)
    # combine: shape(trg_sent_len-1, batch_size, trgw_embsz+trg_nhids+src_nhids*2)
    # self.W_m: shape(trgw_embsz + trg_nhids + c_hids, n_out*2)
    # self.b_m: shape(n_out*2,)
    if self.max_out:
        merge_out = theano.dot(combine, self.W_m) + self.b_m
        # merge_out: shape(trg_sent_len-1, batch_size, n_out*2)
        merge_out = merge_out.reshape((merge_out.shape[0],
                                       merge_out.shape[1],
                                       merge_out.shape[2] / 2,
                                       2), ndim=4).max(axis=3)
    else:
        merge_out = T.tanh(theano.dot(combine, self.W_m) + self.b_m)
    '''
    such as: (1, 2, 6) -> (1, 2, 3, 2) -> (1, 2, 3)
    [[[ 1, 2, 3, 4, 5, 6],      [[[[1, 2], [3, 4], [4, 5]],      [[[ 2, 4, 5],
      [ 2, 3, 4, 5, 6, 7]]] ->    [[2, 3], [3, 4], [4, 5]]]] ->    [ 3, 4, 5]]]
    '''
    # mask_below[:, :, None] -> shape(trg_sent_len-1, batch_size, 1)
    return merge_out * mask_below[:, :, None]
def unfold():
    smearbckg = 1.
    if nbckg > 0:
        bckgnormerr = [(-1. + nuis) / nuis if berr < 0. else berr
                       for berr, nuis in zip(backgroundnormsysts, bckgnuisances)]
        bckgnormerr = mc.math.stack(bckgnormerr)
        smearedbackgrounds = backgrounds
        if nobjsyst > 0:
            smearbckg = smearbckg + theano.dot(objnuisances, backgroundobjsysts)
            smearedbackgrounds = backgrounds * smearbckg
        bckg = theano.dot(1. + bckgnuisances * bckgnormerr, smearedbackgrounds)
    tresmat = array(resmat)
    reco = theano.dot(truth, tresmat)
    out = reco
    if nobjsyst > 0:
        smear = 1. + theano.dot(objnuisances, signalobjsysts)
        out = reco * smear
    if nbckg > 0:
        out = bckg + out
    return out
def test_gemv1():
    ''' test vector1+dot(matrix,vector2) '''
    v1 = theano.tensor._shared(numpy.array(numpy.random.rand(2), dtype='float32'))
    v2 = theano.tensor._shared(numpy.array(numpy.random.rand(5), dtype='float32'))
    m = theano.tensor._shared(numpy.array(numpy.random.rand(5, 2), dtype='float32'))

    no_gpu_f = theano.function([], v2 + theano.dot(m, v1), mode=mode_without_gpu)
    gpu_f = theano.function([], v2 + theano.dot(m, v1), mode=mode_with_gpu)
    # gpu_f2 is needed to test the case when the input is not on the gpu
    # but the output is moved to the gpu.
    gpu_f2 = theano.function([], cuda.gpu_from_host(v2 + theano.dot(m, v1)),
                             mode=mode_with_gpu)

    # Assert they produce the same output
    assert numpy.allclose(no_gpu_f(), gpu_f(), atol=atol)
    assert numpy.allclose(no_gpu_f(), gpu_f2(), atol=atol)
    # Assert that the gpu version actually uses gpu
    assert sum([node.op is cuda.blas.gpu_gemm_inplace
                for node in gpu_f2.maker.env.toposort()]) == 1
    assert sum([node.op is cuda.blas.gpu_gemm_inplace
                for node in gpu_f.maker.env.toposort()]) == 1
def _step_forward_with_attention(self, x_t, x_m, h_tm1, c, c_mask, c_x):
    '''
    x_t: input at time t
    x_m: mask of x_t
    h_tm1: previous state
    c_x: context of the rnn
    '''
    # attended = self.attention_layer.apply(c, c_mask, h_tm1)
    # c_z = theano.dot(attended, self.W_cz)
    # c_r = theano.dot(attended, self.W_cr)
    # c_h = theano.dot(attended, self.W_ch)
    # return [self._step_forward_with_context(x_t, x_m, h_tm1, c_z, c_r, c_h), attended]

    #### new arc
    h1 = self._step_forward(x_t, x_m, h_tm1)
    attended = self.attention_layer.apply(c, c_mask, c_x, h1)
    z = T.nnet.sigmoid(theano.dot(attended, self.W_cz) + theano.dot(h1, self.W_hz2) + self.b_z2)
    r = T.nnet.sigmoid(theano.dot(attended, self.W_cr) + theano.dot(h1, self.W_hr2) + self.b_r2)
    c_h = theano.dot(attended, self.W_ch)
    h2 = T.tanh((T.dot(h1, self.W_hh2) + self.b_h2) * r + c_h)
    h2 = h1 * z + (1. - z) * h2
    if x_m:
        h2 = x_m[:, None] * h2 + (1. - x_m)[:, None] * h1
    return h2, attended
def __init__(self, rng, input, n_in, n_out, diffusion, W=None, activation=T.nnet.relu):
    self.input = input
    if W is None:
        W_values = np.asarray(
            rng.uniform(
                low=-np.sqrt(6. / (n_in + n_out)),
                high=np.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)
            ),
            dtype=theano.config.floatX
        )
        if activation == theano.tensor.nnet.sigmoid:
            W_values *= 4
        W = theano.shared(value=W_values, name='W', borrow=True)
    self.W = W
    self.D = diffusion
    lin_output = theano.dot(theano.dot(diffusion, input), self.W)
    self.output = (
        lin_output if activation is None
        else activation(lin_output)
    )
    self.params = [self.W]
def apply(self, state_below, mask_below, init_state=None, context=None):
    if state_below.ndim == 3:
        batch_size = state_below.shape[1]
        n_steps = state_below.shape[0]
    else:
        raise NotImplementedError

    if self.with_contex:
        if init_state is None:
            init_state = T.tanh(theano.dot(context, self.W_c_init))
        c_z = theano.dot(context, self.W_cz)
        c_r = theano.dot(context, self.W_cr)
        c_h = theano.dot(context, self.W_ch)
        non_sequences = [c_z, c_r, c_h]
        rval, updates = theano.scan(self._step_forward_with_context,
                                    sequences=[state_below, mask_below],
                                    outputs_info=[init_state],
                                    non_sequences=non_sequences,
                                    n_steps=n_steps)
    else:
        if init_state is None:
            init_state = T.alloc(numpy.float32(0.), batch_size, self.n_hids)
        rval, updates = theano.scan(self._step_forward,
                                    sequences=[state_below, mask_below],
                                    outputs_info=[init_state],
                                    n_steps=n_steps)
    self.output = rval
    return self.output
def recurrence(x_t, feat_t, h_tm1):
    # i_t = sigma(theano.dot(x_t, self.W_xi) + theano.dot(h_tm1, self.W_hi) + theano.dot(c_tm1, self.W_ci) + self.b_i)
    # f_t = sigma(theano.dot(x_t, self.W_xf) + theano.dot(h_tm1, self.W_hf) + theano.dot(c_tm1, self.W_cf) + self.b_f)
    # c_t = f_t * c_tm1 + i_t * T.tanh(theano.dot(x_t, self.W_xc) + theano.dot(h_tm1, self.W_hc) + self.b_c)
    # o_t = sigma(theano.dot(x_t, self.W_xo) + theano.dot(h_tm1, self.W_ho) + theano.dot(c_t, self.W_co) + self.b_o)
    # h_t = o_t * T.tanh(c_t)
    z_t = sigma(theano.dot(x_t, self.W_xz) + self.b_z)  ###### THIS IS DIFFERENT
    r_t = sigma(theano.dot(x_t, self.W_xr) + theano.dot(h_tm1, self.W_hr) + self.b_r)
    h_t = ((T.tanh(theano.dot(h_tm1 * r_t, self.W_hh) + T.tanh(x_t[50:100]) + self.b_h) * z_t)
           + h_tm1 * (T.ones_like(z_t) - z_t))
    # h_t = T.tanh(h_tm1)
    if self.featdim > 0:
        all_t = T.concatenate([h_t, feat_t])
    else:
        all_t = h_t
    # print "all_t", type(all_t), T.shape(all_t)
    s_t = softmax(theano.dot(all_t, self.W_hy) + self.b_y)
    # print T.shape(h_t), T.shape(c_t), T.shape(s_t)
    return [h_t, s_t]
def one_step_no_output(self, x_t, h_tm1, W_xc, W_hc, b_c, W_ih, W_hh, W_ho, b_o, b_h):
    C = sigmoid(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) + b_c)
    h_t_hat = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
    h_t = (1 - C) * h_t_hat + C * x_t
    if self.ignore_zero:
        return [h_t, h_t], theano.scan_module.until(T.eq(T.sum(abs(x_t)), 0))
    return [h_t, h_t]
def _input_to_hidden(self, x):
    x = x.dimshuffle((1, 0, 2))
    r = T.dot(x, self.W_r) + self.b_r
    z = T.dot(x, self.W_z) + self.b_z
    h = T.dot(x, self.W_h) + self.b_h
    return r, z, h
def one_step_no_output(self, x_t, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    """Step function that does not compute the output value."""
    h_t = T.tanh(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
    if self.ignore_zero:
        return [h_t, h_t], theano.scan_module.until(T.eq(T.sum(abs(x_t)), 0))
    return [h_t, h_t]
def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out):
    x_t = T.tanh(theano.dot(x_tm1, W) +
                 theano.dot(u_t, W_in_1) +
                 theano.dot(u_tm4, W_in_2) +
                 theano.dot(y_tm1, W_feedback))
    y_t = theano.dot(x_tm3, W_out)
    return [x_t, y_t]
def __init__(self, name, inp):
    eqvars = self.arrdict[name]
    w_hidden, b_hidden, w_output, b_output = eqvars
    hidden = T.dot(w_hidden.T, inp) + b_hidden
    hidden_act = M.tanh(hidden)
    output = (T.dot(w_output.T, hidden_act) + b_output)
    self.proj = output.sum()
def step(x_t, h_t_1, W_h, W_x, W_y):
    # Add breakpoint
    h = t.tanh(theano.dot(W_h, h_t_1) + theano.dot(W_x, x_t) + b_h)
    y = (theano.dot(W_y, h) + b_y)
    e_y = t.exp(y - y.max())
    smax_y = e_y / e_y.sum()
    return h, smax_y
def setL(x, name1="w", name2="b", name3="b_", act="sigmoid"):
    w = self.seg.params[name1]
    b = self.seg.params[name2]
    b_ = self.seg.params[name3]
    activate = self.getfunc(act)
    y = activate(theano.dot(x, w) + b)
    z = activate(theano.dot(y, w.T) + b_)
    return zip([w, b, b_], theano.grad(self.lossfunc(x, z), [w, b, b_]))
def test_specify_shape_inplace(self):
    # test that specify_shape doesn't break inserting inplace op
    dtype = self.dtype
    if dtype is None:
        dtype = theano.config.floatX

    rng = numpy.random.RandomState(utt.fetch_seed())
    a = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
    a = self.cast_value(a)
    a_shared = self.shared_constructor(a)
    b = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
    b = self.cast_value(b)
    b_shared = self.shared_constructor(b)
    s = numpy.zeros((40, 40), dtype=dtype)
    s = self.cast_value(s)
    s_shared = self.shared_constructor(s)

    f = theano.function([], updates=[(s_shared, theano.dot(a_shared, b_shared) + s_shared)])
    topo = f.maker.fgraph.toposort()
    f()
    # [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
    if theano.config.mode != 'FAST_COMPILE':
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")
    # There is no inplace gemm for sparse
    # assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "StructuredDot")
    s_shared_specify = tensor.specify_shape(s_shared, s_shared.get_value(borrow=True).shape)

    # now test with the specify shape op in the output
    f = theano.function([], s_shared.shape,
                        updates=[(s_shared, theano.dot(a_shared, b_shared) + s_shared_specify)])
    topo = f.maker.fgraph.toposort()
    shp = f()
    assert numpy.all(shp == (40, 40))
    if theano.config.mode != 'FAST_COMPILE':
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")

    # now test with the specify shape op in the inputs and outputs
    a_shared = tensor.specify_shape(a_shared, a_shared.get_value(borrow=True).shape)
    b_shared = tensor.specify_shape(b_shared, b_shared.get_value(borrow=True).shape)
    f = theano.function([], s_shared.shape,
                        updates=[(s_shared, theano.dot(a_shared, b_shared) + s_shared_specify)])
    topo = f.maker.fgraph.toposort()
    shp = f()
    assert numpy.all(shp == (40, 40))
    if theano.config.mode != 'FAST_COMPILE':
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")
def func_dec(self, xt, htm1, ctm1):
    # xt -- embedded world representations
    current_att_weight = self.softmax(
        theano.dot(
            tensor.tanh(
                theano.dot(htm1, self.W_att_target) + self.scope_att_times_W
            ),
            self.b_att
        )
    )
    #
    zt = theano.dot(current_att_weight, self.scope_att)
    #
    post_transform = self.b_dec + theano.dot(
        tensor.concatenate([xt, htm1, zt], axis=0),
        self.W_dec
    )
    gate_input = tensor.nnet.sigmoid(post_transform[:self.dim_model])
    gate_forget = tensor.nnet.sigmoid(post_transform[self.dim_model:2 * self.dim_model])
    gate_output = tensor.nnet.sigmoid(post_transform[2 * self.dim_model:3 * self.dim_model])
    gate_pre_c = tensor.tanh(post_transform[3 * self.dim_model:])
    ct = gate_forget * ctm1 + gate_input * gate_pre_c
    ht = gate_output * tensor.tanh(ct)
    '''
    Add drop out here, by cha chen
    '''
    # Set up a random number generator
    srng = RandomStreams(seed=0)
    # Set up the dropout
    windows = srng.uniform((self.dim_model,)) < 0.9
    getwins = theano.function([], windows)
    winst = getwins()
    ht_dropout = ht * winst
    # return the dropout version
    return ht, ht_dropout, ct, zt
    #
    # return ht, ct, zt
def _input_to_hidden(self, x):
    # (time_steps, batch_size, input_size)
    x = x.dimshuffle((1, 0, 2))
    xi = T.dot(x, self.W_i) + self.b_i
    xf = T.dot(x, self.W_f) + self.b_f
    xc = T.dot(x, self.W_c) + self.b_c
    xo = T.dot(x, self.W_o) + self.b_o
    return xi, xf, xc, xo
def hidden_cov_units_preactivation_given_v(self, v, small=0.5):
    """Return argument to the sigmoid that would give mean of covariance hid units

    See the math at the top of this file for what 'adjusted' means.

    return b - 0.5 * dot(adjusted(v), U)**2
    """
    unit_v = v / (TT.sqrt(TT.mean(v**2, axis=1) + small)).dimshuffle(0, 'x')  # adjust row norm
    return self.b + 0.5 * dot(dot(unit_v, self.U)**2, self.P)
def __init__(self, input, w, b, params=[]):
    self.output = nnet.softmax(theano.dot(input, w) + b)
    self.l1 = abs(w).sum()
    self.l2_sqr = (w**2).sum()
    self.argmax = theano.tensor.argmax(theano.dot(input, w) + b, axis=input.ndim - 1)
    self.input = input
    self.w = w
    self.b = b
    self.params = params
def _forward(self, state_below, mask_below=None, init_state=None, context=None):
    if state_below.ndim == 3:  # state_below is a 3-d matrix
        batch_size = state_below.shape[1]
        n_steps = state_below.shape[0]
    else:
        raise NotImplementedError

    # state_below: (src_sent_len, batch_size, embsize)
    # mask_below:  (src_sent_len, batch_size) 0-1 matrix (padding)
    if mask_below:
        inps = [state_below, mask_below]
        if self.with_contex:
            fn = self._step_forward_with_context
        else:
            fn = self._step_forward
    else:
        inps = [state_below]
        if self.with_contex:
            fn = lambda x1, x2, x3, x4, x5: self._step_forward_with_context(
                x1, None, x2, x3, x4, x5)
        else:
            fn = lambda x1, x2: self._step_forward(x1, None, x2)

    if self.with_contex:
        if init_state is None:
            init_state = T.tanh(theano.dot(context, self.W_c_init) + self.b_init)
        c_z = theano.dot(context, self.W_cz)
        c_r = theano.dot(context, self.W_cr)
        c_h = theano.dot(context, self.W_ch)
        if self.ln:
            c_z = ln(c_z, self.gcz + self.bcz)
            c_r = ln(c_r, self.gcr + self.bcr)
            c_h = ln(c_h, self.gch + self.bch)
        non_sequences = [c_z, c_r, c_h]
        rval, updates = theano.scan(fn,
                                    sequences=inps,
                                    outputs_info=[init_state],
                                    non_sequences=non_sequences,
                                    n_steps=n_steps)
    else:
        if init_state is None:
            init_state = T.alloc(numpy.float32(0.), batch_size, self.n_hids)
            # init_state = T.unbroadcast(T.alloc(0., batch_size, self.n_hids), 0)
        rval, updates = theano.scan(fn,
                                    sequences=inps,
                                    outputs_info=[init_state],
                                    n_steps=n_steps)
    self.output = rval
    # if changed like this, it only returns the hidden state of the last word in the sentence
    return self.output
def hidden_cov_units_preactivation_given_v(self, v, small=0.5):
    """Return argument to the sigmoid that would give mean of covariance hid units

    See the math at the top of this file for what 'adjusted' means.

    return b - 0.5 * dot(adjusted(v), U)**2
    """
    unit_v = v / (TT.sqrt(TT.mean(v**2, axis=1) + small)).dimshuffle(0, 'x')  # adjust row norm
    return self.b + 0.5 * dot(dot(unit_v, self.U)**2, self.P)
def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = (T.dot(input, self.Ws[0] * self.share_mask_W) +
                  T.dot(input, self.Ws[1] * self.split_mask_W))
    activation = (activation +
                  (self.bs[0] * self.share_mask_b).dimshuffle('x', 0) +
                  (self.bs[1] * self.split_mask_b).dimshuffle('x', 0))
    return self.nonlinearity(activation)
def build_mdn_predict(proj, x, tparams):
    x_diff_squared_avg = tensor.mean((x[:, 1:] - x[:, :-1])**2, axis=1)
    invsigma_given_x = tensor.maximum(
        tensor.nnet.sigmoid(theano.dot(proj, tparams['U_sigma']) + tparams['b_sigma']),
        1e-8) / x_diff_squared_avg[:, None]
    mu = theano.dot(proj, tparams['U_mu']) + tparams['b_mu']
    p_mix_given_x = tensor.maximum(tensor.minimum(
        tensor.nnet.softmax(tensor.dot(proj, tparams['U_mix']) + tparams['b_mix']),
        1e-6), 1 - 1e-6)
    p_mix_given_x = tensor.log(
        p_mix_given_x / (tensor.sum(p_mix_given_x, axis=1)[:, None] + 10 * EPS) + EPS)
    return invsigma_given_x, mu, p_mix_given_x
def ln_linear(inputs, size, bias, concat=False, dtype=None, scope=None):
    if not isinstance(size, (list, tuple)):
        raise ValueError("size argument must be (input_size, output_size)")

    input_size, output_size = size

    if not isinstance(input_size, (list, tuple)):
        input_size = [input_size]
    if not isinstance(inputs, (list, tuple)):
        inputs = [inputs]

    if len(inputs) != len(input_size):
        raise RuntimeError("unmatched elements found: inputs and input_size")

    results = []

    with variable_scope(scope):
        if concat:
            input_size = sum(input_size)
            inputs = theano.tensor.concatenate(inputs, -1)
            shape = [input_size, output_size]
            matrix = get_variable("matrix", shape, dtype=dtype)
            res = theano.dot(inputs, matrix)
            with variable_scope("layer_norm"):
                alpha = get_variable("gains", shape=(output_size,),
                                     dtype=dtype, initializer=ones_initializer)
                beta = get_variable("biases", shape=(output_size,),
                                    dtype=dtype, initializer=zeros_initializer)
                res = layer_normalize(res, alpha, beta)
            results.append(res)
        else:
            for i in range(len(input_size)):
                shape = [input_size[i], output_size]
                name = "matrix_%d" % i
                matrix = get_variable(name, shape, dtype=dtype)
                res = theano.dot(inputs[i], matrix)
                with variable_scope("layer_norm"):
                    alpha = get_variable("gains_%d" % i, shape=(output_size,),
                                         dtype=dtype, initializer=ones_initializer())
                    beta = get_variable("biases_%d" % i, shape=(output_size,),
                                        dtype=dtype, initializer=zeros_initializer())
                    res = layer_normalize(res, alpha, beta)
                results.append(res)

        if bias:
            shape = [output_size]
            bias = get_variable("bias", shape, dtype=dtype)
            results.append(bias)

    if len(results) == 1:
        return results[0]

    return reduce(theano.tensor.add, results)
def pool_one(self, R):
    """
    Attention-based pooling
    :param R: sentence representation, shape=[n, nb_filter]
    :return W_max: shape=[class_embbed_dim,]
    """
    G = theano.dot(theano.dot(R, self.U), self.WL)  # shape=[n, nb_classes]
    A = T.nnet.softmax(G.transpose()).transpose()   # shape=[n, nb_classes]
    WO = T.dot(R.transpose(), A)                    # shape=[nb_filter, nb_classes]
    W_max = T.max(WO, axis=1)                       # shape=[nb_filter,]
    return T.tanh(W_max)
def test_input_aliasing_affecting_inplace_operations(self):
    # Note: to trigger this bug with theano rev 4586:2bc6fc7f218b,
    # you need to make the inputs mutable (so that inplace
    # operations are used) and to break the elemwise composition
    # with some non-elemwise op (here dot)
    x = theano.tensor.dvector()
    y = theano.tensor.dvector()
    m1 = theano.tensor.dmatrix()
    m2 = theano.tensor.dmatrix()
    f = theano.function(
        [
            theano.In(x, mutable=True),
            theano.In(y, mutable=True),
            theano.In(m1, mutable=True),
            theano.In(m2, mutable=True),
        ],
        theano.dot((x * 2), m1) + theano.dot((y * 3), m2),
    )
    # Test 1. If the same variable is given twice

    # Compute bogus values
    v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
    m = np.asarray(
        [
            [1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1],
        ],
        dtype="float64",
    )
    bogus_vals = f(v, v, m, m)
    # Since we used inplace operation v and m may be corrupted
    # so we need to recreate them
    v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
    m = np.asarray(
        [
            [1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1],
        ],
        dtype="float64",
    )
    m_copy = m.copy()
    v_copy = v.copy()
    vals = f(v, v_copy, m, m_copy)
    assert np.allclose(vals, bogus_vals)
def gru_aspect(a_i, rm1, pb):
    g_i = sigma(T.dot(a_i, dropout(self.Wxa_1, self.ms[7], pb)) +
                T.dot(rm1, self.Wha_1))
    f_i = sigma(T.dot(a_i, dropout(self.Wxa_2, self.ms[8], pb)) +
                T.dot(rm1, self.Wha_2))
    c_i = T.tanh(theano.dot(a_i, dropout(self.Wxa_3, self.ms[9], pb)) +
                 theano.dot(rm1 * f_i, self.Wha_3))
    r_i = (T.ones_like(g_i) - g_i) * rm1 + g_i * c_i
    return r_i
def gru_opinion(a_i, rm1, pb):
    g_i = sigma(T.dot(a_i, dropout(self.Wxo_1, self.ms[10], pb)) +
                T.dot(rm1, self.Who_1))
    f_i = sigma(T.dot(a_i, dropout(self.Wxo_2, self.ms[11], pb)) +
                T.dot(rm1, self.Who_2))
    c_i = T.tanh(theano.dot(a_i, dropout(self.Wxo_3, self.ms[12], pb)) +
                 theano.dot(rm1 * f_i, self.Who_3))
    r_i = (T.ones_like(g_i) - g_i) * rm1 + g_i * c_i
    return r_i
def one_step(x_t, h_tminus1, c_tminus1):
    i_t = sigmoid(theano.dot(x_t, self.W_xi) + theano.dot(h_tminus1, self.W_hi) + self.b_i)
    f_t = sigmoid(theano.dot(x_t, self.W_xf) + theano.dot(h_tminus1, self.W_hf) + self.b_f)
    o_t = sigmoid(theano.dot(x_t, self.W_xo) + theano.dot(h_tminus1, self.W_ho) + self.b_o)
    g_t = self.activation_fun(theano.dot(x_t, self.W_xg) + theano.dot(h_tminus1, self.W_hg) + self.b_g)
    c_t = f_t * c_tminus1 + i_t * g_t
    h_t = o_t * self.activation_fun(c_t)
    y_t = sigmoid(theano.dot(h_t, self.W_hy) + self.b_y)
    return [h_t, c_t, y_t]
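# --- Hypothetical usage sketch (not part of the original snippet) ---
# one_step can be unrolled over an input sequence with theano.scan. x_seq, h0,
# c0 and self.n_hidden are illustrative names only; the LSTM weights are picked
# up through `self` inside the closure, as in the function above.
x_seq = T.matrix('x_seq')                 # one input vector per time step
h0 = T.zeros((self.n_hidden,))            # assumed initial hidden state
c0 = T.zeros((self.n_hidden,))            # assumed initial cell state
[h_seq, c_seq, y_seq], _ = theano.scan(
    fn=one_step,
    sequences=x_seq,
    outputs_info=[h0, c0, None])          # y_t is not fed back, hence None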
def rand_rotate_matrix_symbol(angle=90, ss=0.5):
    srs = T.shared_randomstreams.RandomStreams()
    # np.pi / 180 *
    agx = (srs.uniform() * (2 * angle) - angle) * np.pi / 180
    agy = (srs.uniform() * (2 * angle) - angle) * np.pi / 180
    s = srs.uniform() + ss
    Rx = T.stack(1, 0, 0,
                 0, T.cos(agx), T.sin(agx),
                 0, -T.sin(agx), T.cos(agx)).reshape((3, 3))
    Ry = T.stack(T.cos(agy), 0, -T.sin(agy),
                 0, 1, 0,
                 T.sin(agy), 0, T.cos(agy)).reshape((3, 3))
    Ss = T.stack(s, 0, 0,
                 0, s, 0,
                 0, 0, s).reshape((3, 3))
    value = theano.dot(Ry, theano.dot(Rx, Ss))
    return value
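# --- Hypothetical usage sketch (not part of the original snippet) ---
# Compiling the symbolic result gives a callable that draws a fresh random
# rotation/scale matrix on every call; the RandomStreams state updates are
# attached automatically by theano.function for shared_randomstreams variables.
get_rot = theano.function([], rand_rotate_matrix_symbol(angle=45, ss=0.5))
R1 = get_rot()
R2 = get_rot()   # a different 3x3 matrix from R1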
def step(self, x_t, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    # h_t = g(W_ih x_t + W_hh h_tm1 + bh)
    ### Does not work on recurrent layer, see http://arxiv.org/pdf/1311.0701v7.pdf
    h_t = self.g(theano.dot(x_t, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
    # y_t = act(W_ho h_t + b_o)
    ### y_t = self.act(theano.dot(h_t, W_ho) + b_o)
    y_t = self.act(theano.dot(h_t, W_ho) + b_o)
    return [h_t, y_t]
def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        # if the input has more than two dimensions, flatten it into a
        # batch of feature vectors.
        input = input.flatten(2)
    activation = T.dot(input[:, 0:-self.split_num], self.Ws[0] * self.share_mask_W)
    activation = activation + (self.bs[0] * self.share_mask_b).dimshuffle('x', 0)
    for i in range(0, len(self.Ws)):
        activation_mask = TT.stack([input[:, -self.split_num + i - 0]] * self.num_units).T
        activation += (T.dot(input[:, 0:-self.split_num], self.Ws[i] * self.split_mask_W) +
                       (self.bs[i] * self.split_mask_b).dimshuffle('x', 0)) * activation_mask
    return self.nonlinearity(activation)
def test_partial_input_aliasing_affecting_inplace_operations(self):
    # Note: to trigger this bug with theano rev 4586:2bc6fc7f218b,
    # you need to make the inputs mutable (so that inplace
    # operations are used) and to break the elemwise composition
    # with some non-elemwise op (here dot)
    x = theano.tensor.dvector()
    y = theano.tensor.dvector()
    z = theano.tensor.dvector()
    m1 = theano.tensor.dmatrix()
    m2 = theano.tensor.dmatrix()
    m3 = theano.tensor.dmatrix()

    # Test 2. If variables only partially overlap
    # more exactly we care about the case when we have a, b, c
    # and a shares memory with b, b shares memory with c, but
    # c does not share memory with a
    f = theano.function(
        [
            theano.In(x, mutable=True),
            theano.In(y, mutable=True),
            theano.In(z, mutable=True),
            theano.In(m1, mutable=True),
            theano.In(m2, mutable=True),
            theano.In(m3, mutable=True),
        ],
        (
            theano.dot((x * 2), m1)
            + theano.dot((y * 3), m2)
            + theano.dot((z * 4), m3)
        ),
    )

    # Compute bogus values
    v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
    m = np.asarray([[1, 0], [0, 1]], dtype="float64")
    bogus_vals = f(v[:2], v[1:3], v[2:4], m, m, m)
    # Since we used inplace operation v and m may be corrupted
    # so we need to recreate them
    v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
    m = np.asarray([[1, 0], [0, 1]], dtype="float64")
    m_copy1 = m.copy()
    v_copy1 = v.copy()
    m_copy2 = m.copy()
    v_copy2 = v.copy()
    vals = f(v[:2], v_copy1[1:3], v_copy2[2:4], m, m_copy1, m_copy2)
    assert np.allclose(vals, bogus_vals)
def mk_training_fn(self):
    """The Constant Stochastic Gradient Step Fn with Optimal Preconditioning Matrix"""
    q_size = self.q_size
    avg_C = self.avg_C
    t = self.t
    updates = self.updates
    # Trying to stick to variable names as given in the publication
    # https://arxiv.org/pdf/1704.04289v1.pdf
    S = self.batch_size
    N = self.total_size
    # inputs
    random = self.random
    inarray = self.inarray
    # gradient of log likelihood
    gt = -1 * (1. / S) * (self.dlogp_elemwise.sum(axis=0) + (S / N) * self.dlog_prior)
    # update moving average of Noise Covariance
    gt_diff = (self.dlogp_elemwise - self.dlogp_elemwise.mean(axis=0))
    V = (1. / (S - 1)) * theano.dot(gt_diff.T, gt_diff)
    C_t = (1. - 1. / t) * avg_C + (1. / t) * V
    # BB^T = C
    B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))
    # Optimal Preconditioning Matrix
    H = (2. * S / N) * tt.nlinalg.matrix_inverse(C_t)
    # step value on the log likelihood gradient preconditioned with H
    step = -1 * theano.dot(H, gt.dimshuffle([0, 'x']))
    # sample gaussian noise dW
    dW = random.normal((q_size, 1), dtype=theano.config.floatX, avg=0.0, std=1.0)
    # noise term is inversely proportional to batch size
    noise_term = (1. / np.sqrt(S)) * theano.dot(H, theano.dot(B, dW))
    # step + noise term
    dq = (step + noise_term).flatten()
    # update time and avg_C
    updates.update({avg_C: C_t, t: t + 1})
    f = theano.function(outputs=dq, inputs=inarray, updates=updates,
                        allow_input_downcast=True)
    return f
def image_step_val(Imat, htm1mat, ctm1mat, Wcnn, Wxi, Whi, bi, Wxf, Whf, bf,
                   Wxc, Whc, bc, Wxo, Who, bo, Why, by, forbatch):
    xtmat = theano.dot(Imat, Wcnn)
    itmat = sigma(theano.dot(xtmat, Wxi) + theano.dot(htm1mat, Whi) + T.outer(forbatch, bi))
    ftmat = sigma(theano.dot(xtmat, Wxf) + theano.dot(htm1mat, Whf) + T.outer(forbatch, bf))
    ctmat = ftmat * ctm1mat + itmat * act(theano.dot(xtmat, Wxc) + theano.dot(htm1mat, Whc) +
                                          T.outer(forbatch, bc))
    otmat = sigma(theano.dot(xtmat, Wxo) + theano.dot(htm1mat, Who) + T.outer(forbatch, bo))
    htmat = otmat * act(ctmat)
    # yt = T.concatenate([addzero, tempyt], axis=0)
    return htmat, ctmat
def encoder(wordt, htm1, ctm1, Een, Wxien, Whien, bien, Wxfen, Whfen, bfen,
            Wxcen, Whcen, bcen, Wxoen, Whoen, boen):
    xt = theano.dot(wordt, Een)
    it = sigma(theano.dot(xt, Wxien) + theano.dot(htm1, Whien) + bien)
    ft = sigma(theano.dot(xt, Wxfen) + theano.dot(htm1, Whfen) + bfen)
    ct = ft * ctm1 + it * act(theano.dot(xt, Wxcen) + theano.dot(htm1, Whcen) + bcen)
    ot = sigma(theano.dot(xt, Wxoen) + theano.dot(htm1, Whoen) + boen)
    ht = ot * act(ct)
    # yt = T.concatenate([addzero, tempyt], axis=0)
    return ht, ct
def one_lstm_step(x_t, h_tm1, c_tm1, W_xi, W_hi, W_xf, W_hf, W_xc, W_hc, W_xo, W_ho):
    i_t = T.nnet.sigmoid(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi))
    f_t = T.nnet.sigmoid(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf))
    c_t = f_t * c_tm1 + i_t * T.tanh(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc))
    o_t = T.nnet.sigmoid(theano.dot(x_t, W_xo) + theano.dot(h_tm1, W_ho))
    h_t = o_t * T.tanh(c_t)
    return [h_t, c_t]
def mk_training_fn(self):
    """The Constant Stochastic Gradient Step Fn with Optimal Preconditioning Matrix"""
    q_size = self.q_size
    avg_C = self.avg_C
    t = self.t
    updates = self.updates
    # Trying to stick to variable names as given in the publication
    # https://arxiv.org/pdf/1704.04289v1.pdf
    S = self.batch_size
    N = self.total_size
    # inputs
    random = self.random
    inarray = self.inarray
    # gradient of log likelihood
    gt = -1 * (1. / S) * (self.dlogp_elemwise.sum(axis=0) + (S / N) * self.dlog_prior)
    # update moving average of Noise Covariance
    gt_diff = (self.dlogp_elemwise - self.dlogp_elemwise.mean(axis=0))
    V = (1. / (S - 1)) * theano.dot(gt_diff.T, gt_diff)
    C_t = (1. - 1. / t) * avg_C + (1. / t) * V
    # BB^T = C
    B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))
    # Optimal Preconditioning Matrix
    H = (2. * S / N) * tt.nlinalg.matrix_inverse(C_t)
    # step value on the log likelihood gradient preconditioned with H
    step = -1 * theano.dot(H, gt.dimshuffle([0, 'x']))
    # sample gaussian noise dW
    dW = random.normal((q_size, 1), dtype=theano.config.floatX, avg=0.0, std=1.0)
    # noise term is inversely proportional to batch size
    noise_term = (1. / np.sqrt(S)) * theano.dot(H, theano.dot(B, dW))
    # step + noise term
    dq = (step + noise_term).flatten()
    # update time and avg_C
    updates.update({avg_C: C_t, t: t + 1})
    f = theano.function(outputs=dq, inputs=inarray, updates=updates,
                        allow_input_downcast=True)
    return f
def __init__(self, x, y, n_dim, k_classes):
    self.weights = theano.shared(
        value=numpy.zeros((n_dim, k_classes), dtype=theano.config.floatX),
        name="weights")
    self.bias = theano.shared(
        value=numpy.zeros((k_classes,), dtype=theano.config.floatX),
        name='bias')
    self.n_dim = n_dim
    self.classes = k_classes
    self.x = x
    self.y = y
    self.probability_d_in_k = tensor.nnet.softmax(theano.dot(self.x, self.weights) + self.bias)
    self.classification = tensor.argmax(self.probability_d_in_k, axis=1)
    self.template = [(self.n_dim, self.classes), (self.classes,)]
    self.loss_gradient = theano.function(
        inputs=[self.x, self.y],
        outputs=[tensor.grad(self.log_loss(), self.weights),
                 tensor.grad(self.log_loss(), self.bias)]
    )
    self.loss_overall = theano.function(
        inputs=[self.x, self.y],
        outputs=self.log_loss(),
    )
def audcc_from_power(self, power, n_bands=None, n_audcc=None, dct_unitary=None,
                     noise_level=None):
    """
    :type power: ndarray or NdArrayResult with ndim=2
    :param power: a power spectrogram with each frame in a row. A frequency-scaled
        spectrogram makes sense here too.

    :type n_bands: int
    :param n_bands: number of critical bands of power

    :type n_audcc: int
    :param n_audcc: number of cepstral coefficients to calculate

    :type dct_unitary: Bool
    :param dct_unitary: True means apply different scaling to first coef.
    """
    n_audcc = self.n_audcc if n_audcc is None else n_audcc
    dct_unitary = self.dct_unitary if dct_unitary is None else dct_unitary
    n_bands = self.n_bands if n_bands is None else n_bands
    noise_level = self.noise_level if noise_level is None else noise_level
    dct = fourier.dct_matrix(n_audcc, n_bands, unitary=dct_unitary)
    dct = theano.tensor.as_tensor_variable(dct, name="AudioFeatures.dct<%i>" % id(dct))
    return theano.dot(theano.tensor.log(power + noise_level), dct.T)
def test_csr_correct_output_faster_than_scipy(self):
    # contrast with test_grad, we put csr in float32, csc in float64
    sparse_dtype = "float32"
    dense_dtype = "float32"

    a = SparseType("csr", dtype=sparse_dtype)()
    b = tensor.matrix(dtype=dense_dtype)
    d = theano.dot(a, b)
    f = theano.function([a, b], d)

    for M, N, K, nnz in [(4, 3, 2, 3),
                         (40, 30, 20, 3),
                         (40, 30, 20, 30),
                         (400, 3000, 200, 6000)]:
        spmat = sp.csr_matrix(random_lil((M, N), sparse_dtype, nnz))
        mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)
        t0 = time.time()
        theano_result = f(spmat, mat)
        t1 = time.time()
        scipy_result = spmat * mat
        t2 = time.time()

        theano_time = t1 - t0
        scipy_time = t2 - t1
        # print theano_result
        # print scipy_result
        print "theano took", theano_time,
        print "scipy took", scipy_time
        overhead_tol = 0.002  # seconds
        overhead_rtol = 1.1   # times as long
        self.assertTrue(numpy.allclose(theano_result, scipy_result))
        if not theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
            self.assertFalse(theano_time > overhead_rtol * scipy_time + overhead_tol)
def apply(self, state_below, mask_below, context, c_mask):
    hiddens, attended = self._forward(state_below, mask_below, context, c_mask)
    combine = T.concatenate([state_below, hiddens, attended], axis=2)
    if self.max_out:
        merge_out = theano.dot(combine, self.W_m) + self.b_m
        merge_out = merge_out.reshape((merge_out.shape[0],
                                       merge_out.shape[1],
                                       merge_out.shape[2] / 2,
                                       2), ndim=4).max(axis=3)
    else:
        merge_out = T.tanh(theano.dot(combine, self.W_m) + self.b_m)
    return merge_out * mask_below[:, :, None]
def one_step(i_t, h_tm1, o_tm1, h_bias, W_in, W_out, W_rec):
    """Perform one step of a simple recurrent network returning the current
    hidden activations and the output.

    `i_t` is the input at the current timestep, `h_tm1` and `o_tm1` are the
    hidden values and outputs of the previous timestep. `h_bias` is the bias
    for the hidden units. `W_in`, `W_out` and `W_rec` are the weight matrices.
    Transfer functions can be specified via `hiddenfunc` and `outfunc` for the
    hidden and the output layer."""
    hidden_in = theano.dot(W_in, i_t)
    hidden_in += theano.dot(W_rec, h_tm1)
    hidden_in += h_bias
    h_t = hiddenfunc(hidden_in)
    o_t = outfunc(theano.dot(W_out, h_t))
    return [h_t, o_t]
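# --- Hypothetical usage sketch (not part of the original snippet) ---
# A minimal way to unroll one_step over a sequence with theano.scan; `inpt`,
# `h0`, `o0` and the weight variables are assumed to be defined elsewhere.
# Note that the weights above multiply column vectors from the left (dot(W, x)).
(hidden_seq, output_seq), _ = theano.scan(
    one_step,
    sequences=inpt,                          # one input vector per step
    outputs_info=[h0, o0],                   # previous hidden state and output
    non_sequences=[h_bias, W_in, W_out, W_rec])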
def t_gemv1(self, m_shp):
    ''' test vector2 + dot(matrix, vector1) '''
    rng = numpy.random.RandomState(unittest_tools.fetch_seed())
    v1 = theano.shared(numpy.array(rng.uniform(size=(m_shp[1],)), dtype='float32'))
    v2_orig = numpy.array(rng.uniform(size=(m_shp[0],)), dtype='float32')
    v2 = theano.shared(v2_orig)
    m = theano.shared(numpy.array(rng.uniform(size=m_shp), dtype='float32'))

    f = theano.function([], v2 + tensor.dot(m, v1), mode=self.mode)

    # Assert they produce the same output
    assert numpy.allclose(f(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
    topo = [n.op for n in f.maker.env.toposort()]
    assert topo == [CGemv(inplace=False)], topo

    # test the inplace version
    f = theano.function([], [], updates={v2: v2 + theano.dot(m, v1)}, mode=self.mode)

    # Assert they produce the same output
    f()
    assert numpy.allclose(v2.get_value(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
    topo = [n.op for n in f.maker.env.toposort()]
    assert topo == [CGemv(inplace=True)]
def __init__(self, X, n_in, n_out):
    # Initialize network parameters.
    W = theano.shared(np.random.randn(n_in, n_out))
    b = theano.shared(np.zeros(n_out))
    self.params = [W, b]
    # Compute layer activations
    self.output = T.nnet.sigmoid(theano.dot(X, W) + b)
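# --- Hypothetical usage sketch (not part of the original snippet) ---
# Assuming this __init__ belongs to a layer class (called `Layer` here purely
# for illustration), the sigmoid forward pass can be compiled directly from
# the `output` attribute.
X = T.matrix('X')
layer = Layer(X, n_in=784, n_out=10)
forward = theano.function([X], layer.output)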
def t_gemv1(self, m_shp):
    """ test vector2 + dot(matrix, vector1) """
    rng = numpy.random.RandomState(unittest_tools.fetch_seed())
    v1 = theano.shared(numpy.array(rng.uniform(size=(m_shp[1],)), dtype="float32"))
    v2_orig = numpy.array(rng.uniform(size=(m_shp[0],)), dtype="float32")
    v2 = theano.shared(v2_orig)
    m = theano.shared(numpy.array(rng.uniform(size=m_shp), dtype="float32"))

    f = theano.function([], v2 + tensor.dot(m, v1), mode=self.mode)

    # Assert they produce the same output
    assert numpy.allclose(f(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
    topo = [n.op for n in f.maker.fgraph.toposort()]
    assert topo == [CGemv(inplace=False)], topo

    # test the inplace version
    g = theano.function([], [], updates=[(v2, v2 + theano.dot(m, v1))], mode=self.mode)

    # Assert they produce the same output
    g()
    assert numpy.allclose(v2.get_value(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
    topo = [n.op for n in g.maker.fgraph.toposort()]
    assert topo == [CGemv(inplace=True)]

    # Do the same tests with a matrix with strides in both dimensions
    m.set_value(m.get_value(borrow=True)[::-1, ::-1], borrow=True)
    v2.set_value(v2_orig)
    assert numpy.allclose(f(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
    g()
    assert numpy.allclose(v2.get_value(), numpy.dot(m.get_value(), v1.get_value()) + v2_orig)
def step(self, u_t, *args):
    """
    step function to calculate BPTT

    type u_t: T.matrix()
    param u_t: input sequence of the network

    type *args: python parameter list
    param *args: this is needed to implement a more general model of the
        step function, see theano-users:
        http://groups.google.com/group/theano-users/browse_thread/thread/2fa44792c9cdd0d5
    """
    # get the recurrent activations
    r_act_vals = [args[u] for u in range(self.len_output_taps)]

    # get the recurrent weights
    r_weights = [args[u] for u in range(self.len_output_taps, (self.len_output_taps) * 2)]

    # get the input/output weights
    b_h = args[self.len_output_taps * 2]
    W_in = args[self.len_output_taps * 2 + 1]
    b_in = args[self.len_output_taps * 2 + 2]

    # sum up the recurrent activations
    act = theano.dot(r_act_vals[0], r_weights[0]) + b_h
    for u in range(1, self.len_output_taps):
        act += T.dot(r_act_vals[u], r_weights[u]) + b_h

    # compute the new recurrent activation
    h_t = T.tanh(T.dot(u_t, W_in) + b_in + act)

    return h_t
def pred(p, X):
    ''' '''
    w = p['w'].value
    b = p['b'].value
    P = TT.nnet.softmax(T.dot(X, w) + b)
    return TT.argmax(P, 1)