# NOTE(review): this chunk is a whitespace-mangled paste. The statements down
# to the bare `return` are the tail of a per-timestep step function whose
# `def` line lies outside this chunk; original indentation is not recoverable.
w_t = ss6.sum(axis=1)  # collapse attention mixture components into one window vector
# Feed the attention window into the input/gate projections of layers 2 and 3.
attinp_h2, attgate_h2 = att_to_h2.proj(w_t)
attinp_h3, attgate_h3 = att_to_h3.proj(w_t)
h2_t = cell2.step(xinp_h2_t + h1inp_h2 + attinp_h2,
                  xgate_h2_t + h1gate_h2 + attgate_h2, h2_tm1)
h2inp_h3, h2gate_h3 = h2_to_h3.proj(h2_t)
h3_t = cell3.step(xinp_h3_t + h1inp_h3 + h2inp_h3 + attinp_h3,
                  xgate_h3_t + h1gate_h3 + h2gate_h3 + attgate_h3, h3_tm1)
return h1_t, h2_t, h3_t, k_t, w_t  # per-layer hidden states, attention position, window

# Initial sampling input: one all-zero row per minibatch element.
init_x = as_shared(np_zeros((minibatch_size, n_out)))
srng = RandomStreams(1999)  # fixed seed -> reproducible sampling


def _slice_outs(outs):
    """Split the flat network output into mixture-density parameters.

    Returns (mu, sigma, coeff): means, standard deviations, and mixture
    weights, with mu/sigma reshaped to (-1, n_out // 2, n_components).
    """
    k = n_components
    half = n_out // 2
    outs = outs.reshape((-1, n_density))
    mu = outs[:, 0:half * k].reshape((-1, half, k))
    sigma = outs[:, half * k:2 * half * k].reshape((-1, half, k))
    coeff = outs[:, 2 * half * k:]
    # bias_sym acts as a sampling "bias"/temperature term (cf. Graves 2013
    # handwriting synthesis); the 1E-6 floors keep sigma and coeff strictly
    # positive so downstream log-likelihoods stay finite.
    sigma = tensor.exp(sigma - bias_sym) + 1E-6
    coeff = tensor.nnet.softmax(coeff * (1. + bias_sym)) + 1E-6
    return mu, sigma, coeff


# Used to calculate stopping heuristic from sections 5.3
u_max = 0. * tensor.arange(c_sym.shape[0]) + c_sym.shape[0]
# NOTE(review): mangled paste — cost/gradient wiring for a deconv softmax
# output. `target`, `mask`, `params`, `b_softmax`, etc. are defined outside
# this chunk; the axis semantics of the dimshuffle below cannot be confirmed
# from here.
outs_deconv = outs_deconv.dimshuffle(2, 0, 3, 1)
outs_deconv = outs_deconv[:target.shape[0]]  # trim to target length
theano.printing.Print("outs_deconv.shape")(outs_deconv.shape)  # debug print
preds = softmax(outs_deconv + b_softmax)
theano.printing.Print("preds.shape")(preds.shape)
theano.printing.Print("target.shape")(target.shape)
target = theano_one_hot(target, r=n_bins)  # discretized targets -> one-hot
theano.printing.Print("target.shape")(target.shape)
cost = categorical_crossentropy(preds, target)
theano.printing.Print("cost.shape")(cost.shape)
theano.printing.Print("mask.shape")(mask.shape)
# Zero out cost at padded positions, then average over time * batch.
cost = cost * mask.dimshuffle(0, 1, 'x')
cost = cost.sum() / (target.shape[0] * target.shape[1])
grads = tensor.grad(cost, params)

# Initial sampling input: one all-zero row per minibatch element.
init_x = as_shared(np_zeros((minibatch_size, n_out)))
srng = RandomStreams(1999)  # fixed seed -> reproducible sampling

# NOTE(review): stray opening triple quote — this begins a commented-out
# region (dead code held in a string); its closing quote and the rest of the
# region lie outside this chunk, so everything below is string content.
"""
# Used to calculate stopping heuristic from sections 5.3
u_max = 0. * tensor.arange(c_sym.shape[0]) + c_sym.shape[0]
u_max = u_max.dimshuffle('x', 'x', 0)
u_max = tensor.cast(u_max, theano.config.floatX)

def _slice_outs(outs):
    k = n_components
    if outs.ndim == 4:
        def _r(i):
            i = i.dimshuffle(0, 2, 1, 3)
            return i.reshape((-1, i.shape[2], i.shape[3]))
        mu = _r(outs[:, :, :, 0:k])
# NOTE(review): mangled paste — as in the companion chunk, the statements down
# to the bare `return` are the tail of a per-timestep step function whose
# `def` line is outside this chunk.
w_t = ss6.sum(axis=1)  # collapse attention mixture components into one window vector
# Feed the attention window into the input/gate projections of layers 2 and 3.
attinp_h2, attgate_h2 = att_to_h2.proj(w_t)
attinp_h3, attgate_h3 = att_to_h3.proj(w_t)
h2_t = cell2.step(xinp_h2_t + h1inp_h2 + attinp_h2,
                  xgate_h2_t + h1gate_h2 + attgate_h2, h2_tm1)
h2inp_h3, h2gate_h3 = h2_to_h3.proj(h2_t)
h3_t = cell3.step(xinp_h3_t + h1inp_h3 + h2inp_h3 + attinp_h3,
                  xgate_h3_t + h1gate_h3 + h2gate_h3 + attgate_h3, h3_tm1)
return h1_t, h2_t, h3_t, k_t, w_t  # per-layer hidden states, attention position, window

# Initial sampling input: one all-zero row per minibatch element.
# NOTE(review): this chunk sizes init_x with n_feats where a sibling chunk
# uses n_out — confirm which is intended for this model variant.
init_x = as_shared(np_zeros((minibatch_size, n_feats)))
srng = RandomStreams(1999)  # fixed seed -> reproducible sampling

# Used to calculate stopping heuristic from sections 5.3
u_max = 0. * tensor.arange(c_sym.shape[0]) + c_sym.shape[0]
u_max = u_max.dimshuffle('x', 'x', 0)  # add two leading broadcastable axes
u_max = tensor.cast(u_max, theano.config.floatX)


def sample_step(x_tm1, h1_tm1, h2_tm1, h3_tm1, k_tm1, w_tm1, ctx):
    # One step of sequence sampling: one-hot encode the two discrete
    # components of the previous sample and concatenate them back into a
    # single input vector.
    # NOTE(review): body is truncated at the end of this chunk.
    theano.printing.Print("x_tm1.shape")(x_tm1.shape)  # debug print
    pt1 = theano_one_hot(x_tm1[:, 0], n_classes=n_softmax1)
    theano.printing.Print("pt1.shape")(pt1.shape)
    pt2 = theano_one_hot(x_tm1[:, 1], n_classes=n_softmax2)
    theano.printing.Print("pt2.shape")(pt2.shape)
    x_tm1 = tensor.concatenate((pt1, pt2), axis=-1)
    theano.printing.Print("x_tm1.shape")(x_tm1.shape)