border_mode = (conv_size1 - 1, 0) conv1 = conv2d(inpt, w_conv1, subsample=(2, 1), border_mode=border_mode) conv1 = conv1 + b_conv1.dimshuffle('x', 0, 'x', 'x') theano.printing.Print("conv1.shape")(conv1.shape) border_mode = (conv_size2 - 1, 0) conv2 = conv2d(conv1, w_conv2, subsample=(2, 1), border_mode=border_mode) conv2 = relu(conv2 + b_conv2.dimshuffle('x', 0, 'x', 'x')) theano.printing.Print("conv2.shape")(conv2.shape) # Last axis is 1 conv_out = conv2[:, :, :, 0].dimshuffle(2, 0, 1) theano.printing.Print("conv_out.shape")(conv_out.shape) conv_h1, convgate_h1 = conv_to_h1.proj(conv_out) conv_h2, convgate_h2 = conv_to_h2.proj(conv_out) u = tensor.arange(c_sym.shape[0]).dimshuffle('x', 'x', 0) u = tensor.cast(u, theano.config.floatX) def calc_phi(k_t, a_t, b_t, u_c): a_t = a_t.dimshuffle(0, 1, 'x') b_t = b_t.dimshuffle(0, 1, 'x') ss1 = (k_t.dimshuffle(0, 1, 'x') - u_c) ** 2 ss2 = -b_t * ss1 ss3 = a_t * tensor.exp(ss2) ss4 = ss3.sum(axis=1) return ss4 def step(xinp_h1_t, xgate_h1_t,
# Register the attention and hidden-to-output weights, keeping their order.
params.extend([
    h1_to_att_a, h1_to_att_b, h1_to_att_k,
    h1_to_outs, h2_to_outs, h3_to_outs,
])

# make_weights / make_biases each return a one-element sequence here, which
# is unpacked into a single name.
[pred_proj] = make_weights(n_v_proj, [n_pred_proj], random_state)
[pred_b] = make_biases([n_pred_proj])
params.extend([pred_proj, pred_b])
biases.append(pred_b)

# Along axis 0: the input drops the last entry, while the target and mask
# drop the first.
inpt, target = X_sym[:-1], X_sym[1:]
mask = X_mask_sym[1:]

# The 2-D mask gets a broadcastable trailing axis before multiplying c_sym.
context = c_sym * c_mask_sym.dimshuffle(0, 1, 'x')

inp_h1, inpgate_h1 = inp_to_h1.proj(inpt)
inp_h2, inpgate_h2 = inp_to_h2.proj(inpt)
inp_h3, inpgate_h3 = inp_to_h3.proj(inpt)

# Indices 0 .. c_sym.shape[0] - 1 placed on the last of three axes (the
# first two are broadcastable), cast to the configured float type.
u = tensor.cast(
    tensor.arange(c_sym.shape[0]).dimshuffle('x', 'x', 0),
    theano.config.floatX,
)


def calc_phi(k_t, a_t, b_t, u_c):
    """Return ``sum(a_t * exp(-b_t * (k_t - u_c) ** 2), axis=1)``.

    ``k_t``, ``a_t`` and ``b_t`` are 2-D symbolic variables; each is given a
    broadcastable trailing axis before being combined with ``u_c``.
    """
    k_col = k_t.dimshuffle(0, 1, 'x')
    a_col = a_t.dimshuffle(0, 1, 'x')
    b_col = b_t.dimshuffle(0, 1, 'x')
    sq_diff = (k_col - u_c) ** 2
    weighted = a_col * tensor.exp(-b_col * sq_diff)
    return weighted.sum(axis=1)
# Register the attention and hidden-to-output weights, keeping their order.
params.extend([
    h1_to_att_a, h1_to_att_b, h1_to_att_k,
    h1_to_outs, h2_to_outs, h3_to_outs,
])

# make_weights / make_biases each return a one-element sequence here, which
# is unpacked into a single name.
[pred_proj] = make_weights(n_v_proj, [n_pred_proj], random_state)
[pred_b] = make_biases([n_pred_proj])
params.extend([pred_proj, pred_b])
biases.append(pred_b)

# Along axis 0: the input drops the last entry, while the target and mask
# drop the first.
inpt, target = X_sym[:-1], X_sym[1:]
mask = X_mask_sym[1:]

# The 2-D mask gets a broadcastable trailing axis before multiplying c_sym.
context = c_sym * c_mask_sym.dimshuffle(0, 1, 'x')

inp_h1, inpgate_h1 = inp_to_h1.proj(inpt)
inp_h2, inpgate_h2 = inp_to_h2.proj(inpt)
inp_h3, inpgate_h3 = inp_to_h3.proj(inpt)

# Indices 0 .. c_sym.shape[0] - 1 placed on the last of three axes (the
# first two are broadcastable), cast to the configured float type.
u = tensor.cast(
    tensor.arange(c_sym.shape[0]).dimshuffle('x', 'x', 0),
    theano.config.floatX,
)


def calc_phi(k_t, a_t, b_t, u_c):
    """Return ``sum(a_t * exp(-b_t * (k_t - u_c) ** 2), axis=1)``.

    ``k_t``, ``a_t`` and ``b_t`` are 2-D symbolic variables; each is given a
    broadcastable trailing axis before being combined with ``u_c``.
    """
    k_col = k_t.dimshuffle(0, 1, 'x')
    a_col = a_t.dimshuffle(0, 1, 'x')
    b_col = b_t.dimshuffle(0, 1, 'x')
    sq_diff = (k_col - u_c) ** 2
    weighted = a_col * tensor.exp(-b_col * sq_diff)
    return weighted.sum(axis=1)
# Each make_weights call returns a one-element sequence here, which is
# unpacked into a single name.  Call order is kept because every call is
# handed the same random_state.
[h2_to_outs] = make_weights(n_hid, [n_hid], random_state)
[h3_to_outs] = make_weights(n_hid, [n_hid], random_state)
params.extend([h1_to_outs, h2_to_outs, h3_to_outs])

[v_outs_to_corr_outs] = make_weights(n_v_hid, [1], random_state)
# Original note on the factor of two: "2 * for mag and phase".
[corr_outs_to_final_outs] = make_weights(
    n_hid, [2 * n_density], random_state)
params.extend([v_outs_to_corr_outs, corr_outs_to_final_outs])

# Along axis 0: the input drops the last entry, while the target and mask
# drop the first.
inpt, target = X_sym[:-1], X_sym[1:]
mask = X_mask_sym[1:]

# The 2-D mask gets a broadcastable trailing axis before multiplying c_sym.
context = c_sym * c_mask_sym.dimshuffle(0, 1, 'x')

inp_h1, inpgate_h1 = inp_to_h1.proj(inpt)
inp_h2, inpgate_h2 = inp_to_h2.proj(inpt)
inp_h3, inpgate_h3 = inp_to_h3.proj(inpt)

# Indices 0 .. c_sym.shape[0] - 1 placed on the last of three axes (the
# first two are broadcastable), cast to the configured float type.
u = tensor.cast(
    tensor.arange(c_sym.shape[0]).dimshuffle('x', 'x', 0),
    theano.config.floatX,
)


def calc_phi(k_t, a_t, b_t, u_c):
    """Return ``sum(a_t * exp(-b_t * (k_t - u_c) ** 2), axis=1)``.

    ``k_t``, ``a_t`` and ``b_t`` are 2-D symbolic variables; each is given a
    broadcastable trailing axis before being combined with ``u_c``.
    """
    k_col = k_t.dimshuffle(0, 1, 'x')
    a_col = a_t.dimshuffle(0, 1, 'x')
    b_col = b_t.dimshuffle(0, 1, 'x')
    sq_diff = (k_col - u_c) ** 2
    weighted = a_col * tensor.exp(-b_col * sq_diff)
    return weighted.sum(axis=1)
border_mode = (conv_size1 - 1, 0) conv1 = conv2d(inpt, w_conv1, subsample=(2, 1), border_mode=border_mode) conv1 = conv1 + b_conv1.dimshuffle('x', 0, 'x', 'x') theano.printing.Print("conv1.shape")(conv1.shape) border_mode = (conv_size2 - 1, 0) conv2 = conv2d(conv1, w_conv2, subsample=(2, 1), border_mode=border_mode) conv2 = relu(conv2 + b_conv2.dimshuffle('x', 0, 'x', 'x')) theano.printing.Print("conv2.shape")(conv2.shape) # Last axis is 1 conv_out = conv2[:, :, :, 0].dimshuffle(2, 0, 1) theano.printing.Print("conv_out.shape")(conv_out.shape) conv_h1, convgate_h1 = conv_to_h1.proj(conv_out) conv_h2, convgate_h2 = conv_to_h2.proj(conv_out) u = tensor.arange(c_sym.shape[0]).dimshuffle('x', 'x', 0) u = tensor.cast(u, theano.config.floatX) def calc_phi(k_t, a_t, b_t, u_c): a_t = a_t.dimshuffle(0, 1, 'x') b_t = b_t.dimshuffle(0, 1, 'x') ss1 = (k_t.dimshuffle(0, 1, 'x') - u_c)**2 ss2 = -b_t * ss1 ss3 = a_t * tensor.exp(ss2) ss4 = ss3.sum(axis=1) return ss4 def step(xinp_h1_t, xgate_h1_t, xinp_h2_t, xgate_h2_t, h1_tm1, h2_tm1,
# make_weights / make_biases each return a one-element sequence here, which
# is unpacked into a single name.
[softmax2_proj] = make_weights(n_proj, [n_softmax2], random_state)
[softmax2_b] = make_biases([n_softmax2])
params.extend([softmax1_proj, softmax1_b, softmax2_proj, softmax2_b])
biases.extend([softmax1_b, softmax2_b])

# Along axis 0: the input drops the last entry, while the target and mask
# drop the first.
inpt, target = X_sym[:-1], X_sym[1:]
mask = X_mask_sym[1:]

# The 2-D mask gets a broadcastable trailing axis before multiplying c_sym.
context = c_sym * c_mask_sym.dimshuffle(0, 1, 'x')

# Entries 0 and 1 of the last input axis are one-hot encoded separately
# (n_softmax1 and n_softmax2 classes) and joined along the last axis; `inpt`
# is rebound to the joined encoding.
pt1 = theano_one_hot(inpt[:, :, 0], n_classes=n_softmax1)
pt2 = theano_one_hot(inpt[:, :, 1], n_classes=n_softmax2)
inpt = tensor.concatenate((pt1, pt2), axis=-1)

# Affine map of the joined encoding; all three projections below consume it.
inpt_reduced = inpt.dot(inp_proj) + inp_b
inp_h1, inpgate_h1 = inp_to_h1.proj(inpt_reduced)
inp_h2, inpgate_h2 = inp_to_h2.proj(inpt_reduced)
inp_h3, inpgate_h3 = inp_to_h3.proj(inpt_reduced)

# Indices 0 .. c_sym.shape[0] - 1 placed on the last of three axes (the
# first two are broadcastable), cast to the configured float type.
u = tensor.cast(
    tensor.arange(c_sym.shape[0]).dimshuffle('x', 'x', 0),
    theano.config.floatX,
)


def calc_phi(k_t, a_t, b_t, u_c):
    """Return ``sum(a_t * exp(-b_t * (k_t - u_c) ** 2), axis=1)``.

    ``k_t``, ``a_t`` and ``b_t`` are 2-D symbolic variables; each is given a
    broadcastable trailing axis before being combined with ``u_c``.
    """
    k_col = k_t.dimshuffle(0, 1, 'x')
    a_col = a_t.dimshuffle(0, 1, 'x')
    b_col = b_t.dimshuffle(0, 1, 'x')
    sq_diff = (k_col - u_c) ** 2
    weighted = a_col * tensor.exp(-b_col * sq_diff)
    return weighted.sum(axis=1)