def out_step(shuff_inpt_tm1, vinp_t, pred_fm1, v_h1_tm1):
    """Advance the ``v_cell1`` recurrence one step and emit a prediction.

    ``pred_fm1`` is given a trailing broadcastable axis and joined with
    ``shuff_inpt_tm1`` and ``vinp_t`` along the last axis. The joined tensor
    is projected by ``outs_to_v_h1`` and fed, together with the previous
    hidden state ``v_h1_tm1``, to ``v_cell1.step``.

    Returns:
        A pair ``(prediction, hidden)``: column 0 of
        ``hidden.dot(pred_proj) + pred_b`` and the new hidden state.
    """
    # NOTE(review): the results of the Print ops below are discarded, so
    # they are presumably never attached to the compiled graph -- confirm
    # whether the shape debugging is still wanted.
    prev_pred_col = pred_fm1.dimshuffle(0, 'x')
    joined = concatenate((shuff_inpt_tm1, vinp_t, prev_pred_col), axis=-1)
    theano.printing.Print("j_t.shape")(joined.shape)

    proj_inp, proj_gate = outs_to_v_h1.proj(joined)
    hidden = v_cell1.step(proj_inp, proj_gate, v_h1_tm1)
    theano.printing.Print("v_h1_t.shape")(hidden.shape)

    readout = hidden.dot(pred_proj) + pred_b
    theano.printing.Print("pred_f.shape")(readout.shape)
    return readout[:, 0], hidden
def out_step(shuff_inpt_tm1, vinp_t, pred_fm1, v_h1_tm1):
    """Compute one recurrent step of the output cell plus its readout.

    Inputs are concatenated on the last axis in the order
    ``(shuff_inpt_tm1, vinp_t, pred_fm1 with an added trailing axis)``,
    projected with ``outs_to_v_h1.proj`` into an input/gate pair, and
    passed to ``v_cell1.step`` along with ``v_h1_tm1``.

    Returns:
        ``(pred, state)`` where ``pred`` is ``[:, 0]`` of the affine readout
        ``state.dot(pred_proj) + pred_b`` and ``state`` is the new hidden
        state.
    """
    pieces = (shuff_inpt_tm1, vinp_t, pred_fm1.dimshuffle(0, 'x'))
    stacked = concatenate(pieces, axis=-1)
    # NOTE(review): Print return values are dropped here and below, so the
    # ops are presumably not part of the graph that gets compiled.
    theano.printing.Print("j_t.shape")(stacked.shape)

    cell_inp, cell_gate = outs_to_v_h1.proj(stacked)
    state = v_cell1.step(cell_inp, cell_gate, v_h1_tm1)
    theano.printing.Print("v_h1_t.shape")(state.shape)

    pred = state.dot(pred_proj) + pred_b
    theano.printing.Print("pred_f.shape")(pred.shape)

    first_column = pred[:, 0]
    return first_column, state
def sample_out_step(x_tm1_shuf, vinp_t, pred_fm1, v_h1_tm1):
    """Sampling-time step of the output cell using the raw linear readout.

    ``pred_fm1`` receives a trailing broadcastable axis and is concatenated
    with ``x_tm1_shuf`` and ``vinp_t`` on the last axis. The result is
    projected by ``outs_to_v_h1`` and stepped through ``v_cell1`` from
    ``v_h1_tm1``.

    Returns:
        ``(prediction, hidden)``: column 0 of
        ``hidden.dot(pred_proj) + pred_b`` and the new hidden state.
    """
    prev_pred_col = pred_fm1.dimshuffle(0, 'x')
    joined = concatenate((x_tm1_shuf, vinp_t, prev_pred_col), axis=-1)
    # NOTE(review): Print results are discarded, so these debug ops are
    # presumably never evaluated in the compiled function.
    theano.printing.Print("j_t.shape")(joined.shape)

    proj_inp, proj_gate = outs_to_v_h1.proj(joined)
    hidden = v_cell1.step(proj_inp, proj_gate, v_h1_tm1)
    theano.printing.Print("v_h1_t.shape")(hidden.shape)

    readout = hidden.dot(pred_proj) + pred_b
    # Disabled post-processing of the MSE estimate (described as
    # "not perfect" by the original author):
    #   tensor.clip(pred_f, 0 + 0.01, n_bins - 0.01) followed by
    #   tensor.floor(pred_f)
    return readout[:, 0], hidden
def sample_out_step(x_tm1_shuf, vinp_t, pred_fm1, v_h1_tm1):
    """Sampling-time step of the output cell with a binomial draw.

    The previous prediction (with an added trailing axis) is concatenated
    with ``x_tm1_shuf`` and ``vinp_t`` on the last axis, projected by
    ``outs_to_v_h1`` and stepped through ``v_cell1``. The affine readout is
    passed through ``sigmoid`` and then through
    ``sample_binomial(..., n_bins, srng)``.

    Returns:
        ``(sample, hidden)``: column 0 of the sampled readout and the new
        hidden state.
    """
    prev_pred_col = pred_fm1.dimshuffle(0, 'x')
    joined = concatenate((x_tm1_shuf, vinp_t, prev_pred_col), axis=-1)
    # NOTE(review): Print results are discarded, so these debug ops are
    # presumably never evaluated in the compiled function.
    theano.printing.Print("j_t.shape")(joined.shape)

    proj_inp, proj_gate = outs_to_v_h1.proj(joined)
    hidden = v_cell1.step(proj_inp, proj_gate, v_h1_tm1)
    theano.printing.Print("v_h1_t.shape")(hidden.shape)

    activation = sigmoid(hidden.dot(pred_proj) + pred_b)
    drawn = sample_binomial(activation, n_bins, srng)
    theano.printing.Print("pred_f.shape")(drawn.shape)
    return drawn[:, 0], hidden
def sample_out_step(x_tm1_shuf, vinp_f, pred_fm1, v_h1_fm1):
    """Sampling-time step that first draws from the previous prediction.

    ``pred_fm1`` is passed to ``sample_softmax`` (with ``srng``) and the
    draw is given a trailing broadcastable axis. That sample is concatenated
    with ``x_tm1_shuf`` and ``vinp_f`` on the last axis, projected by
    ``outs_to_v_h1`` and stepped through ``v_cell1`` from ``v_h1_fm1``.

    Returns:
        ``(pred, hidden, sample)``: the full affine readout
        ``hidden.dot(pred_proj) + pred_b`` (not sliced), the new hidden
        state, and the sample drawn from ``pred_fm1``.
    """
    # NOTE(review): Print results are discarded throughout, so these debug
    # ops are presumably never evaluated in the compiled function.
    theano.printing.Print("pred_fm1.shape")(pred_fm1.shape)
    drawn = sample_softmax(pred_fm1, srng)
    sample_col = drawn.dimshuffle(0, 'x')
    theano.printing.Print("samp_fm1.shape")(sample_col.shape)

    joined = concatenate((x_tm1_shuf, vinp_f, sample_col), axis=-1)
    theano.printing.Print("j_f.shape")(joined.shape)

    proj_inp, proj_gate = outs_to_v_h1.proj(joined)
    hidden = v_cell1.step(proj_inp, proj_gate, v_h1_fm1)
    theano.printing.Print("v_h1_f.shape")(hidden.shape)

    readout = hidden.dot(pred_proj) + pred_b
    theano.printing.Print("pred_f.shape")(readout.shape)
    return readout, hidden, sample_col
def sample_out_step(x_tm1_shuf, vinp_t, pred_fm1, v_h1_tm1):
    """One sampling step of the output cell; prediction is the raw readout.

    Concatenates, on the last axis, ``x_tm1_shuf``, ``vinp_t`` and
    ``pred_fm1`` (after adding a trailing broadcastable axis), projects the
    result with ``outs_to_v_h1.proj`` and advances ``v_cell1`` from
    ``v_h1_tm1``.

    Returns:
        ``(pred, state)`` where ``pred`` is ``[:, 0]`` of
        ``state.dot(pred_proj) + pred_b`` and ``state`` is the new hidden
        state.
    """
    pieces = (x_tm1_shuf, vinp_t, pred_fm1.dimshuffle(0, 'x'))
    stacked = concatenate(pieces, axis=-1)
    # NOTE(review): the Print return values are dropped, so the ops are
    # presumably not part of the graph that gets compiled.
    theano.printing.Print("j_t.shape")(stacked.shape)

    cell_inp, cell_gate = outs_to_v_h1.proj(stacked)
    state = v_cell1.step(cell_inp, cell_gate, v_h1_tm1)
    theano.printing.Print("v_h1_t.shape")(state.shape)

    pred = state.dot(pred_proj) + pred_b
    # A clip to [0 + 0.01, n_bins - 0.01] followed by tensor.floor was
    # tried on this MSE estimate and left disabled ("not perfect").
    first_column = pred[:, 0]
    return first_column, state
(shuff_inpt_shapes[2], shuff_inpt_shapes[1] * shuff_inpt_shapes[0], 1)) theano.printing.Print("shuff_inpt.shape")(shuff_inpt.shape) theano.printing.Print("vinp.shape")(vinp.shape) # input from previous time, pred from previous feature true_f = tensor.zeros_like(target) # Target *just* offset in frequency so we can use it true_f = tensor.set_subtensor(true_f[:, :, 1:], target[:, :, :-1]) true_f = true_f.dimshuffle(2, 0, 1) true_f_shapes = true_f.shape true_f = true_f.reshape( (true_f_shapes[0], true_f_shapes[1] * true_f_shapes[2], 1)) theano.printing.Print("shuff_inpt.shape")(shuff_inpt.shape) theano.printing.Print("vinp.shape")(shuff_inpt.shape) theano.printing.Print("true_f.shape")(true_f.shape) j = concatenate((shuff_inpt, vinp, true_f), axis=-1) vinp_h1, vgate_h1 = outs_to_v_h1.proj(j) def out_step(vinp_h1_t, vinpgate_h1_t, v_h1_tm1): v_h1_t = v_cell1.step(vinp_h1_t, vinpgate_h1_t, v_h1_tm1) return v_h1_t init_hidden = tensor.zeros((shuff_inpt.shape[1], n_v_proj), dtype=theano.config.floatX) theano.printing.Print("init_hidden.shape")(init_hidden.shape) v_h1, updates = theano.scan(fn=out_step, sequences=[vinp_h1, vgate_h1], outputs_info=[init_hidden]) pre_pred = v_h1.dot(pred_proj) + pred_b pre_pred = pre_pred.dimshuffle(1, 0, 2) shp = pre_pred.shape
1)) theano.printing.Print("shuff_inpt.shape")(shuff_inpt.shape) theano.printing.Print("vinp.shape")(vinp.shape) # input from previous time, pred from previous feature true_f = tensor.zeros_like(target) # Target *just* offset in frequency so we can use it true_f = tensor.set_subtensor(true_f[:, :, 1:], target[:, :, :-1]) true_f = true_f.dimshuffle(2, 0, 1) true_f_shapes = true_f.shape true_f = true_f.reshape((true_f_shapes[0], true_f_shapes[1] * true_f_shapes[2], 1)) theano.printing.Print("shuff_inpt.shape")(shuff_inpt.shape) theano.printing.Print("vinp.shape")(shuff_inpt.shape) theano.printing.Print("true_f.shape")(true_f.shape) j = concatenate((shuff_inpt, vinp, true_f), axis=-1) vinp_h1, vgate_h1 = outs_to_v_h1.proj(j) def out_step(vinp_h1_t, vinpgate_h1_t, v_h1_tm1): v_h1_t = v_cell1.step(vinp_h1_t, vinpgate_h1_t, v_h1_tm1) return v_h1_t init_hidden = tensor.zeros((shuff_inpt.shape[1], n_v_proj), dtype=theano.config.floatX) theano.printing.Print("init_hidden.shape")(init_hidden.shape) v_h1, updates = theano.scan( fn=out_step, sequences=[vinp_h1, vgate_h1], outputs_info=[init_hidden]) pre_pred = v_h1.dot(pred_proj) + pred_b pre_pred = pre_pred.dimshuffle(1, 0, 2) shp = pre_pred.shape
def sample_step(x_tm1, h1_tm1, h2_tm1, h3_tm1, k_tm1, w_tm1, ctx):
    """Run one sampling step of the three-cell attention model.

    The previous input ``x_tm1`` is projected into each of the three cells;
    the previous attention window ``w_tm1`` feeds cell 1. Cell 1's output
    drives the attention parameters (``a_t``, ``b_t`` and the cumulative
    ``k_t``), from which a new window ``w_t`` over ``ctx`` is computed and
    fed to cells 2 and 3. The summed cell readouts are then run, together
    with ``x_tm1``, through an inner ``theano.scan`` over ``v_cell1``.

    Args:
        x_tm1: Previous input; it is transposed with
            ``dimshuffle(1, 0, 'x')``, so it is treated as 2-D.
        h1_tm1, h2_tm1, h3_tm1: Previous hidden states of the three cells.
        k_tm1: Previous attention position term (``exp`` of the new term is
            added to it).
        w_tm1: Previous attention window.
        ctx: Context tensor; weighted after ``dimshuffle(1, 0, 2)``.

    Returns:
        The tuple ``(x_t, h1_t, h2_t, h3_t, k_t, w_t, ss_t, sh_t,
        isupdates)`` where ``x_t`` is the affine readout of the inner scan,
        ``ss_t``/``sh_t`` are ``calc_phi`` evaluated at ``u``/``u_max``
        (the latter is the stopping criterion), and ``isupdates`` is the
        inner scan's updates object.
    """
    xinp_h1_t, xgate_h1_t = inp_to_h1.proj(x_tm1)
    xinp_h2_t, xgate_h2_t = inp_to_h2.proj(x_tm1)
    xinp_h3_t, xgate_h3_t = inp_to_h3.proj(x_tm1)

    attinp_h1, attgate_h1 = att_to_h1.proj(w_tm1)

    h1_t = cell1.step(xinp_h1_t + attinp_h1, xgate_h1_t + attgate_h1,
                      h1_tm1)
    h1inp_h2, h1gate_h2 = h1_to_h2.proj(h1_t)
    h1inp_h3, h1gate_h3 = h1_to_h3.proj(h1_t)

    # Attention parameters; exp keeps them positive, and k_t accumulates on
    # top of k_tm1.
    a_t = tensor.exp(h1_t.dot(h1_to_att_a))
    b_t = tensor.exp(h1_t.dot(h1_to_att_b))
    k_t = k_tm1 + tensor.exp(h1_t.dot(h1_to_att_k))

    ss_t = calc_phi(k_t, a_t, b_t, u)
    # Calculate and return the stopping criterion.
    sh_t = calc_phi(k_t, a_t, b_t, u_max)

    # Weight the context by the attention values and sum over axis 1.
    ss5 = ss_t.dimshuffle(0, 1, 'x')
    ss6 = ss5 * ctx.dimshuffle(1, 0, 2)
    w_t = ss6.sum(axis=1)

    attinp_h2, attgate_h2 = att_to_h2.proj(w_t)
    attinp_h3, attgate_h3 = att_to_h3.proj(w_t)

    h2_t = cell2.step(xinp_h2_t + h1inp_h2 + attinp_h2,
                      xgate_h2_t + h1gate_h2 + attgate_h2,
                      h2_tm1)
    h2inp_h3, h2gate_h3 = h2_to_h3.proj(h2_t)

    h3_t = cell3.step(xinp_h3_t + h1inp_h3 + h2inp_h3 + attinp_h3,
                      xgate_h3_t + h1gate_h3 + h2gate_h3 + attgate_h3,
                      h3_tm1)

    out_t = (h1_t.dot(h1_to_outs) + h2_t.dot(h2_to_outs)
             + h3_t.dot(h3_to_outs))
    # NOTE(review): Print results are discarded here and below, so these
    # debug ops are presumably never evaluated in the compiled function.
    theano.printing.Print("out_t.shape")(out_t.shape)

    # Transpose both tensors and append a broadcastable axis so the inner
    # scan iterates over what was their axis 1.
    x_tm1_shuf = x_tm1.dimshuffle(1, 0, 'x')
    vinp_t = out_t.dimshuffle(1, 0, 'x')
    theano.printing.Print("x_tm1_shuf.shape")(x_tm1_shuf.shape)
    theano.printing.Print("vinp_t.shape")(vinp_t.shape)

    init_hidden = tensor.zeros((x_tm1_shuf.shape[1], n_v_proj),
                               dtype=theano.config.floatX)
    j_t = concatenate((x_tm1_shuf, vinp_t), axis=-1)
    vinp_h1_t, vinpgate_h1_t = outs_to_v_h1.proj(j_t)

    def sample_out_step(vinp_h1_f, vinpgate_h1_f, v_h1_tm1):
        """Advance ``v_cell1`` by one inner-scan step."""
        v_h1_f = v_cell1.step(vinp_h1_f, vinpgate_h1_f, v_h1_tm1)
        return v_h1_f

    pre_pred_t, isupdates = theano.scan(
        fn=sample_out_step,
        sequences=[vinp_h1_t, vinpgate_h1_t],
        outputs_info=[init_hidden])

    # MSE readout. A sigmoid + sample_binomial(pred_t, n_bins, srng)
    # alternative was left disabled by the original author.
    pred_t = pre_pred_t.dot(pred_proj) + pred_b
    x_t = pred_t
    return x_t, h1_t, h2_t, h3_t, k_t, w_t, ss_t, sh_t, isupdates
shp = vinp.shape
shuff_inpt_shapes = inpt.shape
# NOTE(review): every Print result in this fragment is discarded, so the
# ops are presumably never attached to the compiled graph.
theano.printing.Print("inpt.shape")(inpt.shape)

# Reverse inpt's three axes, then merge the last two and add a trailing
# axis of length 1.
shuff_inpt = inpt.dimshuffle(2, 1, 0)
theano.printing.Print("shuff_inpt.shape")(shuff_inpt.shape)
shuff_inpt = shuff_inpt.reshape(
    (shuff_inpt_shapes[2], shuff_inpt_shapes[1] * shuff_inpt_shapes[0], 1))
theano.printing.Print("shuff_inpt.shape")(shuff_inpt.shape)
theano.printing.Print("vinp.shape")(vinp.shape)

# Input from previous time, prediction from previous feature.
# NOTE(review): true_f is not used within this fragment (it is not part of
# the concatenation below) -- confirm whether later code still needs it.
true_f = tensor.zeros_like(target)

theano.printing.Print("shuff_inpt.shape")(shuff_inpt.shape)
# Fixed: this op is labelled "vinp.shape" but was given shuff_inpt.shape.
theano.printing.Print("vinp.shape")(vinp.shape)

j = concatenate((shuff_inpt, vinp), axis=-1)
vinp_h1, vgate_h1 = outs_to_v_h1.proj(j)


def out_step(vinp_h1_t, vinpgate_h1_t, v_h1_tm1):
    """Advance ``v_cell1`` by one scan step and return the new state."""
    v_h1_t = v_cell1.step(vinp_h1_t, vinpgate_h1_t, v_h1_tm1)
    return v_h1_t


# Zero initial state: one row per entry of shuff_inpt's axis 1.
init_hidden = tensor.zeros((shuff_inpt.shape[1], n_v_proj),
                           dtype=theano.config.floatX)
theano.printing.Print("init_hidden.shape")(init_hidden.shape)

# Scan over the leading axis of the projected input/gate sequences.
v_h1, updates = theano.scan(fn=out_step,
                            sequences=[vinp_h1, vgate_h1],
                            outputs_info=[init_hidden])

# Affine readout, then swap the first two axes.
pre_pred = v_h1.dot(pred_proj) + pred_b
theano.printing.Print("pre_pred.shape")(pre_pred.shape)
pre_pred = pre_pred.dimshuffle(1, 0, 2)
def sample_step(x_tm1, h1_tm1, h2_tm1, h3_tm1, k_tm1, w_tm1, ctx):
    """Run one sampling step of the three-cell attention model.

    The previous input ``x_tm1`` is projected into each of the three cells;
    the previous attention window ``w_tm1`` feeds cell 1. Cell 1's output
    drives the attention parameters (``a_t``, ``b_t`` and the cumulative
    ``k_t``), from which a new window ``w_t`` over ``ctx`` is computed and
    fed to cells 2 and 3. The summed cell readouts are joined with
    ``x_tm1`` and run through an inner ``theano.scan`` over ``v_cell1``;
    the scan output is flattened per row and read out linearly.

    Args:
        x_tm1: Previous input; it is transposed with
            ``dimshuffle(1, 0, 'x')``, so it is treated as 2-D.
        h1_tm1, h2_tm1, h3_tm1: Previous hidden states of the three cells.
        k_tm1: Previous attention position term (``exp`` of the new term is
            added to it).
        w_tm1: Previous attention window.
        ctx: Context tensor; weighted after ``dimshuffle(1, 0, 2)``.

    Returns:
        The tuple ``(x_t, h1_t, h2_t, h3_t, k_t, w_t, ss_t, sh_t,
        isupdates)`` where ``x_t`` is the affine readout of the flattened
        inner-scan output, ``ss_t``/``sh_t`` are ``calc_phi`` evaluated at
        ``u``/``u_max`` (the latter is the stopping criterion), and
        ``isupdates`` is the inner scan's updates object.
    """
    xinp_h1_t, xgate_h1_t = inp_to_h1.proj(x_tm1)
    xinp_h2_t, xgate_h2_t = inp_to_h2.proj(x_tm1)
    xinp_h3_t, xgate_h3_t = inp_to_h3.proj(x_tm1)

    attinp_h1, attgate_h1 = att_to_h1.proj(w_tm1)

    h1_t = cell1.step(xinp_h1_t + attinp_h1, xgate_h1_t + attgate_h1,
                      h1_tm1)
    h1inp_h2, h1gate_h2 = h1_to_h2.proj(h1_t)
    h1inp_h3, h1gate_h3 = h1_to_h3.proj(h1_t)

    # Attention parameters; exp keeps them positive, and k_t accumulates on
    # top of k_tm1.
    a_t = tensor.exp(h1_t.dot(h1_to_att_a))
    b_t = tensor.exp(h1_t.dot(h1_to_att_b))
    k_t = k_tm1 + tensor.exp(h1_t.dot(h1_to_att_k))

    ss_t = calc_phi(k_t, a_t, b_t, u)
    # Calculate and return the stopping criterion.
    sh_t = calc_phi(k_t, a_t, b_t, u_max)

    # Weight the context by the attention values and sum over axis 1.
    ss5 = ss_t.dimshuffle(0, 1, 'x')
    ss6 = ss5 * ctx.dimshuffle(1, 0, 2)
    w_t = ss6.sum(axis=1)

    attinp_h2, attgate_h2 = att_to_h2.proj(w_t)
    attinp_h3, attgate_h3 = att_to_h3.proj(w_t)

    h2_t = cell2.step(xinp_h2_t + h1inp_h2 + attinp_h2,
                      xgate_h2_t + h1gate_h2 + attgate_h2,
                      h2_tm1)
    h2inp_h3, h2gate_h3 = h2_to_h3.proj(h2_t)

    h3_t = cell3.step(xinp_h3_t + h1inp_h3 + h2inp_h3 + attinp_h3,
                      xgate_h3_t + h1gate_h3 + h2gate_h3 + attgate_h3,
                      h3_tm1)

    out_t = (h1_t.dot(h1_to_outs) + h2_t.dot(h2_to_outs)
             + h3_t.dot(h3_to_outs))
    # NOTE(review): Print results are discarded here and below, so these
    # debug ops are presumably never evaluated in the compiled function.
    theano.printing.Print("out_t.shape")(out_t.shape)

    # Transpose both tensors and append a broadcastable axis so the inner
    # scan iterates over what was their axis 1.
    vinp_t = out_t.dimshuffle(1, 0, 'x')
    theano.printing.Print("vinp_t.shape")(vinp_t.shape)
    theano.printing.Print("x_tm1.shape")(x_tm1.shape)
    shuff_inpt_t = x_tm1.dimshuffle(1, 0, 'x')
    theano.printing.Print("shuff_inpt_t.shape")(shuff_inpt_t.shape)
    j_t = concatenate((shuff_inpt_t, vinp_t), axis=-1)

    def sample_out_step(j_f, v_h1_tm1):
        """Project one slice of the joined input and advance ``v_cell1``."""
        vinp_h1_t, vgate_h1_t = outs_to_v_h1.proj(j_f)
        v_h1_t = v_cell1.step(vinp_h1_t, vgate_h1_t, v_h1_tm1)
        return v_h1_t

    # dtype is spelled out to match the sibling snippets; floatX is already
    # the default of tensor.zeros.
    init_corr_out_t = tensor.zeros((vinp_t.shape[1], n_v_proj),
                                   dtype=theano.config.floatX)
    theano.printing.Print("init_corr_out_t.shape")(init_corr_out_t.shape)
    corr_out_t, isupdates = theano.scan(fn=sample_out_step,
                                        sequences=[j_t],
                                        outputs_info=[init_corr_out_t])
    theano.printing.Print("corr_out_t.shape")(corr_out_t.shape)

    # Swap the first two axes, then flatten everything after axis 0.
    corr_out_t = corr_out_t.dimshuffle(1, 0, 2)
    theano.printing.Print("corr_out_t.shape")(corr_out_t.shape)
    shp = corr_out_t.shape
    corr_out_t = corr_out_t.reshape((shp[0], -1))
    theano.printing.Print("corr_out_t.shape")(corr_out_t.shape)

    pre_pred_t = corr_out_t.dot(pred_proj) + pred_b
    theano.printing.Print("pre_pred_t.shape")(pre_pred_t.shape)

    # MSE readout. A sigmoid + sample_binomial(pred_t, n_bins, srng)
    # alternative was left disabled by the original author.
    pred_t = pre_pred_t
    x_t = pred_t
    theano.printing.Print("pred_t.shape")(pred_t.shape)
    return x_t, h1_t, h2_t, h3_t, k_t, w_t, ss_t, sh_t, isupdates