def add_gate_params(gate, gate_name):
    """ Convenience function for adding layer parameters from a Gate instance. """
    return (parameter(init_array(gate.W_in, (input_feature_size, num_units)), name=gate_name + ".W_in"),
            parameter(init_array(gate.W_hid, (num_units, num_units)), name=gate_name + ".W_hid"),
            parameter(init_array(gate.b, (1, num_units)), name=gate_name + ".b"),
            gate.nonlinearity)
def __init__(self, input_shapes, axis=1, name=None,
             M=nn.IIDGaussian(std=0.001), N=nn.IIDGaussian(std=0.001), b=nn.Constant(0)):
    assert axis >= 1
    self.axis = axis
    name = "unnamed" if name is None else name
    self.y_shape, self.u_shape = input_shapes
    self.y_dim = int(np.prod(self.y_shape[self.axis - 1:]))
    self.u_dim, = self.u_shape
    self.M = nn.parameter(nn.init_array(M, (self.y_dim, self.y_dim, self.u_dim)), name=name + ".M")
    self.N = nn.parameter(nn.init_array(N, (self.y_dim, self.u_dim)), name=name + ".N")
    if b is None:
        self.b = None
    else:
        self.b = nn.parameter(nn.init_array(b, (self.y_dim,)), name=name + ".b")  # TODO: not regularizable
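# Sketch (not from this file): given the parameter shapes above, a bilinear dynamics
# layer of this form would typically predict a feature change as
#   y_diff[n, k] = sum_ij M[k, i, j] * y[n, i] * u[n, j] + sum_j N[k, j] * u[n, j] + b[k].
# The layer's actual __call__ is not shown here, so this NumPy helper is only an
# illustration of what the (y_dim, y_dim, u_dim) / (y_dim, u_dim) shapes support.
import numpy as np

def bilinear_forward_sketch(y_ny, u_nu, M, N, b=None):
    """Hypothetical forward pass consistent with the parameter shapes in Bilinear.__init__."""
    y_diff = np.einsum('kij,ni,nj->nk', M, y_ny, u_nu) + u_nu.dot(N.T)
    if b is not None:
        y_diff = y_diff + b
    return y_diff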
def __init__(self, input_size, output_size, name="", weight_init=HeUniform(1.0), bias_init=Constant(0)):
    """ Initialize a Feedforward cell. """
    self.W = parameter(init_array(weight_init, (input_size, output_size)), name=name + ".W")
    self.b = parameter(init_array(bias_init, (1, output_size)), name=name + ".b")
def __init__(self, input_feature_size, input_time_size, num_units, weight_init=HeUniform(),
             activation=cgt.sigmoid,
             cell_out_init=IIDUniform(-0.1, 0.1), hid_out_init=IIDUniform(-0.1, 0.1),
             # cell_out_init=Constant(0.0), hid_out_init=Constant(0.0),
             backwards=False):
    ingate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)
    forgetgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)
    cell = Gate(W_cell=None, nonlinearity=cgt.tanh)
    outgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)

    self.nonlinearity = activation
    self.num_units = num_units
    self.backwards = backwards
    self.timesteps = input_time_size

    def add_gate_params(gate, gate_name):
        """ Convenience function for adding layer parameters from a Gate instance. """
        return (parameter(init_array(gate.W_in, (input_feature_size, num_units)), name=gate_name + ".W_in"),
                parameter(init_array(gate.W_hid, (num_units, num_units)), name=gate_name + ".W_hid"),
                parameter(init_array(gate.b, (1, num_units)), name=gate_name + ".b"),
                gate.nonlinearity)

    # Add in parameters from the supplied Gate instances
    (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate,
     self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')
    (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate,
     self.nonlinearity_forgetgate) = add_gate_params(forgetgate, 'forgetgate')
    (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell,
     self.nonlinearity_cell) = add_gate_params(cell, 'cell')
    (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate,
     self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate')

    self.hid_init = parameter(init_array(hid_out_init, (1, num_units)), name=None)
    self.cell_init = parameter(init_array(cell_out_init, (1, num_units)), name=None)

    # Stack input weight matrices into a (num_inputs, 4*num_units)
    # matrix, which speeds up computation  # checks out
    self.W_in_stacked = cgt.concatenate(
        [self.W_in_to_ingate, self.W_in_to_forgetgate,
         self.W_in_to_cell, self.W_in_to_outgate], axis=1)

    # Same for hidden weight matrices
    self.W_hid_stacked = cgt.concatenate(
        [self.W_hid_to_ingate, self.W_hid_to_forgetgate,
         self.W_hid_to_cell, self.W_hid_to_outgate], axis=1)

    # Stack biases into a (1, 4*num_units) matrix
    self.b_stacked = cgt.concatenate(
        [self.b_ingate, self.b_forgetgate,
         self.b_cell, self.b_outgate], axis=1)

    self.cell_prev = None
    self.hid_prev = None
def make_prediction(self, max_label_length, ground_labels_basis_btc):
    context_i_bf = parameter(init_array(IIDGaussian(0.1), (self.batch_size, self.feature_size)), name=None)
    state_i_bf = parameter(init_array(IIDGaussian(0.1), (self.batch_size, self.decoder_size)), name=None)
    char_list = []
    for iter_step in range(0, max_label_length):
        # Is this right?
        prev_out_bc = ground_labels_basis_btc[:, iter_step, :]
        state_i_bf = self.get_decoder_state(context_i_bf, prev_out_bc, state_i_bf)
        context_i_bf = self.get_context(state_i_bf)
        this_character_dist = self.get_character_distribution(state_i_bf, context_i_bf)
        char_list.append(cgt.argmax(this_character_dist, axis=1))
    final = cgt.dimshuffle(cgt.stack(char_list), [1, 0])
    return final
def __init__(self, input_size, rnn_size, name="", weight_init=HeUniform(1.0)):
    """ lstm cell """
    # TODO: add bias
    # forget gate weights
    self.W_xf = parameter(init_array(weight_init, (input_size, rnn_size)), name=name + ".W_xf")
    self.W_hf = parameter(init_array(weight_init, (rnn_size, rnn_size)), name=name + ".W_hf")
    # input gate weights
    self.W_xi = parameter(init_array(weight_init, (input_size, rnn_size)), name=name + ".W_xi")
    self.W_hi = parameter(init_array(weight_init, (rnn_size, rnn_size)), name=name + ".W_hi")
    # output gate weights
    self.W_xo = parameter(init_array(weight_init, (input_size, rnn_size)), name=name + ".W_xo")
    self.W_ho = parameter(init_array(weight_init, (rnn_size, rnn_size)), name=name + ".W_ho")
    # candidate value weights
    self.W_xc = parameter(init_array(weight_init, (input_size, rnn_size)), name=name + ".W_xc")
    self.W_hc = parameter(init_array(weight_init, (rnn_size, rnn_size)), name=name + ".W_hc")
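# Sketch of how the eight weight matrices above would be combined in a single LSTM step
# (no biases, matching the "TODO: add bias" note). The step helper is illustrative only
# and is not part of the cell defined above.
import cgt

def lstm_step_sketch(cell, x_bi, h_prev_bh, c_prev_bh):
    f = cgt.sigmoid(cgt.dot(x_bi, cell.W_xf) + cgt.dot(h_prev_bh, cell.W_hf))      # forget gate
    i = cgt.sigmoid(cgt.dot(x_bi, cell.W_xi) + cgt.dot(h_prev_bh, cell.W_hi))      # input gate
    o = cgt.sigmoid(cgt.dot(x_bi, cell.W_xo) + cgt.dot(h_prev_bh, cell.W_ho))      # output gate
    c_tilde = cgt.tanh(cgt.dot(x_bi, cell.W_xc) + cgt.dot(h_prev_bh, cell.W_hc))   # candidate value
    c = f * c_prev_bh + i * c_tilde
    h = o * cgt.tanh(c)
    return h, c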
def __init__(self, input_feature_size, input_time_size, num_units, weight_init=XavierNormal(),
             activation=cgt.sigmoid, hid_out_init=IIDUniform(0, 1), backwards=False):
    self.num_units = num_units
    self.timesteps = input_time_size
    self.num_batches = None
    self.backwards = backwards
    self.input_feature_size = input_feature_size

    resetgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=None, nonlinearity=activation)
    updategate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=None, nonlinearity=activation)
    hidden_update = Gate(W_in=weight_init, W_hid=weight_init, W_cell=None, nonlinearity=cgt.tanh)

    def add_gate_params(gate, gate_name):
        """ Convenience function for adding layer parameters from a Gate instance. """
        return (parameter(init_array(gate.W_in, (input_feature_size, num_units)), name=gate_name + ".W_in"),
                parameter(init_array(gate.W_hid, (num_units, num_units)), name=gate_name + ".W_hid"),
                parameter(init_array(gate.b, (1, num_units)), name=gate_name + ".b"),
                gate.nonlinearity)

    # Add in all parameters from gates
    (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
     self.nonlinearity_updategate) = add_gate_params(updategate, 'updategate')
    (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
     self.nonlinearity_resetgate) = add_gate_params(resetgate, 'resetgate')
    (self.W_in_to_hidden_update, self.W_hid_to_hidden_update, self.b_hidden_update,
     self.nonlinearity_hid) = add_gate_params(hidden_update, 'hidden_update')

    self.hid_init = parameter(init_array(hid_out_init, (1, num_units)), name='.hid_out_init')
    self.hid_out = None
def __init__(self, input_size, hidden_size, name="", weight_init=HeUniform(1.0)):
    """ Initialize an RNN cell """
    # input to hidden
    self.W_xh = parameter(init_array(weight_init, (input_size, hidden_size)), name=name + ".W_xh")
    # hidden to hidden
    self.W_hh = parameter(init_array(weight_init, (hidden_size, hidden_size)), name=name + ".W_hh")
    # hidden to output
    self.W_ho = parameter(init_array(weight_init, (hidden_size, hidden_size)), name=name + ".W_ho")
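# Sketch of the step these three matrices imply for a vanilla RNN
# (hypothetical helper, not part of the cell above):
import cgt

def rnn_step_sketch(cell, x_bi, h_prev_bh, activation=cgt.tanh):
    h = activation(cgt.dot(x_bi, cell.W_xh) + cgt.dot(h_prev_bh, cell.W_hh))  # new hidden state
    out = cgt.dot(h, cell.W_ho)                                               # hidden-to-output projection
    return h, out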
def test_get_decoder_state():
    batch_size = 32
    feat_t_steps = 20
    feat_num_features = 42
    num_out_classes = 28
    num_out_classes_true = num_out_classes + 2  # start and end tokens are added
    decoder_size = 50

    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size * feat_t_steps * feat_num_features),
                     (batch_size, feat_t_steps, feat_num_features))
    tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size * feat_num_features),
                      (batch_size, feat_num_features))
    tau3 = np.reshape(np.random.normal(0.1, 0.2, batch_size * num_out_classes_true),
                      (batch_size, num_out_classes_true))

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes,
                          decoder_size=decoder_size, feature_size=feat_num_features)

    context_bf = cgt.matrix(fixed_shape=(batch_size, feat_num_features))
    prev_out_bc = cgt.matrix(fixed_shape=(batch_size, num_out_classes_true))
    state_i_bf = nn.parameter(nn.init_array(nn.IIDGaussian(0.1), (batch_size, decoder_size)),
                              name="decoder_init")

    decoder_out = s.get_decoder_state(context_bf, prev_out_bc, state_i_bf)
    decode_fun = cgt.function([feats, context_bf, prev_out_bc], [decoder_out])

    m = decode_fun(tau, tau2, tau3)[0]
    assert m.shape == (batch_size, decoder_size)
    assert np.mean(m) < 1.0
def __init__(self, num_units, input_feature_size, input_time_size, activation=rectify,
             backwards=False, weight_init=XavierNormal(), hid_out_init=IIDUniform(0, 1)):
    self.in_to_hid = Affine(input_size=input_feature_size, output_size=num_units, weight_init=weight_init)
    self.hid_to_hid = Affine(input_size=num_units, output_size=num_units, weight_init=weight_init)
    self.activation = activation
    self.hid_init = parameter(init_array(hid_out_init, (1, num_units)), name='.hid_out_init')
    self.timesteps = input_time_size
    self.backwards = backwards
def get_train_objective(self, max_label_length, ground_labels_basis_btc):
    context_i_bf = parameter(init_array(IIDUniform(-0.1, 0.1), (self.batch_size, self.feature_size)), name=None)
    state_i_bf = parameter(init_array(IIDUniform(-0.1, 0.1), (self.batch_size, self.decoder_size)), name=None)
    prev_out_bc = cgt.zeros((self.batch_size, self.true_number_classes), dtype='i8')  # + self.start_token_index

    log_probs = None
    for iter_step in range(0, max_label_length):
        state_i_bf = self.get_decoder_state(context_i_bf, prev_out_bc, state_i_bf)
        context_i_bf = self.get_context(state_i_bf)
        this_character_dist_bc = self.get_character_distribution(state_i_bf, context_i_bf)
        prev_out_bc = ground_labels_basis_btc[:, iter_step, :]
        log_probs_pre = prev_out_bc * this_character_dist_bc
        log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
        if log_probs is None:
            log_probs = cgt.sum(log_probs_pre)
        else:
            log_probs += cgt.sum(log_probs_pre)

    log_probs = -log_probs
    return log_probs
def __init__(self, input_feature_size, input_time_size, num_units, weight_init=XavierNormal(),
             activation=rectify,
             cell_out_init=IIDUniform(0, 1), hid_out_init=IIDUniform(0, 1),
             backwards=False):
    ingate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)
    forgetgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)
    cell = Gate(W_cell=None, nonlinearity=cgt.tanh)
    outgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)

    self.nonlinearity = activation
    self.num_units = num_units
    self.backwards = backwards
    self.timesteps = input_time_size

    def add_gate_params(gate, gate_name):
        """ Convenience function for adding layer parameters from a Gate instance. """
        return (parameter(init_array(gate.W_in, (input_feature_size, num_units)), name=gate_name + ".W_in"),
                parameter(init_array(gate.W_hid, (num_units, num_units)), name=gate_name + ".W_hid"),
                parameter(init_array(gate.b, (1, num_units)), name=gate_name + ".b"),
                gate.nonlinearity)

    # Add in parameters from the supplied Gate instances
    (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate,
     self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')
    (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate,
     self.nonlinearity_forgetgate) = add_gate_params(forgetgate, 'forgetgate')
    (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell,
     self.nonlinearity_cell) = add_gate_params(cell, 'cell')
    (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate,
     self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate')

    self.hid_init = parameter(init_array(hid_out_init, (1, num_units)), name='.hid_out_init')
    self.cell_init = parameter(init_array(cell_out_init, (1, num_units)), name='.cell_out_init')
def __init__(self, nn_input_btf, num_out_classes, get_features_fun=None, feature_size=40,
             decoder_size=40, w_init=IIDUniform(-0.1, 0.1)):
    self.start_token_index = num_out_classes
    self.end_token_index = self.start_token_index + 1
    self.true_number_classes = num_out_classes + 2  # add dims for start and end token.
    self.batch_size = cgt.infer_shape(nn_input_btf)[0]
    self.w_init = w_init
    self.feature_size = feature_size
    self.decoder_size = decoder_size

    if get_features_fun is not None:
        self.get_features_fun = get_features_fun
    else:
        self.get_features_fun = self.get_features_bengio

    features_btf = self.get_features_fun(nn_input_btf, num_units=self.feature_size)

    # Compute psi<h_u> over all u (timesteps), the features from the ground data.
    # This is for computing the context c_i. The features are put through a dense layer.
    self.features_post_mlp_btf = temporalDenseLayer(features_btf, self.feature_size,
                                                    w_init=self.w_init, activation=linear,
                                                    bias_init=Constant(0.0))
    self.mixing_vec_w = parameter(init_array(w_init, (1, 1, self.feature_size)), name=None)

    # These are for the decoder mechanism, which computes s_i.
    rnn_activation = cgt.sigmoid
    recurrence = Recurrent
    self.recurrent_decoder_one = recurrence(num_units=self.decoder_size, input_time_size=None,
                                            input_feature_size=self.feature_size + self.true_number_classes,
                                            weight_init=self.w_init,
                                            activation=rnn_activation).take_one_step
    self.recurrent_decoder_two = linear
    # self.recurrent_decoder_two = recurrence(num_units=self.decoder_size, input_time_size=None,
    #                                         input_feature_size=self.decoder_size,
    #                                         weight_init=self.w_init,
    #                                         activation=rnn_activation).take_one_step

    # Multiply s_i by V to make it have the same dimension as h_u.
    self.states_mlp_bf = Affine(self.decoder_size, self.feature_size,
                                weight_init=self.w_init, bias_init=Constant(0.0))

    # This is the final dense layer, which computes the class probs at the end of all things.
    self.final_out_dense = Affine(self.decoder_size + self.feature_size, self.true_number_classes,
                                  weight_init=w_init, bias_init=Constant(0.0))
def __init__(self, input_size, hidden_size, name="", weight_init=HeUniform(1.0)):
    """
    Chung, Junyoung, et al. "Empirical Evaluation of Gated Recurrent Neural Networks
    on Sequence Modeling." arXiv preprint arXiv:1412.3555 (2014).

    In the above paper:
        z is used as notation for the update gate
        r as notation for the reset gate
    """
    # TODO: bias
    # The paper makes no mention of bias in its equations or text,
    # so it is not clear that we need it.

    # reset gate
    self.W_xr = parameter(init_array(weight_init, (input_size, hidden_size)), name=name + ".W_input_to_reset")
    self.W_hr = parameter(init_array(weight_init, (hidden_size, hidden_size)), name=name + ".W_hidden_to_reset")
    # update gate
    self.W_xz = parameter(init_array(weight_init, (input_size, hidden_size)), name=name + ".W_input_to_update")
    self.W_hz = parameter(init_array(weight_init, (hidden_size, hidden_size)), name=name + ".W_hidden_to_update")
    # ~hidden is the candidate activation, so we'll denote it as c
    self.W_xc = parameter(init_array(weight_init, (input_size, hidden_size)), name=name + ".W_input_to_candidate")
    self.W_hc = parameter(init_array(weight_init, (hidden_size, hidden_size)), name=name + ".W_hidden_to_candidate")
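# Sketch of the update these weights imply, following Chung et al. (2014), with no
# biases (matching the TODO above). The step helper itself is illustrative, not part
# of the cell defined above:
#   r_t = sigmoid(x_t W_xr + h_{t-1} W_hr)
#   z_t = sigmoid(x_t W_xz + h_{t-1} W_hz)
#   c_t = tanh(x_t W_xc + (r_t * h_{t-1}) W_hc)
#   h_t = (1 - z_t) * h_{t-1} + z_t * c_t
import cgt

def gru_step_sketch(cell, x_bi, h_prev_bh):
    r = cgt.sigmoid(cgt.dot(x_bi, cell.W_xr) + cgt.dot(h_prev_bh, cell.W_hr))   # reset gate
    z = cgt.sigmoid(cgt.dot(x_bi, cell.W_xz) + cgt.dot(h_prev_bh, cell.W_hz))   # update gate
    c = cgt.tanh(cgt.dot(x_bi, cell.W_xc) + cgt.dot(r * h_prev_bh, cell.W_hc))  # candidate activation
    return (1 - z) * h_prev_bh + z * c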
def __init__(self, obs_dim, ctrl_dim):
    cgt.set_precision('double')
    Serializable.__init__(self, obs_dim, ctrl_dim)
    self.obs_dim = obs_dim
    self.ctrl_dim = ctrl_dim

    o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
    a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
    adv_n = cgt.vector("adv_n")
    oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))

    self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
    std_1a = cgt.exp(logstd_1a)

    # Here's where we apply the network
    h0 = o_no
    nhid = 32
    h1 = cgt.tanh(nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
    h2 = cgt.tanh(nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
    mean_na = nn.Affine(nhid, ctrl_dim, weight_init=nn.IIDGaussian(std=0.01))(h2)

    b = cgt.size(o_no, 0)
    std_na = cgt.repeat(std_1a, b, axis=0)

    oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
    oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

    logp_n = ((-.5) * cgt.square((a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
    oldlogp_n = ((-.5) * cgt.square((a_na - oldmean_na) / oldstd_na).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)

    ratio_n = cgt.exp(logp_n - oldlogp_n)
    surr = (ratio_n * adv_n).mean()

    pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
    # kl = cgt.log(sigafter/)

    params = nn.get_parameters(surr)

    oldvar_na = cgt.square(oldstd_na)
    var_na = cgt.square(std_na)
    kl = (cgt.log(std_na / oldstd_na)
          + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na)
          - .5).sum(axis=1).mean()

    lam = cgt.scalar()
    penobj = surr - lam * kl

    self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self._compute_grad_lagrangian = cgt.function(
        [lam, oldpdist_np, o_no, a_na, adv_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], pdists_np)
    self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self.pc = ParamCollection(params)
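# The kl term above is the standard closed form for diagonal Gaussians,
#   KL(old || new) = sum_a [ log(std_new/std_old)
#                            + (var_old + (mean_old - mean_new)^2) / (2 * var_new) - 1/2 ],
# averaged over the batch. A small NumPy restatement of that same formula, for reference:
import numpy as np

def diag_gauss_kl(mean_old, std_old, mean_new, std_new):
    var_old, var_new = std_old ** 2, std_new ** 2
    return (np.log(std_new / std_old)
            + (var_old + (mean_old - mean_new) ** 2) / (2 * var_new) - 0.5).sum(axis=1).mean()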
def build_fcn_action_cond_encoder_net(input_shapes, levels=None):
    x_shape, u_shape = input_shapes
    x_c_dim = x_shape[0]
    x1_c_dim = 16
    levels = levels or [3]
    levels = sorted(set(levels))

    X = cgt.tensor4('X', fixed_shape=(None,) + x_shape)
    U = cgt.matrix('U', fixed_shape=(None,) + u_shape)

    # encoding
    Xlevels = {}
    for level in range(levels[-1] + 1):
        if level == 0:
            Xlevel = X
        else:
            if level == 1:
                xlevelm1_c_dim = x_c_dim
                xlevel_c_dim = x1_c_dim
            else:
                xlevelm1_c_dim = xlevel_c_dim
                xlevel_c_dim = 2 * xlevel_c_dim
            Xlevel_1 = nn.rectify(
                nn.SpatialConvolution(xlevelm1_c_dim, xlevel_c_dim, kernelshape=(3, 3), pad=(1, 1),
                                      stride=(1, 1), name='conv%d_1' % level,
                                      weight_init=nn.IIDGaussian(std=0.01))(Xlevels[level - 1]))
            Xlevel_2 = nn.rectify(
                nn.SpatialConvolution(xlevel_c_dim, xlevel_c_dim, kernelshape=(3, 3), pad=(1, 1),
                                      stride=(1, 1), name='conv%d_2' % level,
                                      weight_init=nn.IIDGaussian(std=0.01))(Xlevel_1))
            Xlevel = nn.max_pool_2d(Xlevel_2, kernelshape=(2, 2), pad=(0, 0), stride=(2, 2))
        Xlevels[level] = Xlevel

    # bilinear
    Xlevels_next_pred_0 = {}
    Ylevels = OrderedDict()
    Ylevels_diff_pred = OrderedDict()
    for level in levels:
        Xlevel = Xlevels[level]
        Xlevel_diff_pred = Bilinear(input_shapes, b=None, axis=2, name='bilinear%d' % level)(Xlevel, U)
        Xlevels_next_pred_0[level] = Xlevel + Xlevel_diff_pred
        Ylevels[level] = Xlevel.reshape((Xlevel.shape[0], cgt.mul_multi(Xlevel.shape[1:])))
        Ylevels_diff_pred[level] = Xlevel_diff_pred.reshape(
            (Xlevel_diff_pred.shape[0], cgt.mul_multi(Xlevel_diff_pred.shape[1:])))

    # decoding
    Xlevels_next_pred = {}
    for level in range(levels[-1] + 1)[::-1]:
        if level == levels[-1]:
            Xlevel_next_pred = Xlevels_next_pred_0[level]
        else:
            if level == 0:
                xlevelm1_c_dim = x_c_dim
            elif level < levels[-1] - 1:
                xlevel_c_dim = xlevelm1_c_dim
                xlevelm1_c_dim = xlevelm1_c_dim // 2
            Xlevel_next_pred_2 = SpatialDeconvolution(
                xlevel_c_dim, xlevel_c_dim, kernelshape=(2, 2), pad=(0, 0), stride=(2, 2),
                name='upsample%d' % (level + 1),
                weight_init=nn.IIDGaussian(std=0.01))(Xlevels_next_pred[level + 1])
            # TODO initialize with bilinear
            # TODO should rectify?
            Xlevel_next_pred_1 = nn.rectify(SpatialDeconvolution(
                xlevel_c_dim, xlevel_c_dim, kernelshape=(3, 3), pad=(1, 1), stride=(1, 1),
                name='deconv%d_2' % (level + 1),
                weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_2))
            nonlinearity = nn.rectify if level > 0 else cgt.tanh
            Xlevel_next_pred = nonlinearity(SpatialDeconvolution(
                xlevel_c_dim, xlevelm1_c_dim, kernelshape=(3, 3), pad=(1, 1), stride=(1, 1),
                name='deconv%d_1' % (level + 1),
                weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_1))
            if level in Xlevels_next_pred_0:
                coefs = nn.parameter(nn.init_array(nn.Constant(0.5), (2,)), name='sum%d.coef' % level)
                Xlevel_next_pred = coefs[0] * Xlevel_next_pred + coefs[1] * Xlevels_next_pred_0[level]
                # TODO: tanh should be after sum
        Xlevels_next_pred[level] = Xlevel_next_pred

    X_next_pred = Xlevels_next_pred[0]
    Y = cgt.concatenate(Ylevels.values(), axis=1)
    Y_diff_pred = cgt.concatenate(Ylevels_diff_pred.values(), axis=1)

    X_diff = cgt.tensor4('X_diff', fixed_shape=(None,) + x_shape)
    X_next = X + X_diff
    loss = ((X_next - X_next_pred) ** 2).mean(axis=0).sum() / 2.

    net_name = 'FcnActionCondEncoderNet_levels' + ''.join(str(level) for level in levels)
    input_vars = OrderedDict([(var.name, var) for var in [X, U, X_diff]])
    pred_vars = OrderedDict([('Y_diff_pred', Y_diff_pred), ('Y', Y), ('X_next_pred', X_next_pred)])
    return net_name, input_vars, pred_vars, loss
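# Example wiring (the shape values here are illustrative, not taken from this file):
# x_shape is (channels, height, width) for the image and u_shape is (action_dim,) for
# the control input; the returned graph outputs can then be compiled with cgt.function,
# as is done elsewhere in this code.
x_shape, u_shape = (3, 32, 32), (4,)
net_name, input_vars, pred_vars, loss = build_fcn_action_cond_encoder_net((x_shape, u_shape), levels=[3])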