Example #1
    def __init__(self, input_size, output_size, name="", weight_init=HeUniform(1.0), bias_init=Constant(0)):
        """
        Initialize a Feedforward cell.
        """

        self.W = parameter(init_array(weight_init, (input_size, output_size)), name=name + ".W")
        self.b = parameter(init_array(bias_init, (1, output_size)), name=name + ".b")
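
The forward pass is not included in this snippet, but given these parameter shapes the cell presumably computes the usual affine map for a batch of inputs X of shape (batch, input_size):

    output = X.dot(W) + b    # (batch, output_size); the (1, output_size) bias broadcasts over rows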
Example #2
    def __init__(self,
                 input_shapes,
                 axis=1,
                 name=None,
                 M=nn.IIDGaussian(std=0.001),
                 N=nn.IIDGaussian(std=0.001),
                 b=nn.Constant(0)):
        assert axis >= 1
        self.axis = axis
        name = "unnamed" if name is None else name

        self.y_shape, self.u_shape = input_shapes
        self.y_dim = int(np.prod(self.y_shape[self.axis - 1:]))
        self.u_dim, = self.u_shape

        self.M = nn.parameter(nn.init_array(
            M, (self.y_dim, self.y_dim, self.u_dim)),
                              name=name + ".M")
        self.N = nn.parameter(nn.init_array(N, (self.y_dim, self.u_dim)),
                              name=name + ".N")
        if b is None:
            self.b = None
        else:
            self.b = nn.parameter(nn.init_array(b, (self.y_dim, )),
                                  name=name + ".b")  # TODO: not regularizable
Example #3
 def add_gate_params(gate, gate_name):
     """ Convenience function for adding layer parameters from a Gate
     instance. """
     return (parameter(init_array(gate.W_in, (input_feature_size, num_units)), name=gate_name+".W_in"),
             parameter(init_array(gate.W_hid, (num_units, num_units)), name=gate_name+".W_hid"),
             parameter(init_array(gate.b, (1, num_units)), name=gate_name+".b"),
             gate.nonlinearity)
Example #4
    def __init__(self,
                 input_channels,
                 output_channels,
                 kernelshape,
                 pad,
                 stride=(1, 1),
                 name=None,
                 weight_init=nn.Constant(0),
                 bias_init=nn.Constant(0)):
        # type conversion
        self.input_channels = int(input_channels)
        self.output_channels = int(output_channels)
        self.kernelshape = tuple(map(int, kernelshape))
        self.pad = tuple(map(int, pad))
        self.stride = tuple(map(int, stride))
        name = "unnamed" if name is None else name

        self.weight = theano.shared(nn.init_array(
            weight_init,
            (self.output_channels, self.input_channels) + self.kernelshape),
                                    name=name + ".W")
        self.bias = theano.shared(nn.init_array(
            bias_init, (1, self.output_channels, 1, 1)),
                                  name=name + ".b")
        self.bias.type.broadcastable = (True, False, True, True)
Example #5
    def __init__(self, input_feature_size, input_time_size, num_units,
                 weight_init=HeUniform(),
                 activation=cgt.sigmoid,
                 cell_out_init=IIDUniform(-0.1, 0.1),
                 hid_out_init=IIDUniform(-0.1, 0.1),
                 #cell_out_init=Constant(0.0),
                 #hid_out_init=Constant(0.0),
                 backwards=False):

        ingate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)
        forgetgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)
        cell = Gate(W_cell=None, nonlinearity=cgt.tanh)
        outgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)

        self.nonlinearity = activation
        self.num_units = num_units
        self.backwards = backwards
        self.timesteps = input_time_size

        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (parameter(init_array(gate.W_in, (input_feature_size, num_units)), name=gate_name+".W_in"),
                    parameter(init_array(gate.W_hid, (num_units, num_units)), name=gate_name+".W_hid"),
                    parameter(init_array(gate.b, (1, num_units)), name=gate_name+".b"),
                    gate.nonlinearity)

        # Add in parameters from the supplied Gate instances
        (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate, self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')

        (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate, self.nonlinearity_forgetgate) = add_gate_params(forgetgate, 'forgetgate')

        (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell, self.nonlinearity_cell) = add_gate_params(cell, 'cell')

        (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate, self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate')

        self.hid_init = parameter(init_array(hid_out_init, (1, num_units)), name=None)

        self.cell_init = parameter(init_array(cell_out_init, (1, num_units)), name=None)


        # Stack input weight matrices into a (num_inputs, 4*num_units)
        # matrix, which speeds up computation
        self.W_in_stacked = cgt.concatenate(
            [self.W_in_to_ingate, self.W_in_to_forgetgate,
             self.W_in_to_cell, self.W_in_to_outgate], axis=1)

        # Same for hidden weight matrices
        self.W_hid_stacked = cgt.concatenate(
            [self.W_hid_to_ingate, self.W_hid_to_forgetgate,
             self.W_hid_to_cell, self.W_hid_to_outgate], axis=1)

        # Stack biases into a (1, 4*num_units) row vector
        self.b_stacked = cgt.concatenate(
            [self.b_ingate, self.b_forgetgate,
             self.b_cell, self.b_outgate], axis=1)

        self.cell_prev = None
        self.hid_prev = None
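
The step function is omitted from this snippet; typically the stacked matrices above are consumed with one matrix multiply per timestep and then sliced per gate. A minimal sketch of that pattern (variable names here are hypothetical, not from the original code):

    gates_b4u = x_t.dot(W_in_stacked) + h_prev.dot(W_hid_stacked) + b_stacked  # (batch, 4*num_units)
    in_gate     = gates_b4u[:, 0*num_units:1*num_units]
    forget_gate = gates_b4u[:, 1*num_units:2*num_units]
    cell_input  = gates_b4u[:, 2*num_units:3*num_units]
    out_gate    = gates_b4u[:, 3*num_units:4*num_units]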
Example #6
    def __init__(self, input_size, output_size, name=None, weight_init=nn.Zeros(), bias_init=nn.Zeros()):
        input_size = int(input_size)
        output_size = int(output_size)
        name = "unnamed" if name is None else name

        self.weight = theano.shared(nn.init_array(weight_init, (input_size, output_size)), name=name + ".W")
        self.bias = theano.shared(nn.init_array(bias_init, (1, output_size)), name=name + ".b")
        self.bias.type.broadcastable = (True, False)
Example #7
    def make_prediction(self, max_label_length, ground_labels_basis_btc):
        context_i_bf = parameter(init_array(IIDGaussian(0.1), (self.batch_size, self.feature_size)), name=None)
        state_i_bf = parameter(init_array(IIDGaussian(0.1), (self.batch_size, self.decoder_size)), name=None)
        char_list = []
        for iter_step in range(0, max_label_length): #Is this right?
            prev_out_bc = ground_labels_basis_btc[:, iter_step, :]
            state_i_bf = self.get_decoder_state(context_i_bf, prev_out_bc, state_i_bf)
            context_i_bf = self.get_context(state_i_bf)
            this_character_dist = self.get_character_distribution(state_i_bf, context_i_bf)
            char_list.append(cgt.argmax(this_character_dist, axis=1))

        final = cgt.dimshuffle(cgt.stack(char_list), [1, 0])
        return final
Example #8
    def __init__(self,
                 input_size,
                 rnn_size,
                 name="",
                 weight_init=HeUniform(1.0)):
        """
        lstm cell
        """
        # TODO: add bias

        # forget gate weights
        self.W_xf = parameter(init_array(weight_init, (input_size, rnn_size)),
                              name=name + ".W_xf")
        self.W_hf = parameter(init_array(weight_init, (rnn_size, rnn_size)),
                              name=name + "W_hf")

        # input gate weights
        self.W_xi = parameter(init_array(weight_init, (input_size, rnn_size)),
                              name=name + ".W_xi")
        self.W_hi = parameter(init_array(weight_init, (rnn_size, rnn_size)),
                              name=name + "W_hi")

        # output gate weights
        self.W_xo = parameter(init_array(weight_init, (input_size, rnn_size)),
                              name=name + ".W_xo")
        self.W_ho = parameter(init_array(weight_init, (rnn_size, rnn_size)),
                              name=name + "W_ho")

        # candidate value weights
        self.W_xc = parameter(init_array(weight_init, (input_size, rnn_size)),
                              name=name + ".W_xc")
        self.W_hc = parameter(init_array(weight_init, (rnn_size, rnn_size)),
                              name=name + "W_hc")
Example #9
    def __init__(self, input_channels, output_channels, kernelshape, pad, stride=(1,1), name=None, weight_init=nn.Constant(0), bias_init=nn.Constant(0)):
        # type conversion
        self.input_channels = int(input_channels)
        self.output_channels = int(output_channels)
        self.kernelshape = tuple(map(int, kernelshape))
        self.pad = tuple(map(int,pad))
        self.stride = tuple(map(int,stride))
        name = "unnamed" if name is None else name

        self.weight = theano.shared(nn.init_array(weight_init, (self.output_channels, self.input_channels) + self.kernelshape),
            name=name+".W")
        self.bias = theano.shared(nn.init_array(bias_init, (1, self.output_channels, 1, 1)), 
            name=name+".b")
        self.bias.type.broadcastable = (True,False,True,True)
Example #10
    def __init__(self,
                 input_size,
                 output_size,
                 name="",
                 weight_init=HeUniform(1.0),
                 bias_init=Constant(0)):
        """
        Initialize a Feedforward cell.
        """

        self.W = parameter(init_array(weight_init, (input_size, output_size)),
                           name=name + ".W")
        self.b = parameter(init_array(bias_init, (1, output_size)),
                           name=name + '.b')
Example #11
def _init_optim_state(ws, reset=False):
    if 'optim_state' in ws and not reset: return
    config = ws['config']
    if 'optim_state' in ws:
        print "Reusing cached optim_state"
        theta = ws['optim_state']['theta']
    elif 'snapshot' in config:
        print "Loading optim_state from previous snapshot: %s" % config['snapshot']
        ws['optim_state'] = pickle.load(open(config['snapshot'], 'r'))
        theta = ws['optim_state']['theta']
    else:
        init_method = config['init_theta']['distr']
        if init_method == 'XavierNormal':
            init_theta = nn.XavierNormal(**config['init_theta']['params'])
        elif init_method == 'gaussian':
            init_theta = nn.IIDGaussian(**config['init_theta']['params'])
        else:
            raise ValueError('unknown init distribution')
        theta = nn.init_array(init_theta, (ws['param_col'].get_total_size(), 1)).flatten()
    method = config['opt_method'].lower()
    if method == 'rmsprop':
        optim_create = lambda t: rmsprop_create(t, step_size=config['step_size'])
    elif method == 'adam':
        optim_create = lambda t: adam_create(t, step_size=config['step_size'])
    else:
        raise ValueError('unknown optimization method: %s' % method)
    if reset or 'optim_state' not in ws:
        ws['optim_state'] = optim_create(theta)
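
For reference, a hypothetical ws/config dict that exercises the keys this function reads (the values and file name are placeholders, not taken from the original project):

    ws = {
        'config': {
            'init_theta': {'distr': 'gaussian', 'params': {'std': 0.01}},
            'opt_method': 'adam',            # or 'rmsprop'
            'step_size': 1e-3,
            # 'snapshot': 'optim_state.pkl', # optional: resume from a pickled optim_state
        },
        'param_col': param_col,              # must provide get_total_size()
    }
    _init_optim_state(ws)                    # populates ws['optim_state']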
Example #12
def test_get_decoder_state():
    batch_size = 32
    feat_t_steps = 20
    feat_num_features = 42
    num_out_classes = 28
    num_out_classes_true = num_out_classes + 2  # start and end tokens are added
    decoder_size = 50

    tau = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_t_steps*feat_num_features), (batch_size, feat_t_steps, feat_num_features))
    tau2 = np.reshape(np.random.normal(0.1, 0.2, batch_size*feat_num_features), (batch_size, feat_num_features))
    tau3 = np.reshape(np.random.normal(0.1, 0.2, batch_size*num_out_classes_true), (batch_size, num_out_classes_true))

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    s = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes,
                          decoder_size=decoder_size, feature_size=feat_num_features)

    context_bf = cgt.matrix(fixed_shape=(batch_size, feat_num_features))
    prev_out_bc = cgt.matrix(fixed_shape=(batch_size, num_out_classes_true))
    state_i_bf = nn.parameter(nn.init_array(nn.IIDGaussian(0.1), (batch_size, decoder_size)), name="decoder_init")
    decoder_out = s.get_decoder_state(context_bf, prev_out_bc, state_i_bf)
    decode_fun = cgt.function([feats, context_bf, prev_out_bc], [decoder_out])

    m = decode_fun(tau, tau2, tau3)[0]
    assert m.shape == (batch_size, decoder_size)
    assert np.mean(m) < 1.0
Example #13
def _init_optim_state(ws, reset=False):
    if 'optim_state' in ws and not reset: return
    config = ws['config']
    if 'optim_state' in ws:
        print "Reusing cached optim_state"
        theta = ws['optim_state']['theta']
    elif 'snapshot' in config:
        print "Loading optim_state from previous snapshot: %s" % config[
            'snapshot']
        ws['optim_state'] = pickle.load(open(config['snapshot'], 'r'))
        theta = ws['optim_state']['theta']
    else:
        init_method = config['init_theta']['distr']
        if init_method == 'XavierNormal':
            init_theta = nn.XavierNormal(**config['init_theta']['params'])
        elif init_method == 'gaussian':
            init_theta = nn.IIDGaussian(**config['init_theta']['params'])
        else:
            raise ValueError('unknown init distribution')
        theta = nn.init_array(init_theta,
                              (ws['param_col'].get_total_size(), 1)).flatten()
    method = config['opt_method'].lower()
    if method == 'rmsprop':
        optim_create = lambda t: rmsprop_create(t,
                                                step_size=config['step_size'])
    elif method == 'adam':
        optim_create = lambda t: adam_create(t, step_size=config['step_size'])
    else:
        raise ValueError('unknown optimization method: %s' % method)
    if reset or 'optim_state' not in ws:
        ws['optim_state'] = optim_create(theta)
Example #14
    def __init__(self, input_size, hidden_size, name="", weight_init=HeUniform(1.0)):
        """
        Initialize an RNN cell
        """

        # input to hidden
        self.W_xh = parameter(init_array(weight_init, (input_size, hidden_size)),
            name=name+".W_xh")

        # hidden to hidden
        self.W_hh = parameter(init_array(weight_init, (hidden_size, hidden_size)),
            name=name+".W_hh")

        # hidden to output
        self.W_ho = parameter(init_array(weight_init, (hidden_size, hidden_size)),
            name=name+".W_ho")
Example #15
    def __init__(self,
                 input_size,
                 output_size,
                 name=None,
                 weight_init=nn.Constant(0),
                 bias_init=nn.Constant(0)):
        input_size = int(input_size)
        output_size = int(output_size)
        name = "unnamed" if name is None else name

        self.weight = theano.shared(nn.init_array(weight_init,
                                                  (input_size, output_size)),
                                    name=name + ".W")
        self.bias = theano.shared(nn.init_array(bias_init, (1, output_size)),
                                  name=name + ".b")
        self.bias.type.broadcastable = (True, False)
Example #16
    def __init__(self, input_feature_size, input_time_size, num_units,
                 weight_init=XavierNormal(),
                 activation=cgt.sigmoid,
                 hid_out_init=IIDUniform(0, 1),
                 backwards=False):

        self.num_units = num_units
        self.timesteps = input_time_size
        self.num_batches = None
        self.backwards = backwards
        self.input_feature_size = input_feature_size

        resetgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=None, nonlinearity=activation)
        updategate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=None, nonlinearity=activation)
        hidden_update = Gate(W_in=weight_init, W_hid=weight_init, W_cell=None, nonlinearity=cgt.tanh)


        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (parameter(init_array(gate.W_in, (input_feature_size, num_units)), name=gate_name+".W_in"),
                    parameter(init_array(gate.W_hid, (num_units, num_units)), name=gate_name+".W_hid"),
                    parameter(init_array(gate.b, (1, num_units)), name=gate_name+".b"),
                    gate.nonlinearity)

        # Add in all parameters from gates
        (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate, self.nonlinearity_updategate) = add_gate_params(updategate, 'updategate')
        (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate, self.nonlinearity_resetgate) = add_gate_params(resetgate, 'resetgate')
        (self.W_in_to_hidden_update, self.W_hid_to_hidden_update, self.b_hidden_update, self.nonlinearity_hid) = add_gate_params(hidden_update, 'hidden_update')

        self.hid_init = parameter(init_array(hid_out_init, (1, num_units)), name='.hid_out_init')
        self.hid_out = None
Example #17
 def __init__(self, num_units, input_feature_size, input_time_size, activation=rectify,
              backwards=False, weight_init=XavierNormal(), hid_out_init=IIDUniform(0, 1)):
     self.in_to_hid = Affine(input_size=input_feature_size, output_size=num_units, weight_init=weight_init)
     self.hid_to_hid = Affine(input_size=num_units, output_size=num_units, weight_init=weight_init)
     self.activation = activation
     self.hid_init = parameter(init_array(hid_out_init, (1, num_units)), name='.hid_out_init')
     self.timesteps = input_time_size
     self.backwards = backwards
Example #18
    def get_train_objective(self, max_label_length, ground_labels_basis_btc):
        context_i_bf = parameter(init_array(IIDUniform(-0.1, 0.1), (self.batch_size, self.feature_size)), name=None)
        state_i_bf = parameter(init_array(IIDUniform(-0.1, 0.1), (self.batch_size, self.decoder_size)), name=None)
        prev_out_bc = cgt.zeros((self.batch_size, self.true_number_classes), dtype='i8') #+ self.start_token_index
        log_probs = None
        for iter_step in range(0, max_label_length):
            state_i_bf = self.get_decoder_state(context_i_bf, prev_out_bc, state_i_bf)
            context_i_bf = self.get_context(state_i_bf)
            this_character_dist_bc = self.get_character_distribution(state_i_bf, context_i_bf)
            prev_out_bc = ground_labels_basis_btc[:, iter_step, :]
            log_probs_pre = prev_out_bc * this_character_dist_bc
            log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
            if log_probs is None:
                log_probs = cgt.sum(log_probs_pre)
            else:
                log_probs += cgt.sum(log_probs_pre)

        log_probs = -log_probs
        return log_probs
Example #19
def train(args, X, Y, dbg_iter=None, dbg_epoch=None, dbg_done=None):
    dbg_out = []
    net_in, net_out = hybrid_network(args.num_inputs,
                                     args.num_outputs,
                                     args.num_units,
                                     args.num_sto,
                                     dbg_out=dbg_out)
    params, f_step, f_loss, f_grad, f_surr = \
        make_funcs(net_in, net_out, args, dbg_out=dbg_out)
    param_col = ParamCollection(params)
    init_params = nn.init_array(args.init_conf,
                                (param_col.get_total_size(), 1))
    param_col.set_value_flat(init_params.flatten())
    init_params = [
        np.array([[0., 1.]]),  # W_1
        np.array([[0., 0.]]),  # b_1
        np.array([[1.], [1.]]),  # W_3
        np.array([[0.]]),  # b_3
    ]
    param_col.set_values(init_params)
    if 'snapshot' in args:
        print "Loading params from previous snapshot"
        snapshot = pickle.load(open(args['snapshot'], 'r'))
        param_col.set_values(snapshot)
    # param_col.set_value_flat(
    #     np.random.normal(0., 1.,size=param_col.get_total_size())
    # )
    # optim_state = Table(theta=param_col.get_value_flat(),
    #                     scratch=param_col.get_value_flat(),
    #                     step_size=args.step_size
    #                     )

    optim_state = make_rmsprop_state(theta=param_col.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)
    for i_epoch in range(args.n_epochs):
        for i_iter in range(X.shape[0]):
            ind = np.random.choice(X.shape[0], args['size_batch'])
            x, y = X[ind], Y[ind]  # not sure this works for multi-dim
            info = f_surr(x, y, num_samples=args['size_sample'])
            loss, loss_surr, grad = info['loss'], info['surr_loss'], info[
                'surr_grad']
            # loss, loss_surr, grad = f_grad(x, y)
            # update
            rmsprop_update(param_col.flatten_values(grad), optim_state)
            # optim_state.scratch = param_col.flatten_values(grad)
            # optim_state.theta -= optim_state.step_size * optim_state.scratch
            param_col.set_value_flat(optim_state.theta)
            print param_col.get_value_flat()
            if dbg_iter:
                dbg_iter(i_epoch, i_iter, param_col, optim_state, info)
        if dbg_epoch: dbg_epoch(i_epoch, param_col, f_surr)
    if dbg_done: dbg_done(param_col, optim_state, f_surr)
    return optim_state
Example #20
    def __init__(self,
                 input_size,
                 hidden_size,
                 name="",
                 weight_init=HeUniform(1.0)):
        """
        Initialize an RNN cell
        """

        # input to hidden
        self.W_xh = parameter(init_array(weight_init,
                                         (input_size, hidden_size)),
                              name=name + ".W_xh")

        # hidden to hidden
        self.W_hh = parameter(init_array(weight_init,
                                         (hidden_size, hidden_size)),
                              name=name + ".W_hh")

        # hidden to output
        self.W_ho = parameter(init_array(weight_init,
                                         (hidden_size, hidden_size)),
                              name=name + ".W_ho")
Example #21
    def __init__(self, input_feature_size, input_time_size, num_units,
                 weight_init=XavierNormal(),
                 activation=rectify,
                 cell_out_init=IIDUniform(0, 1),
                 hid_out_init=IIDUniform(0, 1),
                 backwards=False):

        ingate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)
        forgetgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)
        cell = Gate(W_cell=None, nonlinearity=cgt.tanh)
        outgate = Gate(W_in=weight_init, W_hid=weight_init, W_cell=weight_init, nonlinearity=activation)

        self.nonlinearity = activation
        self.num_units = num_units
        self.backwards = backwards
        self.timesteps = input_time_size

        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (parameter(init_array(gate.W_in, (input_feature_size, num_units)), name=gate_name+".W_in"),
                    parameter(init_array(gate.W_hid, (num_units, num_units)), name=gate_name+".W_hid"),
                    parameter(init_array(gate.b, (1, num_units)), name=gate_name+".b"),
                    gate.nonlinearity)

        # Add in parameters from the supplied Gate instances
        (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate, self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')

        (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate, self.nonlinearity_forgetgate) = add_gate_params(forgetgate, 'forgetgate')

        (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell, self.nonlinearity_cell) = add_gate_params(cell, 'cell')

        (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate, self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate')

        self.hid_init = parameter(init_array(hid_out_init, (1, num_units)), name='.hid_out_init')

        self.cell_init = parameter(init_array(cell_out_init, (1, num_units)), name='.cell_out_init')
Example #22
def train(args, X, Y, dbg_iter=None, dbg_epoch=None, dbg_done=None):
    dbg_out = []
    net_in, net_out = hybrid_network(args.num_inputs, args.num_outputs,
                                     args.num_units, args.num_sto,
                                     dbg_out=dbg_out)
    params, f_step, f_loss, f_grad, f_surr = \
        make_funcs(net_in, net_out, args, dbg_out=dbg_out)
    param_col = ParamCollection(params)
    init_params = nn.init_array(args.init_conf, (param_col.get_total_size(), 1))
    param_col.set_value_flat(init_params.flatten())
    init_params = [
        np.array([[0., 1.]]),  # W_1
        np.array([[0., 0.]]),  # b_1
        np.array([[1.], [1.]]),  # W_3
        np.array([[0.]]),  # b_3
    ]
    param_col.set_values(init_params)
    if 'snapshot' in args:
        print "Loading params from previous snapshot"
        snapshot = pickle.load(open(args['snapshot'], 'r'))
        param_col.set_values(snapshot)
    # param_col.set_value_flat(
    #     np.random.normal(0., 1.,size=param_col.get_total_size())
    # )
    # optim_state = Table(theta=param_col.get_value_flat(),
    #                     scratch=param_col.get_value_flat(),
    #                     step_size=args.step_size
    #                     )

    optim_state = make_rmsprop_state(theta=param_col.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)
    for i_epoch in range(args.n_epochs):
        for i_iter in range(X.shape[0]):
            ind = np.random.choice(X.shape[0], args['size_batch'])
            x, y = X[ind], Y[ind]  # not sure this works for multi-dim
            info = f_surr(x, y, num_samples=args['size_sample'])
            loss, loss_surr, grad = info['loss'], info['surr_loss'], info['surr_grad']
            # loss, loss_surr, grad = f_grad(x, y)
            # update
            rmsprop_update(param_col.flatten_values(grad), optim_state)
            # optim_state.scratch = param_col.flatten_values(grad)
            # optim_state.theta -= optim_state.step_size * optim_state.scratch
            param_col.set_value_flat(optim_state.theta)
            print param_col.get_value_flat()
            if dbg_iter: dbg_iter(i_epoch, i_iter, param_col, optim_state, info)
        if dbg_epoch: dbg_epoch(i_epoch, param_col, f_surr)
    if dbg_done: dbg_done(param_col, optim_state, f_surr)
    return optim_state
Example #23
    def __init__(self, input_size, rnn_size, name="", weight_init=HeUniform(1.0)):
        """
        lstm cell
        """
        # TODO: add bias

        # forget gate weights
        self.W_xf = parameter(init_array(weight_init, (input_size, rnn_size)), name=name+".W_xf")
        self.W_hf = parameter(init_array(weight_init, (rnn_size, rnn_size)), name=name+".W_hf")

        # input gate weights
        self.W_xi = parameter(init_array(weight_init, (input_size, rnn_size)), name=name+".W_xi")
        self.W_hi = parameter(init_array(weight_init, (rnn_size, rnn_size)), name=name+".W_hi")

        # output gate weights
        self.W_xo = parameter(init_array(weight_init, (input_size, rnn_size)), name=name+".W_xo")
        self.W_ho = parameter(init_array(weight_init, (rnn_size, rnn_size)), name=name+".W_ho")

        # candidate value weights
        self.W_xc = parameter(init_array(weight_init, (input_size, rnn_size)), name=name+".W_xc")
        self.W_hc = parameter(init_array(weight_init, (rnn_size, rnn_size)), name=name+".W_hc")
Example #24
    def __init__(self, nn_input_btf, num_out_classes, get_features_fun=None,
                 feature_size=40, decoder_size=40, w_init=IIDUniform(-0.1, 0.1)):

        self.start_token_index = num_out_classes
        self.end_token_index = self.start_token_index + 1
        self.true_number_classes = num_out_classes + 2  # add dims for start and end token.
        self.batch_size = cgt.infer_shape(nn_input_btf)[0]
        self.w_init = w_init
        self.feature_size = feature_size
        self.decoder_size = decoder_size

        if get_features_fun is not None:
            self.get_features_fun = get_features_fun
        else:
            self.get_features_fun = self.get_features_bengio

        features_btf = self.get_features_fun(nn_input_btf, num_units=self.feature_size)
        # Compute psi<h_u> over all u (timesteps), the features from the ground data.

        # This is for computing the context c_i. The features are put through a dense layer.
        self.features_post_mlp_btf = temporalDenseLayer(features_btf, self.feature_size, w_init=self.w_init,
                                                        activation=linear, bias_init=Constant(0.0))

        self.mixing_vec_w = parameter(init_array(w_init, (1, 1, self.feature_size,)), name=None)

        # These are for the decoder mechanism, which computes s_i.
        rnn_activation = cgt.sigmoid
        recurrence = Recurrent

        self.recurrent_decoder_one = recurrence(num_units=self.decoder_size, input_time_size=None,
                                                input_feature_size=self.feature_size + self.true_number_classes,
                                                weight_init=self.w_init, activation=rnn_activation).take_one_step
        self.recurrent_decoder_two = linear
        #self.recurrent_decoder_two = recurrence(num_units=self.decoder_size, input_time_size=None,
        #                                        input_feature_size=self.decoder_size,
        #                                        weight_init=self.w_init, activation=rnn_activation).take_one_step

        # Multiply s_i by V to make it have same dimension as h_u.
        self.states_mlp_bf = Affine(self.decoder_size, self.feature_size,
                                    weight_init=self.w_init, bias_init=Constant(0.0))
        # This is the final dense layer, which computes the class probs at the end of all things.
        self.final_out_dense = Affine(self.decoder_size + self.feature_size, self.true_number_classes,
                                      weight_init=w_init, bias_init=Constant(0.0))
Example #25
    def __init__(self,
                 input_size,
                 hidden_size,
                 name="",
                 weight_init=HeUniform(1.0)):
        """
        Chung, Junyoung, et al.
        "Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling."
        arXiv preprint arXiv:1412.3555 (2014).

        In the above paper:
            z is used as notation for the update gate
            r as notation for the reset gate
        """
        # TODO: bias
        # The paper makes no mention of bias in equations or text.
        # Sooo I'm not sure we need it.

        # reset gate
        self.W_xr = parameter(init_array(weight_init,
                                         (input_size, hidden_size)),
                              name=name + ".W_input_to_reset")
        self.W_hr = parameter(init_array(weight_init,
                                         (hidden_size, hidden_size)),
                              name=name + "W_hidden_to_reset")

        # update gate
        self.W_xz = parameter(init_array(weight_init,
                                         (input_size, hidden_size)),
                              name=name + ".W_input_to_update")
        self.W_hz = parameter(init_array(weight_init,
                                         (hidden_size, hidden_size)),
                              name=name + "W_hidden_to_update")

        # ~hidden is the candidate activation, so we'll denote it as c
        self.W_xc = parameter(init_array(weight_init,
                                         (input_size, hidden_size)),
                              name=name + ".W_input_to_candidate")
        self.W_hc = parameter(init_array(weight_init,
                                         (hidden_size, hidden_size)),
                              name=name + "W_hidden_to_candidate")
Example #26
def build_fcn_action_cond_encoder_net(input_shapes, levels=None):
    x_shape, u_shape = input_shapes
    x_c_dim = x_shape[0]
    x1_c_dim = 16
    levels = levels or [3]
    levels = sorted(set(levels))

    X = cgt.tensor4('X', fixed_shape=(None, ) + x_shape)
    U = cgt.matrix('U', fixed_shape=(None, ) + u_shape)

    # encoding
    Xlevels = {}
    for level in range(levels[-1] + 1):
        if level == 0:
            Xlevel = X
        else:
            if level == 1:
                xlevelm1_c_dim = x_c_dim
                xlevel_c_dim = x1_c_dim
            else:
                xlevelm1_c_dim = xlevel_c_dim
                xlevel_c_dim = 2 * xlevel_c_dim
            Xlevel_1 = nn.rectify(
                nn.SpatialConvolution(xlevelm1_c_dim,
                                      xlevel_c_dim,
                                      kernelshape=(3, 3),
                                      pad=(1, 1),
                                      stride=(1, 1),
                                      name='conv%d_1' % level,
                                      weight_init=nn.IIDGaussian(std=0.01))(
                                          Xlevels[level - 1]))
            Xlevel_2 = nn.rectify(
                nn.SpatialConvolution(
                    xlevel_c_dim,
                    xlevel_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='conv%d_2' % level,
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_1))
            Xlevel = nn.max_pool_2d(Xlevel_2,
                                    kernelshape=(2, 2),
                                    pad=(0, 0),
                                    stride=(2, 2))
        Xlevels[level] = Xlevel

    # bilinear
    Xlevels_next_pred_0 = {}
    Ylevels = OrderedDict()
    Ylevels_diff_pred = OrderedDict()
    for level in levels:
        Xlevel = Xlevels[level]
        Xlevel_diff_pred = Bilinear(input_shapes,
                                    b=None,
                                    axis=2,
                                    name='bilinear%d' % level)(Xlevel, U)
        Xlevels_next_pred_0[level] = Xlevel + Xlevel_diff_pred
        Ylevels[level] = Xlevel.reshape(
            (Xlevel.shape[0], cgt.mul_multi(Xlevel.shape[1:])))
        Ylevels_diff_pred[level] = Xlevel_diff_pred.reshape(
            (Xlevel_diff_pred.shape[0],
             cgt.mul_multi(Xlevel_diff_pred.shape[1:])))

    # decoding
    Xlevels_next_pred = {}
    for level in range(levels[-1] + 1)[::-1]:
        if level == levels[-1]:
            Xlevel_next_pred = Xlevels_next_pred_0[level]
        else:
            if level == 0:
                xlevelm1_c_dim = x_c_dim
            elif level < levels[-1] - 1:
                xlevel_c_dim = xlevelm1_c_dim
                xlevelm1_c_dim = xlevelm1_c_dim // 2
            Xlevel_next_pred_2 = SpatialDeconvolution(
                xlevel_c_dim,
                xlevel_c_dim,
                kernelshape=(2, 2),
                pad=(0, 0),
                stride=(2, 2),
                name='upsample%d' % (level + 1),
                weight_init=nn.IIDGaussian(std=0.01))(Xlevels_next_pred[
                    level +
                    1])  # TODO initialize with bilinear # TODO should rectify?
            Xlevel_next_pred_1 = nn.rectify(
                SpatialDeconvolution(
                    xlevel_c_dim,
                    xlevel_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='deconv%d_2' % (level + 1),
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_2))
            nonlinearity = nn.rectify if level > 0 else cgt.tanh
            Xlevel_next_pred = nonlinearity(
                SpatialDeconvolution(
                    xlevel_c_dim,
                    xlevelm1_c_dim,
                    kernelshape=(3, 3),
                    pad=(1, 1),
                    stride=(1, 1),
                    name='deconv%d_1' % (level + 1),
                    weight_init=nn.IIDGaussian(std=0.01))(Xlevel_next_pred_1))
            if level in Xlevels_next_pred_0:
                coefs = nn.parameter(nn.init_array(nn.Constant(0.5), (2, )),
                                     name='sum%d.coef' % level)
                Xlevel_next_pred = coefs[0] * Xlevel_next_pred + coefs[
                    1] * Xlevels_next_pred_0[level]
            # TODO: tanh should be after sum
        Xlevels_next_pred[level] = Xlevel_next_pred

    X_next_pred = Xlevels_next_pred[0]
    Y = cgt.concatenate(Ylevels.values(), axis=1)
    Y_diff_pred = cgt.concatenate(Ylevels_diff_pred.values(), axis=1)

    X_diff = cgt.tensor4('X_diff', fixed_shape=(None, ) + x_shape)
    X_next = X + X_diff
    loss = ((X_next - X_next_pred)**2).mean(axis=0).sum() / 2.

    net_name = 'FcnActionCondEncoderNet_levels' + ''.join(
        str(level) for level in levels)
    input_vars = OrderedDict([(var.name, var) for var in [X, U, X_diff]])
    pred_vars = OrderedDict([('Y_diff_pred', Y_diff_pred), ('Y', Y),
                             ('X_next_pred', X_next_pred)])
    return net_name, input_vars, pred_vars, loss