Example 1
def build_model(args):
    x = tensor.tensor3('features', dtype=floatX)
    y = tensor.tensor3('targets', dtype=floatX)

    linear = Linear(input_dim=1, output_dim=4 * args.units)
    rnn = LSTM(dim=args.units, activation=Tanh())
    linear2 = Linear(input_dim=args.units, output_dim=1)

    prediction = Tanh().apply(linear2.apply(rnn.apply(linear.apply(x))))

    prediction = prediction[:-1, :, :]

    # SquaredError does not work on 3D tensors, so collapse the time and batch dimensions
    y = y.reshape((y.shape[0] * y.shape[1], y.shape[2]))
    prediction = prediction.reshape((prediction.shape[0] * prediction.shape[1],
                                     prediction.shape[2]))

    cost = SquaredError().apply(y, prediction)

    # Initialization
    linear.weights_init = IsotropicGaussian(0.1)
    linear2.weights_init = IsotropicGaussian(0.1)
    linear.biases_init = Constant(0)
    linear2.biases_init = Constant(0)
    rnn.weights_init = Orthogonal()

    return cost
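A brief usage sketch, not part of the original example; the imports and the units value are assumptions. It shows how the returned cost could be wrapped in a Blocks computation graph. Note that the bricks above set their *_init attributes but never call initialize(), so that call would normally be added before compiling a training function.

from argparse import Namespace
from blocks.graph import ComputationGraph

cost = build_model(Namespace(units=64))  # hypothetical args namespace
cg = ComputationGraph(cost)
print([p.name for p in cg.parameters])   # parameters of the Linear and LSTM bricks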
Example 2
def decoder_network(latent_sample, latent_dim=J):
  # bernoulli case
  hidden2 = get_typical_layer(latent_sample, latent_dim, 500, Logistic())
  hidden2_to_output = Linear(name="last", input_dim=500, output_dim=784)
  hidden2_to_output.weights_init = IsotropicGaussian(0.01)
  hidden2_to_output.biases_init = Constant(0)
  hidden2_to_output.initialize()
  return Logistic().apply(hidden2_to_output.apply(hidden2))
def get_presoft(h, args):
    output_size = get_output_size(args.dataset)
    # If all hidden states are fed to the output (skip connections, skip output,
    # or a clockwork/soft RNN): dim = args.layers * args.state_dim
    # else: dim = args.state_dim
    use_all_states = args.skip_connections or args.skip_output or (
        args.rnn_type in ["clockwork", "soft"])
    output_layer = Linear(
        input_dim=use_all_states * args.layers *
        args.state_dim + (1 - use_all_states) * args.state_dim,
        output_dim=output_size, name="output_layer")

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()
    presoft = output_layer.apply(h)
    if not has_indices(args.dataset):
        presoft = Tanh().apply(presoft)
    presoft.name = 'presoft'
    return presoft
Example 5
def build_model_hard(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names,
                input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())]
    for i in range(layers - 1):
        mlp = MLP(activations=[Logistic()],
                  dims=[2 * state_dim, 1],
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            HardGatedRecurrent(dim=state_dim, mlp=mlp, activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # dim = layers * state_dim
    output_layer = Linear(input_dim=layers * state_dim,
                          output_dim=vocab_size,
                          name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs' + suffix] = pre_rnn
        init_states[d] = theano.shared(numpy.zeros(
            (args.mini_batch_size, state_dim)).astype(floatX),
                                       name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # Now we have:
    # h = [state_1, state_2, state_3, ...]

    # Save all the last states
    last_states = {}
    for d in range(layers):
        last_states[d] = h[d][-1, :, :]

    # Concatenate all the states
    if layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(), presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates
def build_model_vanilla(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())
                   for _ in range(layers)]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # If skip_connections: dim = layers * state_dim
    # else: dim = state_dim
    output_layer = Linear(
        input_dim=skip_connections * layers *
        state_dim + (1 - skip_connections) * state_dim,
        output_dim=vocab_size, name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs'] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # We have
    # h = [state, state_1, state_2 ...] if layers > 1
    # h = state if layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    if layers > 1:
        # Save all the last states
        for d in range(layers):
            last_states[d] = h[d][-1, :, :]
        if skip_connections:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        last_states[0] = h[-1, :, :]
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(),
        presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates
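A minimal sketch, assumed rather than taken from the original module, of how the returned cost, cross_entropy and state updates are typically consumed (vocab_size and args are assumed to be defined):

from blocks.graph import ComputationGraph
from blocks.algorithms import GradientDescent, Scale

cost, cross_entropy, updates = build_model_vanilla(vocab_size, args)
cg = ComputationGraph(cost)
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.001))
# Carry the hidden state across mini-batches by registering the returned updates.
algorithm.add_updates(updates)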
def build_model_soft(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())]

    # Build the MLP
    dims = [2 * state_dim]
    activations = []
    for i in range(args.mlp_layers):
        activations.append(Rectifier())
        dims.append(state_dim)

    # Activation of the last layer of the MLP
    if args.mlp_activation == "logistic":
        activations.append(Logistic())
    elif args.mlp_activation == "rectifier":
        activations.append(Rectifier())
    elif args.mlp_activation == "hard_logistic":
        activations.append(HardLogistic())
    else:
        assert False

    # Output of MLP has dimension 1
    dims.append(1)

    for i in range(layers - 1):
        mlp = MLP(activations=activations, dims=dims,
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            SoftGatedRecurrent(dim=state_dim,
                               mlp=mlp,
                               activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # dim = layers * state_dim
    output_layer = Linear(
        input_dim=layers * state_dim,
        output_dim=vocab_size, name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs' + suffix] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # Now we have:
    # h = [state, state_1, gate_value_1, state_2, gate_value_2, state_3, ...]

    # Extract gate_values
    gate_values = h[2::2]
    new_h = [h[0]]
    new_h.extend(h[1::2])
    h = new_h

    # Now we have:
    # h = [state, state_1, state_2, ...]
    # gate_values = [gate_value_1, gate_value_2, gate_value_3]

    for i, gate_value in enumerate(gate_values):
        gate_value.name = "gate_value_" + str(i)

    # Save all the last states
    last_states = {}
    for d in range(layers):
        last_states[d] = h[d][-1, :, :]

    # Concatenate all the states
    if layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(),
        presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates, gate_values
Example 8
# Regularize the cost function.
# Select the parameters to penalize: W1 is the weight matrix of the first
# linear transformation, W2 that of the second.
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
# Define the regularization term; L2 regularization is used here.
cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
cost.name = 'cost_with_regularization'

# Define a multi-layer perceptron; the per-layer computation was defined above.
# The activations list gives the non-linearity of each layer: every layer of the
# MLP first applies a linear transformation and then the non-linear activation
# to its result.
# x is the input of the MLP.
mlp = MLP(activations=[Rectifier(), Softmax()], dims=[784, 100, 10]).apply(x)

# After the whole network has been defined, the initial values of its linear
# transformations have to be set.

input_to_hidden.weights_init = IsotropicGaussian(0.01)
input_to_hidden.biases_init = Constant(0)
hidden_to_output.weights_init = IsotropicGaussian(0.01)
hidden_to_output.biases_init = Constant(0)
# Apply the initialization. This step is required; without it the settings
# above have no effect.
input_to_hidden.initialize()
hidden_to_output.initialize()

print(W1.get_value())

# Now train the model. The built-in MNIST dataset is used here; to use a
# different dataset, preprocess the data with fuel.
mnist = MNIST(("train",))
# Define the iteration scheme: mini-batch computation with 256 examples per
# mini-batch, which yields the data stream data_stream.
data_stream = Flatten(DataStream.default_stream(
    mnist,
    iteration_scheme=SequentialScheme(mnist.num_examples, batch_size=256)))

# Define the optimizer; plain SGD is used here.
#algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=0.01))
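A sketch of how the commented-out training setup might be completed into a full Blocks main loop; the extension choices and the number of epochs are assumptions:

from blocks.algorithms import GradientDescent, Scale
from blocks.model import Model
from blocks.main_loop import MainLoop
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.monitoring import TrainingDataMonitoring

algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.01))
main_loop = MainLoop(algorithm=algorithm,
                     data_stream=data_stream,
                     model=Model(cost),
                     extensions=[TrainingDataMonitoring([cost], after_batch=True),
                                 FinishAfter(after_n_epochs=5),
                                 Printing()])
main_loop.run()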
Example 9
x = tensor.tensor4('features')
flat_x = tensor.flatten(x, outdim=2)
flat_x_noise = flat_x + rng.normal(size=flat_x.shape, std=0.5)
y = tensor.imatrix('targets')
flat_y = tensor.flatten(y, outdim=1)

rect = Rectifier()
mlp = MLP(dims=[784, 1200, 1200, 200], activations=[rect, rect, rect], seed=10)
mlp.weights_init = Uniform(0.0, 0.01)
mlp.biases_init = Constant(0.0)
mlp.initialize()

lin = Linear(200, 10, use_bias=True)
lin.weights_init = Uniform(0.0, 0.01)
lin.biases_init = Constant(0.0)
lin.initialize()

train_out = lin.apply(mlp.apply(flat_x))
test_out = lin.apply(mlp.apply(flat_x))

sm = Softmax(name='softmax')
loss = sm.categorical_cross_entropy(flat_y, train_out).mean()
loss.name = 'nll'
misclass = MisclassificationRate().apply(flat_y, train_out)
misclass.name = 'misclass'

test_loss = sm.categorical_cross_entropy(flat_y, test_out).mean()
test_loss.name = 'nll'
test_misclass = MisclassificationRate().apply(flat_y, test_out)
test_misclass.name = 'misclass'
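The snippet above references rng without defining it and never uses flat_x_noise; a minimal sketch of an assumed noise source, together with the denoising variant the unused variable suggests:

from theano.sandbox.rng_mrg import MRG_RandomStreams

rng = MRG_RandomStreams(seed=1234)  # assumed definition of the rng used above
# Hypothetical denoising variant: feed the noisy input only to the training path.
# train_out = lin.apply(mlp.apply(flat_x_noise))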
Example 10
                dims=[n_latent, n_hidden, n_vis]).apply(z)


# Define the cost
 
KL_term = -0.5 * (1 + encoder_lognu - T.exp(encoder_lognu)
                  - encoder_mu ** 2).sum(axis=1)
reconstruction_term = (x * T.log(decoder_p) +
                       (1 - x) * T.log(1 - decoder_p)).sum(axis=1)
cost = (KL_term - reconstruction_term).mean()
cost.name = 'negative_log_likelihood'


# Initialize the parameters

encoder_layer_1.weights_init = Uniform(width=12. / (n_vis + n_hidden))
encoder_layer_1.biases_init = Constant(0)
encoder_mu.weights_init = Uniform(width=12. / (n_hidden + n_latent))
encoder_mu.biases_init = Constant(0)
encoder_lognu.weights_init = Uniform(width=12. / (n_hidden + n_latent))
encoder_lognu.biases_init = Constant(0)


mnist = MNIST(("train",))

data_stream = Flatten(DataStream.default_stream(
        mnist,
        iteration_scheme=SequentialScheme(mnist.num_examples, batch_size=100)))

cg = ComputationGraph(cost)
algorithm = GradientDescent(cost=cost, 
                            parameters=cg.parameters,
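The truncated snippet applies a decoder MLP to a latent sample z and uses the encoder outputs encoder_mu and encoder_lognu in the KL term; a sketch, assuming those names denote the symbolic layer outputs, of the standard reparameterization that could produce z:

from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=2016)
epsilon = srng.normal(size=encoder_mu.shape)
# sigma = exp(lognu / 2), so z = mu + sigma * epsilon
z = encoder_mu + T.exp(0.5 * encoder_lognu) * epsilon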
Example 11
def build_model_lstm(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    virtual_dim = 4 * state_dim

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(virtual_dim)

    lookup = LookupTable(length=vocab_size, dim=virtual_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    # Make sure time_length is what we need
    fork = Fork(output_names=output_names,
                input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    transitions = [
        LSTM(dim=state_dim, activation=Tanh()) for _ in range(layers)
    ]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # If skip_connections: dim = layers * state_dim
    # else: dim = state_dim
    output_layer = Linear(input_dim=skip_connections * layers * state_dim +
                          (1 - skip_connections) * state_dim,
                          output_dim=vocab_size,
                          name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    init_cells = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs'] = pre_rnn
        init_states[d] = theano.shared(numpy.zeros(
            (args.mini_batch_size, state_dim)).astype(floatX),
                                       name='state0_%d' % d)
        init_cells[d] = theano.shared(numpy.zeros(
            (args.mini_batch_size, state_dim)).astype(floatX),
                                      name='cell0_%d' % d)
        kwargs['states' + suffix] = init_states[d]
        kwargs['cells' + suffix] = init_cells[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # h = [state, cell, in, forget, out, state_1,
    #        cell_1, in_1, forget_1, out_1 ...]

    last_states = {}
    last_cells = {}
    for d in range(layers):
        last_states[d] = h[5 * d][-1, :, :]
        last_cells[d] = h[5 * d + 1][-1, :, :]

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))
        updates.append((init_cells[d], last_cells[d]))

    # h = [state, cell, in, forget, out, state_1,
    #        cell_1, in_1, forget_1, out_1 ...]

    # Extract the values
    in_gates = h[2::5]
    forget_gates = h[3::5]
    out_gates = h[4::5]

    gate_values = {
        "in_gates": in_gates,
        "forget_gates": forget_gates,
        "out_gates": out_gates
    }

    h = h[::5]

    # Now we have:
    # h = [state, state_1, state_2, ...] if layers > 1
    # h = [state] if layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    if layers > 1:
        if skip_connections:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        h = h[0]
    h.name = "hidden_state"

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(), presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    # Don't initialize as Orthogonal if we are about to load new parameters
    if args.load_path is not None:
        rnn.weights_init = initialization.Constant(0)
    else:
        rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates, gate_values
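A sketch, not part of the original function, of how the returned gate_values could be reduced to scalar quantities for monitoring:

monitored_gates = []
for gate_name, gates in gate_values.items():
    for i, gate in enumerate(gates):
        mean_gate = gate.mean()  # average gate activation over time and batch
        mean_gate.name = "%s_%d_mean" % (gate_name, i)
        monitored_gates.append(mean_gate)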
Example 13
def get_typical_layer(input_layer, input_dim, output_dim, transformation=Logistic()):
  layer = Linear(input_dim=input_dim, output_dim=output_dim)
  layer.weights_init = IsotropicGaussian(0.01)
  layer.biases_init = Constant(0)
  layer.initialize()
  return transformation.apply(layer.apply(input_layer))