Beispiel #1
0
def build_model_vanilla(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    transitions = [
        SimpleRecurrent(dim=args.state_dim, activation=Tanh())
        for _ in range(args.layers)
    ]

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    # We have
    # h = [state, state_1, state_2 ...] if args.layers > 1
    # h = state if args.layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    hidden_states = []
    if args.layers > 1:
        # Save all the last states
        for d in range(args.layers):
            # TODO correct bug
            # h[d] = h[d] * x_mask
            last_states[d] = h[d][-1, :, :]
            h[d].name = "hidden_state_" + str(d)
            hidden_states.append(h[d])
        if args.skip_connections or args.skip_output:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        # TODO correct bug
        # hidden_states.append(h * x_mask)
        hidden_states.append(h)
        hidden_states[0].name = "hidden_state_0"
        # Note: if we have mask, then updating initial state
        # with last state does not make sence anymore.
        last_states[0] = h[-1, :, :]

    # The updates of the hidden states
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))

    presoft = get_presoft(h, args)

    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, hidden_states
def build_model_vanilla(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    transitions = [SimpleRecurrent(dim=args.state_dim, activation=Tanh())
                   for _ in range(args.layers)]

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    # We have
    # h = [state, state_1, state_2 ...] if args.layers > 1
    # h = state if args.layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    hidden_states = []
    if args.layers > 1:
        # Save all the last states
        for d in range(args.layers):
            # TODO correct bug
            # h[d] = h[d] * x_mask
            last_states[d] = h[d][-1, :, :]
            h[d].name = "hidden_state_" + str(d)
            hidden_states.append(h[d])
        if args.skip_connections or args.skip_output:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        # TODO correct bug
        # hidden_states.append(h * x_mask)
        hidden_states.append(h)
        hidden_states[0].name = "hidden_state_0"
        # Note: if we have mask, then updating initial state
        # with last state does not make sence anymore.
        last_states[0] = h[-1, :, :]

    # The updates of the hidden states
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))

    presoft = get_presoft(h, args)

    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, hidden_states
def build_model_cw(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    # Note that this order of the periods makes faster modules flow in slower
    # ones with is the opposite of the original paper
    if args.module_order == "fast_in_slow":
        transitions = [ClockworkBase(
            dim=args.state_dim, activation=Tanh(),
            period=2 ** i) for i in range(args.layers)]
    elif args.module_order == "slow_in_fast":
        transitions = [ClockworkBase(
            dim=args.state_dim,
            activation=Tanh(),
            period=2 ** (args.layers - i - 1)) for i in range(args.layers)]
    else:
        assert False

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    # In the Clockwork case:
    # h = [state, time, state_1, time_1 ...]
    h = h[::2]

    # Now we have correctly:
    # h = [state, state_1, state_2 ...] if args.layers > 1
    # h = [state] if args.layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    hidden_states = []
    if args.layers > 1:
        # Save all the last states
        for d in range(args.layers):
            # TODO correct the bug
            # h[d] = h[d] * x_mask
            last_states[d] = h[d][-1, :, :]
            h[d].name = "hidden_state_" + str(d)
            hidden_states.append(h[d])
        h = tensor.concatenate(h, axis=2)
    else:
        h = h[0] * x_mask
        last_states[0] = h[-1, :, :]
    h.name = "hidden_state_all"

    # The updates of the hidden states
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))

    presoft = get_presoft(h, args)

    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, hidden_states
Beispiel #4
0
def build_model_lstm(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    transitions = [LSTM(dim=args.state_dim, activation=Tanh())
                   for _ in range(args.layers)]

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(mask=x_mask, **kwargs)

    # h = [state, cell, in, forget, out, state_1,
    #        cell_1, in_1, forget_1, out_1 ...]

    last_states = {}
    last_cells = {}
    hidden_states = []
    for d in range(args.layers):
        # TODO correct bug
        # h[5 * d] = h[5 * d] * x_mask
        # h[5 * d + 1] = h[5 * d + 1] * x_mask

        last_states[d] = h[5 * d][-1, :, :]
        last_cells[d] = h[5 * d + 1][-1, :, :]

        h[5 * d].name = "hidden_state_" + str(d)
        h[5 * d + 1].name = "hidden_cell_" + str(d)
        hidden_states.extend([h[5 * d], h[5 * d + 1]])

    # The updates of the hidden states
    # Note: if we have mask, then updating initial state
    # with last state does not make sence anymore.
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))
        updates.append((inits[1][d], last_states[d]))

    # h = [state, cell, in, forget, out, state_1,
    #        cell_1, in_1, forget_1, out_1 ...]

    # Extract the values
    in_gates = h[2::5]
    forget_gates = h[3::5]
    out_gates = h[4::5]

    gate_values = {"in_gates": in_gates,
                   "forget_gates": forget_gates,
                   "out_gates": out_gates}

    h = h[::5]

    # Now we have correctly:
    # h = [state, state_1, state_2 ...] if args.layers > 1
    # h = [state] if args.layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    if args.layers > 1:
        if args.skip_connections or args.skip_output:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        h = h[0]
    h.name = "hidden_state_all"

    presoft = get_presoft(h, args)

    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, gate_values, hidden_states
def build_model_cw(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    # Note that this order of the periods makes faster modules flow in slower
    # ones with is the opposite of the original paper
    if args.module_order == "fast_in_slow":
        transitions = [
            ClockworkBase(dim=args.state_dim, activation=Tanh(), period=2**i)
            for i in range(args.layers)
        ]
    elif args.module_order == "slow_in_fast":
        transitions = [
            ClockworkBase(dim=args.state_dim,
                          activation=Tanh(),
                          period=2**(args.layers - i - 1))
            for i in range(args.layers)
        ]
    else:
        assert False

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    # In the Clockwork case:
    # h = [state, time, state_1, time_1 ...]
    h = h[::2]

    # Now we have correctly:
    # h = [state, state_1, state_2 ...] if args.layers > 1
    # h = [state] if args.layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    hidden_states = []
    if args.layers > 1:
        # Save all the last states
        for d in range(args.layers):
            # TODO correct the bug
            # h[d] = h[d] * x_mask
            last_states[d] = h[d][-1, :, :]
            h[d].name = "hidden_state_" + str(d)
            hidden_states.append(h[d])
        h = tensor.concatenate(h, axis=2)
    else:
        h = h[0] * x_mask
        last_states[0] = h[-1, :, :]
    h.name = "hidden_state_all"

    # The updates of the hidden states
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))

    presoft = get_presoft(h, args)

    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, hidden_states
Beispiel #6
0
def build_model_soft(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    transitions = [SimpleRecurrent(dim=args.state_dim, activation=Tanh())]

    # Build the MLP
    dims = [2 * args.state_dim]
    activations = []
    for i in range(args.mlp_layers):
        activations.append(Rectifier())
        dims.append(args.state_dim)

    # Activation of the last layer of the MLP
    if args.mlp_activation == "logistic":
        activations.append(Logistic())
    elif args.mlp_activation == "rectifier":
        activations.append(Rectifier())
    elif args.mlp_activation == "hard_logistic":
        activations.append(HardLogistic())
    else:
        assert False

    # Output of MLP has dimension 1
    dims.append(1)

    for i in range(args.layers - 1):
        mlp = MLP(activations=activations, dims=dims,
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            SoftGatedRecurrent(dim=args.state_dim,
                               mlp=mlp,
                               activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    # Now we have:
    # h = [state, state_1, gate_value_1, state_2, gate_value_2, state_3, ...]

    # Extract gate_values
    gate_values = h[2::2]
    new_h = [h[0]]
    new_h.extend(h[1::2])
    h = new_h

    # Now we have:
    # h = [state, state_1, state_2, ...]
    # gate_values = [gate_value_1, gate_value_2, gate_value_3]

    for i, gate_value in enumerate(gate_values):
        gate_value.name = "gate_value_" + str(i)

    # Save all the last states
    last_states = {}
    hidden_states = []
    for d in range(args.layers):
        h[d] = h[d] * x_mask
        last_states[d] = h[d][-1, :, :]
        h[d].name = "hidden_state_" + str(d)
        hidden_states.append(h[d])

    # Concatenate all the states
    if args.layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state_all"

    # The updates of the hidden states
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))

    presoft = get_presoft(h, args)

    cost, cross_entropy = get_costs(presoft, args)

    return cost, cross_entropy, updates, gate_values, hidden_states