Example #1
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 activation=T.nnet.sigmoid,
                 with_batch=True,
                 name='RNN'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.activation = activation
        self.with_batch = with_batch
        self.name = name

        # Randomly generate weights
        self.w_x = create_shared(random_weights((input_dim, hidden_dim)),
                                 name + '__w_x')
        self.w_h = create_shared(random_weights((hidden_dim, hidden_dim)),
                                 name + '__w_h')

        # Initialize the bias vector and h_0 to zero vectors
        self.b_h = create_shared(np.zeros((hidden_dim, )), name + '__b_h')
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')

        # Define parameters
        self.params = [self.w_x, self.w_h, self.b_h, self.h_0]
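These examples all rely on two helpers, create_shared and random_weights, that are not shown here. A minimal sketch of what such helpers typically look like in Theano code follows; the exact initialization range used by the original random_weights is an assumption.

import numpy as np
import theano


def random_weights(shape):
    # Illustrative only: small uniform initialization scaled by the layer size.
    drange = np.sqrt(6. / np.sum(shape))
    return drange * np.random.uniform(low=-1.0, high=1.0, size=shape)


def create_shared(value, name):
    # Wrap a numpy array as a Theano shared variable in the configured float type.
    return theano.shared(value=np.asarray(value, dtype=theano.config.floatX),
                         name=name)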
Example #2
    def __init__(self,
                 input_dim,
                 rnn_hidden_dim,
                 rnn_output_dim,
                 values_dim,
                 output_dim,
                 name='stack'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.rnn_hidden_dim = rnn_hidden_dim
        self.rnn_output_dim = rnn_output_dim
        self.values_dim = values_dim
        self.output_dim = output_dim
        self.name = name

        # Generate weights and biases to compute the push scalar (d_t), the pop scalar (u_t),
        # the value vector (v_t), and the network output (o_t)
        # Weights
        self.w_op_d = create_shared(random_weights((rnn_output_dim, 1)),
                                    name + '__w_op_d')
        self.w_op_u = create_shared(random_weights((rnn_output_dim, 1)),
                                    name + '__w_op_u')
        self.w_op_v = create_shared(
            random_weights((rnn_output_dim, values_dim)), name + '__w_op_v')
        self.w_op_o = create_shared(
            random_weights((rnn_output_dim, output_dim)), name + '__w_op_o')
        # Bias
        self.b_op_d = create_shared(np.zeros((1, )), name + '__b_op_d')
        self.b_op_u = create_shared(np.zeros((1, )), name + '__b_op_u')
        self.b_op_v = create_shared(np.zeros((values_dim, )),
                                    name + '__b_op_v')
        self.b_op_o = create_shared(np.zeros((output_dim, )),
                                    name + '__b_op_o')

        # RNN Controller weights
        self.w_xrh_hop = create_shared(
            random_weights((input_dim + values_dim + rnn_hidden_dim,
                            rnn_hidden_dim + rnn_output_dim)),
            name + '__w_xrh_hop')
        self.b_xrh_hop = create_shared(
            np.zeros((rnn_hidden_dim + rnn_output_dim, )),
            name + '__b_xrh_hop')

        # Initial hidden states H_0 - H_t = (h_t, r_t, (v_t, s_t))
        self.h_0 = create_shared(np.zeros((rnn_hidden_dim, )), name + '__h_0')
        self.r_0 = create_shared(np.zeros((values_dim, )), name + '__r_0')
        # self.v_0 = create_shared(np.zeros((values_dim,)), name + '__v_0')
        # self.s_0 = create_shared(np.zeros((1,)), name + '__s_0')

        # Define parameters
        self.params = [
            self.w_op_d, self.w_op_u, self.w_op_v, self.w_op_o, self.b_op_d,
            self.b_op_u, self.b_op_v, self.b_op_o, self.w_xrh_hop,
            self.b_xrh_hop, self.h_0
        ]  # _TODO_ check this (why not put r_0, s_0, v_0)
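A hedged sketch, not part of the original class, of how the push scalar, pop scalar, value vector, and output are typically read off the controller output (the function name and the choice of sigmoid/tanh squashing are assumptions consistent with the comments above):

import theano.tensor as T

def stack_outputs(layer, rnn_out):
    d_t = T.nnet.sigmoid(T.dot(rnn_out, layer.w_op_d) + layer.b_op_d)  # push scalar in (0, 1)
    u_t = T.nnet.sigmoid(T.dot(rnn_out, layer.w_op_u) + layer.b_op_u)  # pop scalar in (0, 1)
    v_t = T.tanh(T.dot(rnn_out, layer.w_op_v) + layer.b_op_v)          # value vector
    o_t = T.tanh(T.dot(rnn_out, layer.w_op_o) + layer.b_op_o)          # network output
    return d_t, u_t, v_t, o_t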
Example #3
    def __init__(self, nb_filters, stack_size, filter_height, filter_width, wide, name):
        """
        Construct a convolutional layer
        `wide`:
            False: only apply filter to complete patches of the image.
            Generates output of shape: image_shape - filter_shape + 1
            True: zero-pads image to multiple of filter shape to generate
            output of shape: image_shape + filter_shape - 1
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.wide = wide
        self.name = name
        self.filter_shape = (nb_filters, stack_size, filter_height, filter_width)

        fan_in = stack_size * filter_height * filter_width   # number of inputs to each hidden unit
        fan_out = nb_filters * filter_height * filter_width  # number of units each lower-layer unit receives a gradient from
        drange = np.sqrt(6. / (fan_in + fan_out))             # Glorot/Xavier uniform initialization range

        self.filters = create_shared(drange * random_weights(self.filter_shape), name + '__filters')
        self.bias = create_shared(np.zeros((nb_filters,)), name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
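A hedged sketch, not part of the original class, of how the wide flag typically maps onto Theano's convolution border modes:

import theano.tensor as T

def conv_link(layer, images):
    # 'full' zero-pads the image (wide convolution, output = image + filter - 1);
    # 'valid' applies the filter only to complete patches (output = image - filter + 1)
    border_mode = 'full' if layer.wide else 'valid'
    conv_out = T.nnet.conv2d(images, layer.filters, border_mode=border_mode)
    # broadcast the per-filter bias over the batch and spatial dimensions
    return conv_out + layer.bias.dimshuffle('x', 0, 'x', 'x')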
Example #4
    def __init__(self, input_dim, hidden_dim, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.name = name

        self.W = create_shared(random_weights((input_dim, hidden_dim * 4)), name + 'W')
        self.U = create_shared(random_weights((hidden_dim, hidden_dim * 4)), name + 'U')
        self.b = create_shared(random_weights((hidden_dim * 4, )), name + 'b')

        self.c_0 = create_shared(np.zeros((hidden_dim,)), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

        self.params = [self.W, self.U, self.b]
Example #5
    def __init__(self, nb_filters, stack_size, filter_height, filter_width,
                 wide, name):
        """
        Construct a convolutional layer
        `wide`:
            False: only apply filter to complete patches of the image.
            Generates output of shape: image_shape - filter_shape + 1
            True: zero-pads image to multiple of filter shape to generate
            output of shape: image_shape + filter_shape - 1
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.wide = wide
        self.name = name
        self.filter_shape = (nb_filters, stack_size, filter_height,
                             filter_width)

        fan_in = stack_size * filter_height * filter_width   # number of inputs to each hidden unit
        fan_out = nb_filters * filter_height * filter_width  # number of units each lower-layer unit receives a gradient from
        drange = np.sqrt(6. / (fan_in + fan_out))             # Glorot/Xavier uniform initialization range

        self.filters = create_shared(
            drange * random_weights(self.filter_shape), name + '__filters')
        self.bias = create_shared(np.zeros((nb_filters, )), name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
Example #6
    def __init__(self, nb_filters, stack_size, filter_height, filter_width,
                 border_mode, stride, name):
        """
        Construct a convolutional layer.
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.border_mode = border_mode
        self.filter_shape = (nb_filters, stack_size, filter_height,
                             filter_width)
        self.stride = stride
        self.name = name

        fan_in = stack_size * filter_height * filter_width   # number of inputs to each hidden unit
        fan_out = nb_filters * filter_height * filter_width  # number of units each lower-layer unit receives a gradient from
        drange = np.sqrt(6. / (fan_in + fan_out))             # Glorot/Xavier uniform initialization range

        self.filters = create_shared(
            drange * random_weights(self.filter_shape), name + '__filters')
        self.bias = create_shared(
            np.ones((nb_filters, )) * 0.1, name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
Example #7
    def __init__(self,
                 input_dim,
                 output_dim,
                 bias=True,
                 activation='sigmoid',
                 name='hidden_layer'):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.bias = bias
        self.name = name
        if activation is None:
            self.activation = None
        elif activation == 'tanh':
            self.activation = T.tanh
        elif activation == 'sigmoid':
            self.activation = T.nnet.sigmoid
        elif activation == 'softmax':
            self.activation = T.nnet.softmax
        elif activation == 'relu':
            self.activation = T.nnet.relu
        else:
            raise Exception("Unknown activation function: %s" % activation)

        # Initialize weights and bias
        self.weights = create_shared(random_weights((input_dim, output_dim)),
                                     name + '__weights')

        self.bias = create_shared(np.zeros((output_dim, )), name + '__bias')

        # Define parameters
        # note: self.bias now holds the shared variable (the boolean flag was shadowed),
        # so test the original `bias` argument instead
        if bias:
            self.params = [self.weights, self.bias]
        else:
            self.params = [self.weights]
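A hedged sketch of the forward computation these parameters support (the function name is an assumption; it is not shown in the excerpt):

import theano.tensor as T

def hidden_link(layer, x):
    # affine transform followed by the configured nonlinearity, if any
    pre_activation = T.dot(x, layer.weights) + layer.bias
    return pre_activation if layer.activation is None else layer.activation(pre_activation)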
Example #8
    def __init__(self, input_dim, output_dim, bias=True, activation='sigmoid',
                 name='hidden_layer'):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.bias = bias
        self.name = name
        if activation is None:
            self.activation = None
        elif activation == 'tanh':
            self.activation = T.tanh
        elif activation == 'sigmoid':
            self.activation = T.nnet.sigmoid
        elif activation == 'softmax':
            self.activation = T.nnet.softmax
        elif activation == 'relu':
            self.activation = T.nnet.relu
        else:
            raise Exception("Unknown activation function: %s" % activation)

        # Initialize weights and bias
        self.weights = create_shared(
            random_weights((input_dim, output_dim)),
            name + '__weights'
        )

        if activation == 'relu':
            self.bias = create_shared(np.ones((output_dim,)) * 0.1, name + '__bias')
        else:
            self.bias = create_shared(np.zeros((output_dim,)), name + '__bias')

        # Define parameters
        # note: self.bias now holds the shared variable (the boolean flag was shadowed),
        # so test the original `bias` argument instead
        if bias:
            self.params = [self.weights, self.bias]
        else:
            self.params = [self.weights]
Example #9
    def __init__(self, input_dim, hidden_dim, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.name = name

        self.W = create_shared(random_weights((input_dim, hidden_dim * 4)),
                               name + 'W')
        self.U = create_shared(random_weights((hidden_dim, hidden_dim * 4)),
                               name + 'U')
        self.b = create_shared(random_weights((hidden_dim * 4, )), name + 'b')

        self.c_0 = create_shared(np.zeros((hidden_dim, )), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')

        self.params = [self.W, self.U, self.b]
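The hidden_dim * 4 sizing packs the input, forget, output, and cell pre-activations into a single matrix. A hedged sketch of one recurrence step under that convention (the gate ordering and the batched 2-D layout are assumptions):

import theano.tensor as T

def lstm_step(x_t, h_prev, c_prev, W, U, b, hidden_dim):
    # one fused affine transform, then sliced into the four gates
    preact = T.dot(x_t, W) + T.dot(h_prev, U) + b

    def gate(n):
        return preact[:, n * hidden_dim:(n + 1) * hidden_dim]

    i_t = T.nnet.sigmoid(gate(0))  # input gate
    f_t = T.nnet.sigmoid(gate(1))  # forget gate
    o_t = T.nnet.sigmoid(gate(2))  # output gate
    g_t = T.tanh(gate(3))          # candidate cell update
    c_t = f_t * c_prev + i_t * g_t
    h_t = o_t * T.tanh(c_t)
    return h_t, c_t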
Example #10
def generate_context_vector(input_vector, hidden_state_vector):
    irows = input_vector.shape[1]
    icols = input_vector.shape[0]  # TODO: replace this with timesteps
    input_wt = utils.random_weights(irows, icols)

    hrows = hidden_state_vector.shape[1]
    hcols = hidden_state_vector.shape[0]
    hidden_wt = utils.random_weights(hrows, hcols)

    beta = np.tanh(
        np.dot(input_vector, input_wt) +
        np.dot(hidden_state_vector, hidden_wt))

    alpha = utils.softmax(beta)

    context_vector = np.multiply(
        alpha, np.concatenate((input_vector, hidden_state_vector), axis=1))

    return context_vector
Example #11
    def __init__(self, input_dim, rnn_hidden_dim, rnn_output_dim, values_dim, output_dim, name='stack'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.rnn_hidden_dim = rnn_hidden_dim
        self.rnn_output_dim = rnn_output_dim
        self.values_dim = values_dim
        self.output_dim = output_dim
        self.name = name

        # Generate weights and biases to compute the push scalar (d_t), the pop scalar (u_t),
        # the value vector (v_t), and the network output (o_t)
        # Weights
        self.w_op_d = create_shared(random_weights((rnn_output_dim, 1)), name + '__w_op_d')
        self.w_op_u = create_shared(random_weights((rnn_output_dim, 1)), name + '__w_op_u')
        self.w_op_v = create_shared(random_weights((rnn_output_dim, values_dim)), name + '__w_op_v')
        self.w_op_o = create_shared(random_weights((rnn_output_dim, output_dim)), name + '__w_op_o')
        # Bias
        self.b_op_d = create_shared(np.zeros((1,)), name + '__b_op_d')
        self.b_op_u = create_shared(np.zeros((1,)), name + '__b_op_u')
        self.b_op_v = create_shared(np.zeros((values_dim,)), name + '__b_op_v')
        self.b_op_o = create_shared(np.zeros((output_dim,)), name + '__b_op_o')

        # RNN Controller weights
        self.w_xrh_hop = create_shared(random_weights((input_dim + values_dim + rnn_hidden_dim, rnn_hidden_dim + rnn_output_dim)), name + '__w_xrh_hop')
        self.b_xrh_hop = create_shared(np.zeros((rnn_hidden_dim + rnn_output_dim,)), name + '__b_xrh_hop')

        # Initial hidden states H_0 - H_t = (h_t, r_t, (v_t, s_t))
        self.h_0 = create_shared(np.zeros((rnn_hidden_dim,)), name + '__h_0')
        self.r_0 = create_shared(np.zeros((values_dim,)), name + '__r_0')
        # self.v_0 = create_shared(np.zeros((values_dim,)), name + '__v_0')
        # self.s_0 = create_shared(np.zeros((1,)), name + '__s_0')

        # Define parameters
        self.params = [
            self.w_op_d, self.w_op_u, self.w_op_v, self.w_op_o,
            self.b_op_d, self.b_op_u, self.b_op_v, self.b_op_o,
            self.w_xrh_hop, self.b_xrh_hop,
            self.h_0
        ] # _TODO_ check this (why not put r_0, s_0, v_0)
Example #12
    def __init__(self, input_dim, hidden_dim, activation=T.nnet.sigmoid,
                 with_batch=True, name='RNN'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.activation = activation
        self.with_batch = with_batch
        self.name = name

        # Randomly generate weights
        self.w_x = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_x')
        self.w_h = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_h')

        # Initialize the bias vector and h_0 to zero vectors
        self.b_h = create_shared(np.zeros((hidden_dim,)), name + '__b_h')
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

        # Define parameters
        self.params = [self.w_x, self.w_h, self.b_h, self.h_0]
Example #13
    def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.with_batch = with_batch
        self.name = name

        # Update gate weights and bias
        self.w_z = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_z')
        self.u_z = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_z')
        self.b_z = create_shared(np.zeros((hidden_dim,)), name + '__b_z')

        # Reset gate weights and bias
        self.w_r = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_r')
        self.u_r = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_r')
        self.b_r = create_shared(np.zeros((hidden_dim,)), name + '__b_r')

        # New memory content weights and bias
        self.w_c = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_c')
        self.u_c = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_c')
        self.b_c = create_shared(np.zeros((hidden_dim,)), name + '__b_c')

        # Initialize the initial hidden state h_0 to the zero vector
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

        # Define parameters
        self.params = [self.w_z, self.u_z, self.b_z,
                       self.w_r, self.u_r, self.b_r,
                       self.w_c, self.u_c, self.b_c,
                       self.h_0]
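Despite the name='LSTM' default, the update/reset/candidate gates above are those of a GRU. A hedged sketch of the corresponding recurrence step (the interpolation direction between h_prev and the candidate varies across implementations):

import theano.tensor as T

def gru_step(x_t, h_prev, layer):
    z_t = T.nnet.sigmoid(T.dot(x_t, layer.w_z) + T.dot(h_prev, layer.u_z) + layer.b_z)  # update gate
    r_t = T.nnet.sigmoid(T.dot(x_t, layer.w_r) + T.dot(h_prev, layer.u_r) + layer.b_r)  # reset gate
    c_t = T.tanh(T.dot(x_t, layer.w_c) + T.dot(r_t * h_prev, layer.u_c) + layer.b_c)    # candidate state
    return (1. - z_t) * h_prev + z_t * c_t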
Example #14
    def __init__(self, input_dim, output_dim, name='embedding_layer'):
        """
        Typically, input_dim is the vocabulary size,
        and output_dim the embedding dimension.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.name = name

        # Randomly generate weights
        self.embeddings = create_shared(
            random_weights((input_dim, output_dim)),
            self.name + '__embeddings')

        # Define parameters
        self.params = [self.embeddings]
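A hedged sketch of how such an embedding table is typically used: integer word indices select rows of the shared matrix.

def embedding_link(layer, word_ids):
    # symbolic advanced indexing; returns one output_dim-sized row per index
    return layer.embeddings[word_ids]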
Example #15
    def __init__(self, input_dim, output_dim, name='embedding_layer'):
        """
        Typically, input_dim is the vocabulary size,
        and output_dim the embedding dimension.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.name = name

        # Randomly generate weights
        self.embeddings = create_shared(
            random_weights((input_dim, output_dim)),
            self.name + '__embeddings'
        )

        # Define parameters
        self.params = [self.embeddings]
Example #16
def LogisticLinearLeaner(dataset, learning_rate=0.01, epochs=100):
    """
    [Section 18.6.4]
    Linear classifier with logistic regression.
    """
    idx_i = dataset.inputs
    idx_t = dataset.target
    examples = dataset.examples
    num_examples = len(examples)

    # X transpose
    X_col = [dataset.values[i] for i in idx_i]  # vertical columns of X

    # add dummy
    ones = [1 for _ in range(len(examples))]
    X_col = [ones] + X_col

    # initialize random weights
    num_weights = len(idx_i) + 1
    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

    for epoch in range(epochs):
        err = []
        h = []
        # pass over all examples
        for example in examples:
            x = [1] + example
            y = sigmoid(dot_product(w, x))
            h.append(sigmoid_derivative(y))
            t = example[idx_t]
            err.append(t - y)

        # update weights
        for i in range(len(w)):
            buffer = [x * y for x, y in zip(err, h)]
            w[i] = w[i] + learning_rate * (dot_product(buffer, X_col[i]) /
                                           num_examples)

    def predict(example):
        x = [1] + example
        return sigmoid(dot_product(w, x))

    return predict
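A hedged usage sketch, assuming the aima-python DataSet class and its bundled iris data (neither is shown in the excerpt):

iris = DataSet(name='iris')       # loads iris.csv from aima-data
iris.classes_to_numbers()         # map class labels to integers
predict = LogisticLinearLeaner(iris, learning_rate=0.05, epochs=500)
score = predict([5.1, 3.0, 1.1, 0.1])  # sigmoid output in (0, 1)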
Example #17
def LinearLearner(dataset, learning_rate=0.01, epochs=100):
    """
    [Section 18.6.3]
    Linear classifier with hard threshold.
    """
    idx_i = dataset.inputs
    idx_t = dataset.target
    examples = dataset.examples
    num_examples = len(examples)

    # X transpose
    X_col = [dataset.values[i] for i in idx_i]  # vertical columns of X

    # add dummy
    ones = [1 for _ in range(len(examples))]
    X_col = [ones] + X_col

    # initialize random weights
    num_weights = len(idx_i) + 1
    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

    for epoch in range(epochs):
        err = []
        # pass over all examples
        for example in examples:
            x = [1] + example
            y = dot_product(w, x)
            t = example[idx_t]
            err.append(t - y)

        # update weights
        for i in range(len(w)):
            w[i] = w[i] + learning_rate * (dot_product(err, X_col[i]) /
                                           num_examples)

    def predict(example):
        x = [1] + example
        return dot_product(w, x)

    return predict
Example #18
    def __init__(self, nb_filters, stack_size, filter_height, filter_width, border_mode, stride, name):
        """
        Construct a convolutional layer.
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.border_mode = border_mode
        self.filter_shape = (nb_filters, stack_size, filter_height, filter_width)
        self.stride = stride
        self.name = name

        fan_in = stack_size * filter_height * filter_width   # number of inputs to each hidden unit
        fan_out = nb_filters * filter_height * filter_width  # number of units each lower-layer unit receives a gradient from
        drange = np.sqrt(6. / (fan_in + fan_out))             # Glorot/Xavier uniform initialization range

        self.filters = create_shared(drange * random_weights(self.filter_shape), name + '__filters')
        self.bias = create_shared(np.ones((nb_filters,)) * 0.1, name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
Example #19
    def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.with_batch = with_batch
        self.name = name

        # Update gate weights and bias
        self.w_z = create_shared(random_weights((input_dim, hidden_dim)),
                                 name + '__w_z')
        self.u_z = create_shared(random_weights((hidden_dim, hidden_dim)),
                                 name + '__u_z')
        self.b_z = create_shared(np.zeros((hidden_dim, )), name + '__b_z')

        # Reset gate weights and bias
        self.w_r = create_shared(random_weights((input_dim, hidden_dim)),
                                 name + '__w_r')
        self.u_r = create_shared(random_weights((hidden_dim, hidden_dim)),
                                 name + '__u_r')
        self.b_r = create_shared(np.zeros((hidden_dim, )), name + '__b_r')

        # New memory content weights and bias
        self.w_c = create_shared(random_weights((input_dim, hidden_dim)),
                                 name + '__w_c')
        self.u_c = create_shared(random_weights((hidden_dim, hidden_dim)),
                                 name + '__u_c')
        self.b_c = create_shared(np.zeros((hidden_dim, )), name + '__b_c')

        # Initialize the initial hidden state h_0 to the zero vector
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')

        # Define parameters
        self.params = [
            self.w_z, self.u_z, self.b_z, self.w_r, self.u_r, self.b_r,
            self.w_c, self.u_c, self.b_c, self.h_0
        ]
Example #20
    def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.with_batch = with_batch
        self.name = name

        # Input gate weights
        self.w_xi = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xi')
        self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hi')
        self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ci')

        # Forget gate weights
        self.w_xf = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xf')
        self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hf')
        self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_cf')

        # Output gate weights
        self.w_xo = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xo')
        self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ho')
        self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_co')

        # Cell weights
        self.w_xc = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xc')
        self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hc')

        # Initialize the bias vectors and the initial states c_0 and h_0 to zero vectors
        self.b_i = create_shared(np.zeros((hidden_dim,)), name + '__b_i')
        self.b_f = create_shared(np.zeros((hidden_dim,)), name + '__b_f')
        self.b_c = create_shared(np.zeros((hidden_dim,)), name + '__b_c')
        self.b_o = create_shared(np.zeros((hidden_dim,)), name + '__b_o')
        self.c_0 = create_shared(np.zeros((hidden_dim,)), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

        # Define parameters
        self.params = [self.w_xi, self.w_hi,  # self.w_ci,
                       self.w_xf, self.w_hf,  # self.w_cf,
                       self.w_xo, self.w_ho,  # self.w_co,
                       self.w_xc, self.w_hc,
                       self.b_i, self.b_c, self.b_o, self.b_f,
                       self.c_0, self.h_0]
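A hedged sketch of the per-timestep recurrence these parameters typically implement (peephole terms are omitted, matching the weights commented out of params above):

import theano.tensor as T

def lstm_step(x_t, c_prev, h_prev, layer):
    i_t = T.nnet.sigmoid(T.dot(x_t, layer.w_xi) + T.dot(h_prev, layer.w_hi) + layer.b_i)
    f_t = T.nnet.sigmoid(T.dot(x_t, layer.w_xf) + T.dot(h_prev, layer.w_hf) + layer.b_f)
    c_t = f_t * c_prev + i_t * T.tanh(T.dot(x_t, layer.w_xc) + T.dot(h_prev, layer.w_hc) + layer.b_c)
    o_t = T.nnet.sigmoid(T.dot(x_t, layer.w_xo) + T.dot(h_prev, layer.w_ho) + layer.b_o)
    h_t = o_t * T.tanh(c_t)
    return c_t, h_t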
Example #21
    def __init__(self, input_dim, hidden_dim, output_emb_dim, output_dim,
                 with_batch=True, name='LSTM'):
        """
        Initialize neural network.
          - input_dim: dimension of input vectors
          - hidden_dim: dimension of hidden vectors
          - output_emb_dim: dimension of output embeddings
          - output_dim: number of possible outputs
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_emb_dim = output_emb_dim
        self.output_dim = output_dim
        self.with_batch = with_batch
        self.name = name

        # Input gate weights
        self.w_xi = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xi')
        self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hi')
        self.w_yi = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yi')
        self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ci')

        # Forget gate weights
        self.w_xf = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xf')
        self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hf')
        self.w_yf = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yf')
        self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_cf')

        # Output gate weights
        self.w_xo = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xo')
        self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ho')
        self.w_yo = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yo')
        self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_co')

        # Cell weights
        self.w_xc = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xc')
        self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hc')
        self.w_yc = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yc')

        # Initialize the bias vectors and the initial states c_0 and h_0 to zero vectors
        self.b_i = create_shared(np.zeros((hidden_dim,)), name + '__b_i')
        self.b_f = create_shared(np.zeros((hidden_dim,)), name + '__b_f')
        self.b_c = create_shared(np.zeros((hidden_dim,)), name + '__b_c')
        self.b_o = create_shared(np.zeros((hidden_dim,)), name + '__b_o')
        self.c_0 = create_shared(np.zeros((hidden_dim,)), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')
        # self.y_0 = create_shared(np.zeros((output_emb_dim,)), name + '__y_0')

        # Weights for the projection to the final output, and the output embeddings
        self.embeddings = create_shared(random_weights((output_dim + 1, output_emb_dim)), name + '__embeddings')
        self.weights = create_shared(random_weights((hidden_dim, output_dim)), name + '__weights')
        self.bias = create_shared(random_weights((output_dim,)), name + '__bias')

        # Define parameters
        self.params = [self.w_xi, self.w_hi, self.w_yi, self.w_ci,
                       self.w_xf, self.w_hf, self.w_yf, self.w_cf,
                       self.w_xo, self.w_ho, self.w_yo, self.w_co,
                       self.w_xc, self.w_hc, self.w_yc,
                       self.b_i, self.b_c, self.b_o, self.b_f,
                       self.c_0, self.h_0,  # self.y_0,
                       self.embeddings, self.weights, self.bias]
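A hedged sketch of the decoder-style output step implied by the embeddings/weights/bias parameters (feeding the predicted symbol's embedding back into the gates at the next step is an assumption):

import theano.tensor as T

def output_step(h_t, layer):
    # project the hidden state to output scores and normalize
    probs = T.nnet.softmax(T.dot(h_t, layer.weights) + layer.bias)
    y_t = T.argmax(probs, axis=1)
    # the embedding of y_t would enter the next step's gates through the w_y* weights
    return probs, layer.embeddings[y_t]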
Example #22
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 output_emb_dim,
                 output_dim,
                 with_batch=True,
                 name='LSTM'):
        """
        Initialize neural network.
          - input_dim: dimension of input vectors
          - hidden_dim: dimension of hidden vectors
          - output_emb_dim: dimension of output embeddings
          - output_dim: number of possible outputs
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_emb_dim = output_emb_dim
        self.output_dim = output_dim
        self.with_batch = with_batch
        self.name = name

        # Input gate weights
        self.w_xi = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xi')
        self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hi')
        self.w_yi = create_shared(random_weights((output_emb_dim, hidden_dim)),
                                  name + '__w_yi')
        self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_ci')

        # Forget gate weights
        self.w_xf = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xf')
        self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hf')
        self.w_yf = create_shared(random_weights((output_emb_dim, hidden_dim)),
                                  name + '__w_yf')
        self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_cf')

        # Output gate weights
        self.w_xo = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xo')
        self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_ho')
        self.w_yo = create_shared(random_weights((output_emb_dim, hidden_dim)),
                                  name + '__w_yo')
        self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_co')

        # Cell weights
        self.w_xc = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xc')
        self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hc')
        self.w_yc = create_shared(random_weights((output_emb_dim, hidden_dim)),
                                  name + '__w_yc')

        # Initialize the bias vectors and the initial states c_0 and h_0 to zero vectors
        self.b_i = create_shared(np.zeros((hidden_dim, )), name + '__b_i')
        self.b_f = create_shared(np.zeros((hidden_dim, )), name + '__b_f')
        self.b_c = create_shared(np.zeros((hidden_dim, )), name + '__b_c')
        self.b_o = create_shared(np.zeros((hidden_dim, )), name + '__b_o')
        self.c_0 = create_shared(np.zeros((hidden_dim, )), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')
        # self.y_0 = create_shared(np.zeros((output_emb_dim,)), name + '__y_0')

        # Weights for the projection to the final output, and the output embeddings
        self.embeddings = create_shared(
            random_weights((output_dim + 1, output_emb_dim)),
            name + '__embeddings')
        self.weights = create_shared(random_weights((hidden_dim, output_dim)),
                                     name + '__weights')
        self.bias = create_shared(random_weights((output_dim, )),
                                  name + '__bias')

        # Define parameters
        self.params = [
            self.w_xi,
            self.w_hi,
            self.w_yi,
            self.w_ci,
            self.w_xf,
            self.w_hf,
            self.w_yf,
            self.w_cf,
            self.w_xo,
            self.w_ho,
            self.w_yo,
            self.w_co,
            self.w_xc,
            self.w_hc,
            self.w_yc,
            self.b_i,
            self.b_c,
            self.b_o,
            self.b_f,
            self.c_0,
            self.h_0,  # self.y_0,
            self.embeddings,
            self.weights,
            self.bias
        ]
Example #23
    def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.with_batch = with_batch
        self.name = name

        # Input gate weights
        self.w_xi = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xi')
        self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hi')
        self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_ci')

        # Forget gate weights
        self.w_xf = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xf')
        self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hf')
        self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_cf')

        # Output gate weights
        self.w_xo = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xo')
        self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_ho')
        self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_co')

        # Cell weights
        self.w_xc = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xc')
        self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hc')

        # Initialize the bias vectors and the initial states c_0 and h_0 to zero vectors
        self.b_i = create_shared(np.zeros((hidden_dim, )), name + '__b_i')
        self.b_f = create_shared(np.zeros((hidden_dim, )), name + '__b_f')
        self.b_c = create_shared(np.zeros((hidden_dim, )), name + '__b_c')
        self.b_o = create_shared(np.zeros((hidden_dim, )), name + '__b_o')
        self.c_0 = create_shared(np.zeros((hidden_dim, )), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')

        # Define parameters
        self.params = [
            self.w_xi,
            self.w_hi,  # self.w_ci,
            self.w_xf,
            self.w_hf,  # self.w_cf,
            self.w_xo,
            self.w_ho,  # self.w_co,
            self.w_xc,
            self.w_hc,
            self.b_i,
            self.b_c,
            self.b_o,
            self.b_f,
            self.c_0,
            self.h_0
        ]
Example #24
def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):
    """
    [Figure 18.23]
    The back-propagation algorithm for multilayer networks.
    """
    # initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5, max_value=0.5, num_weights=len(node.weights))

    examples = dataset.examples
    # As of now, dataset.target gives an int instead of a list.
    # Changing the dataset class would affect all the learners,
    # so this will be taken care of later.
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # forward pass
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dotproduct(inc, node.weights)
                    node.value = node.activation(in_val)

            # initialize delta
            delta = [[] for _ in range(n_layers)]

            # compute outer layer delta

            # error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]

            # calculate delta at output, using the same activation as the hidden layers
            # (the original relied on the `node` variable leaking from the forward-pass loop)
            if activation == sigmoid:
                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif activation == relu:
                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif activation == tanh:
                delta[-1] = [tanh_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif activation == elu:
                delta[-1] = [elu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            else:
                delta[-1] = [leaky_relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]

            # backward pass
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i + 1]

                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

                if activation == sigmoid:
                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == relu:
                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == tanh:
                    delta[i] = [tanh_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == elu:
                    delta[i] = [elu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                else:
                    delta[i] = [leaky_relu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]

            # update weights
            for i in range(1, n_layers):
                layer = net[i]
                inc = [node.value for node in net[i - 1]]
                units = len(layer)
                for j in range(units):
                    layer[j].weights = vector_add(layer[j].weights,
                                                  scalar_vector_product(learning_rate * delta[i][j], inc))

    return net
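A hedged usage sketch, assuming aima-python-style helpers (DataSet and a network constructor; both names and signatures are assumptions, not shown in the excerpt):

iris = DataSet(name='iris')
iris.classes_to_numbers()
# one hidden layer of 3 units; one output unit per class
raw_net = network(len(iris.inputs), [3], len(iris.values[iris.target]), sigmoid)
trained_net = BackPropagationLearner(iris, raw_net, learning_rate=0.1, epochs=100)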