Example #1
0
    def __init__(self, prenet_layers, model_target_dim, attn_dim,
                 location_filters, location_kernel_size, num_rnn_cells,
                 rnn_cell_size, dropout, max_decoder_steps):
        """Store the decoder hyperparameters and build its sub-layers.

        Every argument is kept as an attribute of the same name. The
        constructor then creates a ``Prenet``, a
        ``LocationSensitiveAttention`` instance, a list of
        ``num_rnn_cells`` Keras LSTM cells, and the ``acoustic_projection``
        and ``stop_token_projection`` linear layers.
        """
        super().__init__()

        # Hyperparameters, kept verbatim.
        self.prenet_layers = prenet_layers
        self.model_target_dim = model_target_dim
        self.attn_dim = attn_dim
        self.location_filters = location_filters
        self.location_kernel_size = location_kernel_size
        self.num_rnn_cells = num_rnn_cells
        self.rnn_cell_size = rnn_cell_size
        self.dropout = dropout
        self.max_decoder_steps = max_decoder_steps

        # Sub-layers, created in the same order as before.
        self.prenet = Prenet(prenet_layers=prenet_layers, dropout=dropout)

        self.attention = LocationSensitiveAttention(
            attn_dim=attn_dim,
            location_filters=location_filters,
            location_kernel_size=location_kernel_size,
        )

        lstm_cells = []
        for _ in range(num_rnn_cells):
            lstm_cells.append(
                tf.keras.layers.LSTMCell(units=rnn_cell_size,
                                         use_bias=True,
                                         kernel_initializer="glorot_uniform"))
        self.decoder = lstm_cells

        self.acoustic_projection = Linear(hidden_dim=model_target_dim,
                                          bias=True)
        self.stop_token_projection = Linear(hidden_dim=1, bias=True)
Example #2
0
    def __init__(self, attn_dim, location_filters, location_kernel_size):
        """Store the attention sizes and create the scoring sub-layers.

        Builds the ``query_layer`` and ``memory_layer`` projections (both
        ``attn_dim`` wide), a ``LocationLayer``, and the single-unit ``v``
        projection. ``score_mask_value`` is set to negative infinity.
        """
        super().__init__()

        # Hyperparameters, kept verbatim.
        self.attn_dim = attn_dim
        self.location_filters = location_filters
        self.location_kernel_size = location_kernel_size

        # Sub-layers, created in the same order as before.
        self.query_layer = Linear(hidden_dim=attn_dim, bias=True)
        self.memory_layer = Linear(hidden_dim=attn_dim, bias=True)
        self.location_layer = LocationLayer(
            attn_dim=attn_dim,
            location_filters=location_filters,
            location_kernel_size=location_kernel_size,
        )
        self.v = Linear(hidden_dim=1, bias=True)

        self.score_mask_value = float("-inf")
    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
        """Create the activation and the three linear maps of the cell.

        Args:
            n_inputs: size of the input layer.
            n_hidden: size of the hidden layer.
            n_output: size of the output layer.
            activation: name of the non-linearity, ``'sigmoid'`` or
                ``'tanh'``.

        Raises:
            Exception: if ``activation`` is not a supported name.
        """
        super().__init__()

        self.n_inputs = n_inputs  # size of the input layer
        self.n_hidden = n_hidden  # size of the hidden layer
        self.n_output = n_output  # size of the output layer

        if activation == 'sigmoid':
            self.activation = Sigmoid()
        elif activation == 'tanh':
            # Must be an assignment; a comparison here would leave
            # self.activation unset for the 'tanh' option.
            self.activation = Tanh()
        else:
            raise Exception("Non-linearity not found")

        # Weights mapping the input layer to the hidden layer.
        self.w_ih = Linear(n_inputs, n_hidden)
        # Weights mapping the hidden layer to the hidden layer.
        self.w_hh = Linear(n_hidden, n_hidden)
        # Weights mapping the hidden layer to the output layer.
        self.w_ho = Linear(n_hidden, n_output)

        self.parameters += self.w_ih.get_parameters()
        self.parameters += self.w_hh.get_parameters()
        self.parameters += self.w_ho.get_parameters()
Example #4
0
    def __init__(self, attn_dim, location_filters, location_kernel_size):
        """Store the layer sizes and create the conv and dense sub-layers.

        ``location_conv`` is a ``Conv1D`` with ``location_filters`` filters
        and kernel size ``location_kernel_size``; ``location_dense`` is a
        ``Linear`` layer with ``attn_dim`` hidden units.
        """
        super().__init__()

        # Hyperparameters, kept verbatim.
        self.attn_dim = attn_dim
        self.location_filters = location_filters
        self.location_kernel_size = location_kernel_size

        # Sub-layers, created in the same order as before.
        self.location_conv = Conv1D(
            filters=location_filters,
            kernel_size=location_kernel_size,
            bias=True,
        )
        self.location_dense = Linear(hidden_dim=attn_dim, bias=True)
Example #5
0
def get_linear_logit(features,
                     feature_columns,
                     use_bias=False,
                     init_std=0.0001,
                     seed=1024,
                     prefix='linear',
                     l2_reg=0):
    """Build the linear logit from sparse and/or dense feature inputs.

    The inputs are obtained from ``input_from_feature_columns`` and fed to
    a ``Linear`` layer whose ``mode`` depends on what is available:
    0 for sparse only, 1 for dense only, 2 for both.

    Raises:
        NotImplementedError: if there are neither sparse nor dense inputs.
    """
    sparse_embs, dense_values = input_from_feature_columns(
        features, feature_columns, 1, l2_reg, init_std, seed, prefix=prefix)

    has_sparse = len(sparse_embs) > 0
    has_dense = len(dense_values) > 0

    if not (has_sparse or has_dense):
        raise NotImplementedError

    if has_sparse and has_dense:
        # Sparse is concatenated first, then dense, as before.
        layer_input = [concat_fun(sparse_embs), concat_fun(dense_values)]
        mode = 2
    elif has_sparse:  # sparse features only
        layer_input = concat_fun(sparse_embs)
        mode = 0
    else:  # dense features only
        layer_input = concat_fun(dense_values)
        mode = 1

    return Linear(l2_reg, mode=mode, use_bias=use_bias)(layer_input)
class RNNCell(Layer):
    """Recurrent cell built from three ``Linear`` layers and an activation."""

    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
        """Create the activation and the three linear maps of the cell.

        Args:
            n_inputs: size of the input layer.
            n_hidden: size of the hidden layer.
            n_output: size of the output layer.
            activation: name of the non-linearity, ``'sigmoid'`` or
                ``'tanh'``.

        Raises:
            Exception: if ``activation`` is not a supported name.
        """
        super().__init__()

        self.n_inputs = n_inputs  # size of the input layer
        self.n_hidden = n_hidden  # size of the hidden layer
        self.n_output = n_output  # size of the output layer

        if activation == 'sigmoid':
            self.activation = Sigmoid()
        elif activation == 'tanh':
            # Must be an assignment; a comparison here would leave
            # self.activation unset for the 'tanh' option.
            self.activation = Tanh()
        else:
            raise Exception("Non-linearity not found")

        # Weights mapping the input layer to the hidden layer.
        self.w_ih = Linear(n_inputs, n_hidden)
        # Weights mapping the hidden layer to the hidden layer.
        self.w_hh = Linear(n_hidden, n_hidden)
        # Weights mapping the hidden layer to the output layer.
        self.w_ho = Linear(n_hidden, n_output)

        self.parameters += self.w_ih.get_parameters()
        self.parameters += self.w_hh.get_parameters()
        self.parameters += self.w_ho.get_parameters()

    def forward(self, input, hidden):
        """Run one recurrent step.

        Args:
            input: input for this step, passed to ``w_ih``.
            hidden: hidden state from the previous step, passed to ``w_hh``.

        Returns:
            Tuple ``(output, new_hidden)``.
        """
        # Transform the previous hidden state for the new step.
        from_prev_hidden = self.w_hh.forward(hidden)
        # Combine the transformed input with the transformed hidden state.
        combined = self.w_ih.forward(input) + from_prev_hidden
        # New hidden state, handed to the next step (the network's memory).
        new_hidden = self.activation.forward(combined)
        # Output of this step, produced from the new hidden state.
        output = self.w_ho.forward(new_hidden)
        return output, new_hidden

    def init_hidden(self, batch_size=1):
        """Return an all-zero ``(batch_size, n_hidden)`` hidden state."""
        return Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
Example #7
0
def train(epochs, batch_size, lr, verbose):
    """Train a small fully connected network and evaluate it every epoch.

    Autograd is switched off globally; gradients come from the model's and
    the criterion's own ``backward`` methods. Training and test sets are
    produced by ``generate_data`` and normalized with ``normalize``.

    Args:
        epochs: number of passes over the training set.
        batch_size: number of samples per mini-batch. The last mini-batch
            of a pass is shorter when the dataset size is not a multiple
            of ``batch_size``.
        lr: learning rate handed to the SGD optimizer.
        verbose: when true, print dataset sizes, the parameter count and
            the per-batch / per-epoch metrics.

    Returns:
        Tuple ``(train_losses, test_losses, train_accuracies,
        test_accuracies, train_errors, test_errors)``. The loss lists get
        one entry per mini-batch, the accuracy and error lists one entry
        per epoch.
    """
    # autograd globally off
    torch.set_grad_enabled(False)
    # generate training and testing datasets
    train_data, train_label = generate_data()
    test_data, test_label = generate_data()
    # normalize data be centered at 0
    train_data, test_data = normalize(train_data, test_data)

    if verbose:
        print("--- Dataset ---")
        print("Train X: ", train_data.size(), " | Train y: ",
              train_label.size())
        print(" Test X: ", test_data.size(), " |  Test y: ", test_label.size())

    # input layer (2 input units)
    linear1 = Linear(2, 25, bias=True, weight_init=xavier_uniform)

    # 3 hidden layers (each 25 units)
    linear2 = Linear(25, 25, bias=True, weight_init=xavier_uniform)
    linear3 = Linear(25, 25, bias=True, weight_init=xavier_uniform)
    linear4 = Linear(25, 25, bias=True, weight_init=xavier_uniform)

    # output layer (2 output units)
    linear5 = Linear(25, 2, bias=True, weight_init=xavier_uniform)

    layers = [
        linear1,
        Relu(),
        linear2,
        Relu(),
        linear3,
        Relu(),
        linear4,
        Tanh(),
        linear5,
    ]

    model = Sequential(layers)
    if verbose:
        print("Number of model parameters: {}".format(
            sum([len(p) for p in model.param()])))

    criterion = MSE()
    optimizer = SGD(model, lr=lr)

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []
    train_errors, test_errors = [], []

    n_train = train_data.size(0)
    n_test = test_data.size(0)

    if verbose:
        print("--- Training ---")
    for epoch in range(1, epochs + 1):
        if verbose:
            print("Epoch: {}".format(epoch))

        # TRAINING
        for batch_idx in range(0, n_train, batch_size):
            # Clamp the slice so the last mini-batch does not run past the
            # end of the data when n_train is not a multiple of batch_size.
            length = min(batch_size, n_train - batch_idx)
            output = model.forward(train_data.narrow(0, batch_idx, length))

            # Calculate loss
            loss = criterion.forward(
                output, train_label.narrow(0, batch_idx, length))
            train_losses.append(loss)
            if verbose:
                print("Train Loss: {:.2f}".format(loss.item()))

            # put to zero weights and bias
            optimizer.zero_grad()

            ## Backpropagation
            # Calculate grad of loss
            loss_grad = criterion.backward()

            # Grad of the model
            model.backward(loss_grad)

            # Update parameters
            optimizer.step()

        train_prediction = model.forward(train_data)
        acc = accuracy(train_prediction, train_label)
        train_accuracies.append(acc)
        train_errors.append(1 - acc)
        if verbose:
            print("Train Accuracy: {:.2f}".format(acc.item()))

        # EVALUATION
        for batch_idx in range(0, n_test, batch_size):
            # Same clamping as in the training loop.
            length = min(batch_size, n_test - batch_idx)
            output = model.forward(test_data.narrow(0, batch_idx, length))

            # Calculate loss
            loss = criterion.forward(
                output, test_label.narrow(0, batch_idx, length))
            test_losses.append(loss)
            if verbose:
                print("Test Loss: {:.2f}".format(loss.item()))

        test_prediction = model.forward(test_data)
        acc = accuracy(test_prediction, test_label)
        test_accuracies.append(acc)
        test_errors.append(1 - acc)
        if verbose:
            print("Test Accuracy: {:.2f}".format(acc.item()))

    return train_losses, test_losses, train_accuracies, test_accuracies, train_errors, test_errors
    def __init__(self, config):
        """Validate ``config`` and create the layers the model is built from.

        ``config`` is stored on the instance and checked with
        ``self.check()`` before any layer is created. The model handles
        (``model``, ``EncoderModel``, ``DecoderModel_onestep``) start as
        ``None``.
        """
        self.model = None
        self.check_list = {
            'text_maxlen', 'sentence_maxnum', 'sentence_maxlen', 'hidden_size',
            'delimiter', 'pad_word', 'unk_word', 'start_sent', 'end_sent',
            'vocab_size', 'embed_size', 'learning_rate'
        }
        self.config = config
        assert self.check(), 'parametre check failed'

        cfg = self.config
        sent_num = cfg['sentence_maxnum']
        sent_len = cfg['sentence_maxlen']

        self.size = cfg['hidden_size']

        self.Emb = Embedding(cfg['vocab_size'],
                             cfg['embed_size'],
                             trainable=True)

        # Both split layers share everything except cut_head and name.
        split_kwargs = dict(
            delimiter=cfg['delimiter'],
            output_sentence_len=sent_len,
            output_sentence_num=sent_num,
            pad_word=cfg['pad_word'],
        )
        self.Splitlayer_keephead = SplitLayer(cut_head=False,
                                              name='Split_Layer_keep_head',
                                              **split_kwargs)
        self.Splitlayer_cuthead = SplitLayer(cut_head=True,
                                             name='Split_Layer_cut_head',
                                             **split_kwargs)

        self.Sentence_reshape1D = Reshape((sent_num * sent_len, ),
                                          name='Sentence_reshape1D')
        self.Sentence_reshape2D = Reshape(
            (sent_num, sent_len, cfg['embed_size']),
            name='Sentence_reshape2D')

        self.Encoder_word = CuDNNLSTM(units=self.size,
                                      name='Encoder_word',
                                      return_state=True)
        self.Decoder_word_cell = LSTMCell(units=self.size,
                                          name='Decoder_word_cell')

        self.AttentionMapper = Linear(output_size=self.size,
                                      bias=True,
                                      bias_start=0.0,
                                      activation='tanh')
        # shape : [attention_vec_size]
        self.Join = Dense(units=1, use_bias=False, name='Join')
        self.Exp = Lambda(lambda x: K.exp(x), name='Exp')
        self.Calcprob = Dense(units=cfg['vocab_size'],
                              activation='softmax',
                              name='Calcprob')
        self.ArgMax = Lambda(lambda x: K.argmax(x, axis=-1), dtype='int32')
        self.Printer = Lambda(lambda x: K.tf.Print(x, [x]))
        self.Identical = Lambda(lambda x: x, name='Identical')

        self.EncoderModel = None
        self.DecoderModel_onestep = None

        self._mask = None
        self._targets = None

        self.optim = optimizers.SGD(config['learning_rate'])
Example #9
0
    def attention_decoder(self,
                          decoder_inputs,
                          initial_state,
                          encoder_states,
                          enc_padding_mask,
                          Cell,
                          initial_state_attention=False,
                          pointer_gen=True,
                          use_coverage=False,
                          prev_coverage=None):
        """Build the attention decoder over a list of decoder inputs.

        The method first creates the Keras layers used by the attention
        mechanism, then iterates over ``decoder_inputs``; each step calls
        ``Cell`` followed by the nested ``attention`` helper. The argument
        requirements are described in the comment block that follows.

        Returns:
            A 5-tuple ``(outputs, [state_h, state_c], attn_dists, p_gens,
            coverage_ret)``:

            * ``outputs``: one projected output per decoder input.
            * ``[state_h, state_c]``: the cell states after the last step.
            * ``attn_dists``: one attention distribution per decoder input.
            * ``p_gens``: one value per decoder input when ``pointer_gen``
              is true, otherwise an empty list.
            * ``coverage_ret``: the final coverage reshaped to
              ``[self._batch_size, -1]``, or ``None`` if no coverage was
              passed in or produced.
        """

        # Requirements:
        # decoder_inputs: A list of 2D Tensors [batch_size x input_size].
        #
        # initial_state: 2D Tensor [batch_size x cell.state_size].
        #                 for the initialization of decoder states
        # encoder_states: (batchsize, timestep, 2*hiddenunits)
        #                 [batch_size, attn_length, attn_size].
        #
        # enc_padding_mask: 2D Tensor [batch_size x attn_length] containing 1s and 0s;
        # indicates which of the encoder locations are padding (0) or a real token (1).
        # cell: rnn_cell.RNNCell defining the cell function and size.
        #
        # initial_state_attention:
        # Note that this attention decoder passes each decoder input through a linear layer
        # with the previous step's context vector to get a modified version of the input.
        # If initial_state_attention is False,
        # on the first decoder step the "previous context vector" is just a zero vector.
        # If initial_state_attention is True, we use initial_state to (re)calculate the previous step's context vector.
        # We set this to False for train/eval mode (because we call attention_decoder once for all decoder steps)
        # and True for decode mode (because we call attention_decoder once for each decoder step).
        #
        # pointer_gen: boolean. If True, calculate the generation probability p_gen for each decoder step.
        #
        # use_coverage: boolean. If True, use coverage mechanism.
        #
        # prev_coverage:
        # If not None, a tensor with shape (batch_size, attn_length). The previous step's coverage vector.
        # This is only not None in decode mode when using coverage.

        # NOTE:
        # To initialize a keras CUDNNLSTM layer's state:
        # ##################################################
        # if isinstance(inputs, list):
        #     initial_state = inputs[1:]
        #     inputs = inputs[0]
        # elif initial_state is not None:
        #     pass
        # elif self.stateful:
        #     initial_state = self.states
        # else:
        #    initial_state = self.get_initial_state(inputs)
        #
        # ##################################################
        attn_size = K.int_shape(encoder_states)[2]
        input_size = K.int_shape(decoder_inputs[0])[1]

        encoder_states = Lambda(lambda x: K.expand_dims(x, axis=2))(
            encoder_states)
        # now : encoder_states.shape = (batch_size,attn_length,1,attention_vec_size)
        attention_vec_size = attn_size
        W_h_shape = (1, 1, attn_size, attention_vec_size)
        Encoder_Feature_Extractor = Conv2D(kernel_size=(W_h_shape[0],
                                                        W_h_shape[1]),
                                           filters=W_h_shape[3],
                                           padding="same",
                                           data_format="channels_last")
        # W_h = [filter_height, filter_width, in_channels, out_channels]
        encoder_features = Encoder_Feature_Extractor(encoder_states)
        # nn_ops.conv2d(encoder_states, W_h, [1, 1, 1, 1], "SAME")
        # shape (batch_size,attn_length,    1   , attention_vec_size)
        if use_coverage:
            # The coverage convolution is only created when coverage is on.
            w_c = (1, 1, 1, attention_vec_size)
            Coverage_Feature_Extractor = Conv2D(kernel_size=(w_c[0], w_c[1]),
                                                filters=w_c[3],
                                                padding="same",
                                                data_format="channels_last")

        if prev_coverage is not None:
            # Add two trailing axes to the coverage that was passed in.
            expand_2_3 = Lambda(
                lambda x: K.expand_dims(K.expand_dims(x, 2), 3))
            prev_coverage = expand_2_3(prev_coverage)

        # v: shared vector, attention_vec_size-dim -> 1-dim, calculating
        V = Dense(1, use_bias=False, kernel_initializer='glorot_uniform'
                  )  # shape : [attention_vec_size]
        # The layers below are created once and reused at every decoder step.
        Attn_Dist_and_Encoder_States_to_Context_Vector = Lambda(
            lambda X: attn_dist_and_encoder_states_to_context_vector(
                X, attn_size))
        Masked_Attention = Lambda(
            lambda x: masked_attention(x, enc_padding_mask))
        Features_Adder = Lambda(lambda x: sum_and_tanh(x))
        Squeezer_3_2 = Lambda(
            lambda x: K.squeeze(K.squeeze(x, axis=3), axis=2))
        Expand_Dim_2_2 = Lambda(
            lambda x: K.expand_dims(K.expand_dims(x, 2), 2))
        Attention_Linear_layer = Linear(attention_vec_size, True)
        # the linear layer used in attention(...),
        # transform decoder_state to decoder_features
        Decoder_Input_to_Cell_Input = Linear(input_size, True)
        Calculate_pgen_Linear_layer = Linear(1, True, activation='sigmoid')
        AttnOutputProjection_Linear_layer = Linear(Cell.output_dim, True)
        Expand_1_1 = Lambda(
            lambda x: K.expand_dims(K.expand_dims(x, axis=1), axis=1))

        def attention(decoder_state, coverage=None):
            #   Calculate the context vector and attention distribution from the decoder state.
            # Args:
            #   decoder_state: state of the decoder
            #   coverage: Optional. Previous timestep's coverage vector, shape (batch_size, attn_len, 1, 1).
            # Returns:
            #   context_vector: weighted sum of encoder_states
            #   attn_dist: attention distribution
            #   coverage: new coverage vector. shape (batch_size, attn_len, 1, 1)

            decoder_features = Attention_Linear_layer(
                decoder_state)  # shape (batch_size, attention_vec_size)
            decoder_features = Expand_1_1(
                decoder_features
            )  # reshape to (batch_size, 1, 1, attention_vec_size)

            if use_coverage and coverage is not None:
                coverage_features = Coverage_Feature_Extractor(coverage)
                added_features = Features_Adder(
                    [encoder_features, decoder_features, coverage_features])
                # added_features: shape (batch_size,attn_length, 1, 1)
                e = Squeezer_3_2(V(added_features))
                # e: shape (batch_size,attn_length)
                # Calculate attention distribution
                attn_dist = Masked_Attention(e)
                # Update coverage vector
                # sum over the input sequence

                coverage = Lambda(lambda x: x[0] + Reshape((-1, 1, 1))(x[1]))(
                    [coverage, attn_dist])
            else:
                added_features = Features_Adder(
                    [encoder_features, decoder_features])
                # added_features: shape (batch_size,attn_length, 1, 1)
                e = Squeezer_3_2(V(added_features))
                attn_dist = Masked_Attention(e)
                if use_coverage:  # first step of training
                    coverage = Expand_Dim_2_2(attn_dist)  # initialize coverage

            context_vector = Attn_Dist_and_Encoder_States_to_Context_Vector(
                [attn_dist, encoder_states])
            # context_vector = math_ops.reduce_sum(array_ops.reshape(attn_dist,
            #                                                        [batch_size, -1, 1, 1]) * encoder_states,
            #                                                        [1, 2]) # shape (batch_size, attn_size).
            # context_vector = array_ops.reshape(context_vector, [-1, attn_size])

            return context_vector, attn_dist, coverage

        # ####END OF ATTENTION#### #

        # Return values:
        outputs = []
        attn_dists = []
        p_gens = []
        # initial_state is a list/ tuple
        state_h, state_c = initial_state[0], initial_state[1]
        coverage_ret = prev_coverage  # initialize coverage to None or whatever was passed in

        # re-typed to tf.Tensor for backend operations
        context_vector_ret = Lambda(
            lambda x: K.zeros(shape=(self._batch_size, attn_size)))([])
        # Get a zero-initialized context vector
        if initial_state_attention:
            # Re-calculate the context vector from the previous step
            # so that we can pass it through a linear layer with this step's input
            # to get a modified version of the input
            context_vector_ret, _, coverage_ret = attention(
                initial_state, coverage_ret)
            # in decode mode, this is what updates the coverage vector
        # otherwise, context_vector & coverage are zero vectors
        for i, inp in enumerate(decoder_inputs):
            # Merge this step's input with the previous context vector.
            transformed_inp = Decoder_Input_to_Cell_Input(
                [inp, context_vector_ret])
            cell_output, state_h, state_c = Cell(
                [transformed_inp, state_h, state_c])
            if i == 0 and initial_state_attention:  # always true in decode mode
                context_vector_ret, attn_dist_ret, _ = attention(
                    [state_h, state_c], coverage_ret)
                # don't allow coverage to update
            else:
                context_vector_ret, attn_dist_ret, coverage_ret = attention(
                    [state_h, state_c], coverage_ret)
            attn_dists.append(attn_dist_ret)

            if pointer_gen:
                p_gen = Calculate_pgen_Linear_layer(
                    [context_vector_ret, state_h, state_c, transformed_inp])
                p_gens.append(p_gen)

            output = AttnOutputProjection_Linear_layer(
                [cell_output, context_vector_ret])
            outputs.append(output)

        print('finished adding attention_decoder for each time step!')
        if coverage_ret is not None:
            # Flatten the coverage to two dimensions before returning it.
            coverage_ret = Lambda(
                lambda x: K.reshape(x, [self._batch_size, -1]))(coverage_ret)

        return outputs, [state_h, state_c], attn_dists, p_gens, coverage_ret
Example #10
0
    def __init__(self, n_inputs, n_hidden, n_output):
        """Create the gate layers and the output projection of the cell.

        Four ``Linear`` layers read the input (``xf``, ``xi``, ``xo``,
        ``xc``), four bias-free ones read the previous hidden state
        (``hf``, ``hi``, ``ho``, ``hc``), and ``w_ho`` maps the hidden
        state to the output. All their parameters are appended to
        ``self.parameters``.
        """
        super().__init__()

        self.n_inputs = n_inputs  # size of the input layer
        self.n_hidden = n_hidden  # size of the hidden layer
        self.n_output = n_output  # size of the output layer

        # Input-to-hidden layers.
        self.xf = Linear(n_inputs, n_hidden)
        self.xi = Linear(n_inputs, n_hidden)
        self.xo = Linear(n_inputs, n_hidden)
        self.xc = Linear(n_inputs, n_hidden)

        # Hidden-to-hidden layers, created without bias.
        self.hf = Linear(n_hidden, n_hidden, bias=False)
        self.hi = Linear(n_hidden, n_hidden, bias=False)
        self.ho = Linear(n_hidden, n_hidden, bias=False)
        self.hc = Linear(n_hidden, n_hidden, bias=False)

        # Hidden-to-output projection.
        self.w_ho = Linear(n_hidden, n_output, bias=False)

        # Register parameters in the same order the layers were created.
        sublayers = (self.xf, self.xi, self.xo, self.xc,
                     self.hf, self.hi, self.ho, self.hc,
                     self.w_ho)
        for layer in sublayers:
            self.parameters += layer.get_parameters()
Example #11
0
class LSTMCell(Layer):
    """LSTM cell assembled from ``Linear`` layers, with an output projection."""

    def __init__(self, n_inputs, n_hidden, n_output):
        """Create the gate layers and the hidden-to-output projection."""
        super().__init__()

        self.n_inputs = n_inputs  # size of the input layer
        self.n_hidden = n_hidden  # size of the hidden layer
        self.n_output = n_output  # size of the output layer

        # Input-to-hidden layers.
        self.xf = Linear(n_inputs, n_hidden)
        self.xi = Linear(n_inputs, n_hidden)
        self.xo = Linear(n_inputs, n_hidden)
        self.xc = Linear(n_inputs, n_hidden)

        # Hidden-to-hidden layers, created without bias.
        self.hf = Linear(n_hidden, n_hidden, bias=False)
        self.hi = Linear(n_hidden, n_hidden, bias=False)
        self.ho = Linear(n_hidden, n_hidden, bias=False)
        self.hc = Linear(n_hidden, n_hidden, bias=False)

        # Hidden-to-output projection.
        self.w_ho = Linear(n_hidden, n_output, bias=False)

        # Register parameters in the same order the layers were created.
        sublayers = (self.xf, self.xi, self.xo, self.xc,
                     self.hf, self.hi, self.ho, self.hc,
                     self.w_ho)
        for layer in sublayers:
            self.parameters += layer.get_parameters()

    def forward(self, input, hidden):
        """Run one LSTM step.

        Args:
            input: input for this step.
            hidden: pair whose first item is the previous hidden state
                (short-term memory) and whose second item is the previous
                cell state (long-term memory).

        Returns:
            Tuple ``(output, (h, c))`` with the projected output and the
            new hidden and cell states.
        """
        prev_hidden, prev_cell = hidden[0], hidden[1]

        # Forget gate: how much of the old cell state to keep, in [0, 1].
        forget_gate = (self.xf.forward(input) +
                       self.hf.forward(prev_hidden)).sigmoid()

        # Input gate: how much new information to store, in [0, 1].
        input_gate = (self.xi.forward(input) +
                      self.hi.forward(prev_hidden)).sigmoid()

        # Candidate values that may be added to the cell state.
        candidate = (self.xc.forward(input) +
                     self.hc.forward(prev_hidden)).tanh()

        # New cell state: forget part of the old one, add the gated candidate.
        cell = (forget_gate * prev_cell) + (input_gate * candidate)

        # Output gate: which share of the cell state to expose, in [0, 1].
        output_gate = (self.xo.forward(input) +
                       self.ho.forward(prev_hidden)).sigmoid()

        # New hidden state: gated cell state squashed to [-1, 1].
        h = output_gate * cell.tanh()

        return self.w_ho.forward(h), (h, cell)

    def init_hidden(self, batch_size=1):
        """Return the initial ``(hidden, cell)`` pair.

        Both tensors are ``(batch_size, n_hidden)`` zeros whose first
        column is then set to 1.
        """
        state_shape = (batch_size, self.n_hidden)
        init_hidden = Tensor(np.zeros(state_shape), autograd=True)
        init_cell = Tensor(np.zeros(state_shape), autograd=True)
        for state in (init_hidden, init_cell):
            state.data[:, 0] += 1
        return (init_hidden, init_cell)
Example #12
0
 def __init__(self):
     """Create the conv2d, Linear, max_pool2d, relu and sigmoid components."""
     self.conv1 = conv2d(1, 1, 3)

     # The fully connected layer takes 14 * 14 inputs and has 10 outputs.
     flat_features = 14 * 14
     self.fc1 = Linear(flat_features, 10)

     self.max_pool = max_pool2d(2, 2)
     self.relu = relu()
     self.sigmoid = sigmoid()