Example #1
import pickle

import numpy as np

# Assumes img_num, x_test, y_test, model_test (a trained RBM) and Linear
# (the classifier layer) are defined in the surrounding scope.
def train(x_train, y_train, alpha=0.001, weight_decay=0.0, epochs=40):
    for i in range(epochs):
        train_loss = 0
        count = 0
        for j in range(img_num):
            x = x_train[j].copy().reshape(1, 784) / 255.0  # scale pixels to [0, 1]
            target = y_train[j].copy()

            # Extract hidden features with the trained RBM
            x, _ = model_test.sample_h_given_v(x)

            # Forward pass through the linear classifier
            linear = Linear.forward(x)
            loss = cross_entropy(linear, target)
            train_loss += loss

            # Backward pass: the gradient of softmax cross-entropy w.r.t.
            # the logits is softmax(logits) - one_hot(target)
            shifted = linear - np.max(linear)  # stabilize exp against overflow
            prob = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)
            prob[0, target] -= 1
            Linear.backward(x, prob, alpha, weight_decay)

            # Training accuracy (exp is monotonic, so the argmax of the
            # logits equals the argmax of the probabilities)
            if np.argmax(linear) == target:
                count += 1

        # Test accuracy on the held-out set
        num_test = 10000
        test_count = 0
        for k in range(num_test):
            test_x = x_test[k].copy().reshape(1, 784) / 255.0
            test_x, _ = model_test.sample_h_given_v(test_x)
            test_target = y_test[k].copy()
            test_logits = Linear.forward(test_x)

            if np.argmax(test_logits) == test_target:
                test_count += 1

        print("Epoch", i, " ", "Loss", " ", train_loss / img_num, "train_Acc",
              " ", count / img_num, "test_Acc", " ", test_count / num_test)

    # Persist the classifier parameters (pickled, despite the .npy suffix)
    with open("model/classifier_weight_rbm.npy", "wb") as f:
        pickle.dump(Linear.weight, f)

    with open("model/classifier_bias_rbm.npy", "wb") as f:
        pickle.dump(Linear.bias, f)
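
A minimal sketch of how this function might be driven, assuming an MNIST-style dataset; load_mnist, RBM and LinearLayer below are hypothetical stand-ins for whatever loader and layer classes the surrounding project defines.

# Usage sketch: every name below except train() is an assumption.
x_train, y_train, x_test, y_test = load_mnist()          # hypothetical loader
img_num = len(x_train)

model_test = RBM(n_visible=784, n_hidden=500)            # assumed pre-trained RBM
Linear = LinearLayer(in_features=500, out_features=10)   # hypothetical classifier layer

train(x_train, y_train, alpha=0.001, weight_decay=0.0001, epochs=40)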
Example #2
from typing import Tuple

import numpy as np

# Assumes LSTMCell, RepeatVector, Linear, softmax and softmax_cross_entropy
# are defined elsewhere in the project.
class RNNAutoencoder:
    def __init__(self, vac_size: int, hidden_sizes: Tuple[int, int],
                 seq_size: int):
        """
        Implements an RNN autoencoder.

        The architecture has two LSTM layers in the encoder, and two LSTM
        layers followed by a linear layer in the decoder.

        :param vac_size: vocabulary size, int
        :param hidden_sizes: hidden sizes of the two encoder LSTM layers, Tuple[int, int]
        :param seq_size: sequence length, int
        """

        self.vac_size = vac_size
        self.hidden_size_1 = hidden_sizes[0]
        self.hidden_size_2 = hidden_sizes[1]
        self.seq_size = seq_size

        #Encode
        self.lstm1 = LSTMCell(vac_size=self.vac_size,
                              hidden_size=self.hidden_size_1,
                              return_seq=True)
        self.lstm2 = LSTMCell(vac_size=self.hidden_size_1,
                              hidden_size=self.hidden_size_2,
                              return_seq=False)

        self.repeat = RepeatVector(self.seq_size)

        #Decode
        self.lstm3 = LSTMCell(self.hidden_size_2,
                              self.hidden_size_1,
                              return_seq=True)
        self.lstm4 = LSTMCell(self.hidden_size_1,
                              self.vac_size,
                              return_seq=True)

        self.linear = Linear(self.vac_size, self.vac_size)

    def params(self):
        """
        Returns the parameters of the whole model.

        :return: dict
        """
        return {
            'lstm1': self.lstm1.params(),
            'lstm2': self.lstm2.params(),
            'lstm3': self.lstm3.params(),
            'lstm4': self.lstm4.params(),
            'linear': self.linear.params()
        }

    def clear_gradients(self):
        """
        Clears the accumulated gradients of every layer.
        :return:
        """
        self.lstm1.clear_gradients()
        self.lstm2.clear_gradients()
        self.lstm3.clear_gradients()
        self.lstm4.clear_gradients()
        self.linear.clear_gradients()

    def forward(self, X: np.ndarray):
        """
        Forward pass through the model.

        :param X: np.ndarray
        :return: predictions of the model
        """
        self.clear_gradients()

        # Encode the sequence into a single hidden vector, repeat it
        # seq_size times, then decode it back into a sequence
        encode = self.lstm2.forward(self.lstm1.forward(X))
        bridge = self.repeat.forward(encode)
        decode = self.lstm4.forward(self.lstm3.forward(bridge))

        # Drop the trailing singleton axis before the linear layer
        decode = decode.reshape(decode.shape[0], decode.shape[1])

        pred = self.linear.forward(decode)

        return pred

    def compute_loss_and_gradient(self, X: np.ndarray, y: np.ndarray):
        """
        Runs the forward pass and computes the loss and its derivative.

        :param X: unsorted one-hot array (seq_size, vac_size, 1)
        :param y: sorted sequence (seq_size, )
        :return: loss and its derivative
        """
        pred = self.forward(X)
        loss, dprediction = softmax_cross_entropy(pred, y)
        return loss, dprediction

    def repeat_backward(self, x: np.ndarray):
        """
        Routes the gradient back through the encoder bridge: since lstm2
        returns only its last output, the incoming gradient is placed at
        the last time step and all other steps receive zeros.

        :param x: np.ndarray of size (hidden_size_2, 1)
        :return: d_out: np.ndarray of size (seq_size, hidden_size_2, 1)
        """
        d_out = np.zeros((self.seq_size, *x.shape))
        d_out[-1] = x
        return d_out

    def backward(self, d_out: np.ndarray):
        """
        Backward pass through the model, mirroring forward() in reverse.

        :param d_out: derivative of the loss w.r.t. the predictions
        :return: derivative w.r.t. the input X
        """
        d_l = self.linear.backward(d_out)
        d_l = d_l.reshape(*d_l.shape, 1)  # restore the trailing singleton axis

        d_l = self.lstm3.backward(self.lstm4.backward(d_l))

        bridge = self.repeat_backward(self.repeat.backward(d_l))

        d_x = self.lstm1.backward(self.lstm2.backward(bridge))

        return d_x

    def predict(self, X: np.ndarray):
        """
        Predicts the output sequence as the argmax over the softmax
        probabilities of each time step.
        :param X: np.ndarray
        :return: np.ndarray of predicted indices, shape (seq_size, )
        """
        pred = self.forward(X)
        probs = softmax(pred)
        return np.argmax(probs, axis=1)
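
A minimal sketch of one training step with this class, assuming the task the docstrings describe (reconstructing a one-hot sequence in sorted order); the sizes and data are illustrative, and the optimizer step is only indicated since no update rule is shown here.

# Usage sketch: sizes and data below are illustrative assumptions.
import numpy as np

vac_size, seq_size = 10, 6
model = RNNAutoencoder(vac_size=vac_size, hidden_sizes=(64, 32),
                       seq_size=seq_size)

seq = np.random.randint(0, vac_size, size=seq_size)  # random token indices
X = np.zeros((seq_size, vac_size, 1))
X[np.arange(seq_size), seq] = 1.0                    # one-hot encode: (seq_size, vac_size, 1)
y = np.sort(seq)                                     # target: the sorted sequence

loss, dpred = model.compute_loss_and_gradient(X, y)
model.backward(dpred)                                # fills each layer's gradients
# ...apply an optimizer update to model.params() here...
print(loss, model.predict(X))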