예제 #1
0
class ConvNeuralNetwork:
    def __init__(self,
                 part='2',
                 img_width=28,
                 filter_width=28,
                 num_filters=2,
                 num_classes=2,
                 alpha=.01,
                 activation_function='sigmoid',
                 relu_alpha=0,
                 sig_lambdas=(1, 1, 1),
                 subset_size=1,
                 tanh_lambda=1):
        '''Configure the network for one assignment part, load data and init weights.

        Args:
            part: '2', '3a' or '3b'. Selects the filter size, filter count,
                class count and data directory. '2' and '3a' use the
                single dense layer (3A) routines, '3b' the two dense layer
                (3B) routines.
            img_width: side length of the square input image.
            filter_width, num_filters, num_classes: kept for interface
                compatibility; every supported `part` overrides them with
                its own preset.
            alpha: learning rate multiplied into every weight update.
            activation_function: 'sigmoid', 'tanh' or 'relu'.
            relu_alpha: slope reported by activation_p for non-positive
                ReLU outputs.
            sig_lambdas: per-layer sigmoid steepness factors, indexed
                conv=0, then the dense layers in forward order.
            subset_size: forwarded unchanged to DataProvider.
            tanh_lambda: steepness factor used by the tanh activation.

        Raises:
            ValueError: if `part` is not one of the supported values.
        '''
        self.part = part

        # Each part pins its own architecture and data location.
        if part == '2':
            self.filter_width = 28
            self.num_filters = 2
            num_classes = 2
            train_dir = '../data/part2/train/*'
            # NOTE(review): test glob points at the *train* folder -- confirm
            # this is intended before trusting the reported test error.
            test_dir = '../data/part2/train/*'
        elif part in ('3a', '3b'):
            self.filter_width = 7
            self.num_filters = 16
            num_classes = 10
            train_dir = '../data/part3/train/*'
            # NOTE(review): same train/test glob duplication as part 2.
            test_dir = '../data/part3/train/*'
        else:
            # Fail early with a clear message instead of an AttributeError on
            # self.filter_width a few lines further down.
            raise ValueError(
                "part must be one of '2', '3a' or '3b', got {!r}".format(part))

        self.img_width = img_width

        self.output_dim = num_classes
        self.alpha = alpha
        self.activation_function = activation_function

        self.relu_alpha = relu_alpha
        self.sig_lambdas = sig_lambdas
        self.tanh_lambda = tanh_lambda

        # Computed properties
        # Number of kernel positions in a 'valid' correlation.
        self.conv_mat_H = np.power((img_width - self.filter_width + 1), 2)
        # Space between the kernel and the edge of the image.
        self.conv_mat_S = img_width - self.filter_width
        self.conv_output_dim = self.conv_mat_H * self.num_filters

        # Create data provider to feed in data
        self.dp = DataProvider(train_dir, test_dir, num_classes, subset_size)

        if part in ('2', '3a'):
            self.init_weights_3A()
        else:
            self.init_weights_3B()

    def train(self, epochs, shuffle, dp=False):

        epoch_sample_errors = []
        epoch_mses_train = []
        epoch_mses_test = []
        epoch_gradient_mags = []
        sample_gradient_mags = []

        if not dp:
            dp = self.dp

        for features, target, epoch_over in dp.enumerate_dataset(
                train=True, epochs=epochs, shuffle=shuffle):

            if self.part != '3b':
                sample_error = self.forward_pass_3A(features, target)
                sample_gradient = self.backward_pass_3A(features, target)
            else:
                sample_error = self.forward_pass_3B(features, target)
                sample_gradient = self.backward_pass_3B(features, target)

            epoch_sample_errors.append(sample_error)
            sample_gradient_mags.append(sample_gradient)

            if epoch_over:
                if self.part != '2':
                    print('Epoch {} complete'.format(len(epoch_mses_train)))

                test_mse = self.evaluate()

                epoch_mses_train.append(pd.Series(epoch_sample_errors).mean())
                epoch_gradient_mags.append(
                    pd.Series(sample_gradient_mags).mean())
                epoch_mses_test.append(test_mse)

                epoch_sample_errors = []
                sample_gradient_mags = []

        return epoch_mses_train, epoch_mses_test, epoch_gradient_mags

    ############# Functions for Parts 2 and 3 A ###############################
    def init_weights_3A(self):
        '''Initialize all weights and biases in the network, will add options for different strategies'''
        wConvVar = np.power(self.conv_mat_H, -.5)
        self.WConv = np.random.normal(0,
                                      wConvVar,
                                      size=(self.filter_width,
                                            self.filter_width,
                                            self.num_filters))
        self.bConv = np.random.normal(0, wConvVar, size=(self.num_filters, 1))

        w2Var = np.power(self.conv_output_dim, -.5)
        self.W2 = np.random.normal(0, w2Var,
                                   (self.output_dim, self.conv_output_dim))
        self.b2 = np.random.normal(0, w2Var, (self.output_dim, 1))

        #Create Toeplitz Matrix for backprop and deconv
        self.WConvC = self.filter_to_C(self.WConv)

    def forward_pass_3A(self, features, target):
        '''Compute forward pass over this data sample, returning SSE for pass'''
        features_sq = features.reshape(self.img_width, self.img_width)
        self.ConvY = np.zeros((self.num_filters, self.conv_mat_H))

        for filter in range(self.num_filters):
            activation = correlate2d(features_sq,
                                     self.WConv[:, :, filter],
                                     mode='valid')
            self.ConvY[filter, :] = activation.reshape(1, self.conv_mat_H)

        self.ConvY = self.activation(self.ConvY + self.bConv,
                                     self.sig_lambdas[0])
        self.ConvY = self.ConvY.reshape(
            self.ConvY.shape[0] * self.ConvY.shape[1], 1)

        self.Y2 = self.W2.dot(self.ConvY)
        self.Y2 = self.activation(self.Y2 + self.b2, self.sig_lambdas[1])

        self.error = target - self.Y2
        return np.sum(np.square(self.error)) / 2

    def backward_pass_3A(self, features, target):
        '''Compute backward pass over this data sample'''

        #Ouput layer deltas
        d2 = np.multiply(self.error,
                         self.activation_p(self.Y2, self.sig_lambdas[1]))

        #Connected layer weight gradients
        W2_grad = np.outer(d2, self.ConvY)
        W2_update = np.multiply(self.alpha, W2_grad)
        b2_update = np.multiply(self.alpha, d2)

        #Conv output deltas
        dConv = np.multiply(self.activation_p(self.ConvY, self.sig_lambdas[0]),
                            np.transpose(self.W2).dot(d2))
        dConv = dConv.reshape(self.conv_mat_H, self.num_filters)
        self.dConv = dConv

        #Conv weight gradients
        input_tiled = self.create_input_tiled(features.T).T
        WConv_grads = np.matmul(input_tiled,
                                dConv).reshape(self.filter_width,
                                               self.filter_width,
                                               self.num_filters)
        WConv_update = np.multiply(self.alpha, WConv_grads)
        self.WConv_grads = WConv_grads

        #Conv bias
        bConv_grads = np.sum(dConv.T, axis=0)
        bConv_update = np.multiply(self.alpha, bConv_grads)

        #Perform weight updates
        self.W2 = self.W2 + W2_update
        self.b2 = self.b2 + b2_update

        self.WConv = self.WConv + WConv_update
        self.bConv = self.bConv + bConv_update

        return np.sum(np.square(dConv))

    ############End Functions for part 3 A ####################################
    ############# Functions for Part 3 B ######################################

    def init_weights_3B(self):
        '''Initialize all weights and biases in the network, will add options for different strategies'''

        wConvVar = np.power(self.conv_mat_H, -.5)
        self.WConv = np.random.normal(0,
                                      wConvVar,
                                      size=(self.filter_width,
                                            self.filter_width,
                                            self.num_filters))
        self.bConv = np.random.normal(0, wConvVar, size=(self.num_filters, 1))

        w2Avar = np.power(self.conv_output_dim, -.5)
        self.W2a = np.random.normal(0, w2Avar, (128, self.conv_output_dim))
        self.b2a = np.random.normal(0, w2Avar, (128, 1))
        w2dim = 128

        w2Var = np.power(w2dim, -.5)
        self.W2 = np.random.normal(0, w2Var, (self.output_dim, w2dim))
        self.b2 = np.random.normal(0, w2Var, (self.output_dim, 1))

        #Create Toeplitz Matrix for backprop and deconv
        self.WConvC = self.filter_to_C(self.WConv)

    def forward_pass_3B(self, features, target):
        '''Compute forward pass over this data sample, returning SSE for pass'''

        features_sq = features.reshape(self.img_width, self.img_width)
        self.ConvY = np.zeros((self.num_filters, self.conv_mat_H))

        for filter in range(self.num_filters):
            activation = correlate2d(features_sq,
                                     self.WConv[:, :, filter],
                                     mode='valid')
            self.ConvY[filter, :] = activation.reshape(1, self.conv_mat_H)

        self.ConvY = self.activation(self.ConvY + self.bConv,
                                     self.sig_lambdas[0])
        self.ConvY = self.ConvY.reshape(
            self.ConvY.shape[0] * self.ConvY.shape[1], 1)

        self.Y2a = self.W2a.dot(self.ConvY)
        self.Y2a = self.activation(self.Y2a + self.b2a, self.sig_lambdas[1])

        self.Y2 = self.W2.dot(self.Y2a)
        self.Y2 = self.activation(self.Y2 + self.b2, self.sig_lambdas[2])

        self.error = target - self.Y2
        return np.sum(np.square(self.error)) / 2

    def backward_pass_3B(self, features, target):
        '''Compute backward pass over this data sample'''

        #Ouput layer deltas
        d2 = np.multiply(self.error,
                         self.activation_p(self.Y2, self.sig_lambdas[1]))

        W2_grad = np.outer(d2, self.Y2a)
        W2_update = np.multiply(self.alpha, W2_grad)
        b2_update = np.multiply(self.alpha, d2)
        d2a = np.multiply(self.activation_p(self.Y2a, self.sig_lambdas[0]),
                          np.transpose(self.W2).dot(d2))

        #Connected layer weight gradients
        W2a_grad = np.outer(d2a, self.ConvY)
        W2a_update = np.multiply(self.alpha, W2a_grad)
        b2a_update = np.multiply(self.alpha, d2a)

        #Conv output deltas
        dConv = np.multiply(self.activation_p(self.ConvY, self.sig_lambdas[0]),
                            np.transpose(self.W2a).dot(d2a))
        dConv = dConv.reshape(self.conv_mat_H, self.num_filters)
        self.dConv = dConv

        #Conv weight gradients
        input_tiled = self.create_input_tiled(features.T).T
        WConv_grads = np.matmul(input_tiled,
                                dConv).reshape(self.filter_width,
                                               self.filter_width,
                                               self.num_filters)
        WConv_update = np.multiply(self.alpha, WConv_grads)
        self.WConv_grads = WConv_grads

        #Conv bias
        bConv_grads = np.sum(dConv.T, axis=0)
        bConv_update = np.multiply(self.alpha, bConv_grads)

        #Perform weight updates
        self.W2 = self.W2 + W2_update
        self.b2 = self.b2 + b2_update

        self.W2a = self.W2a + W2a_update
        self.b2a = self.b2a + b2a_update

        self.WConv = self.WConv + WConv_update
        self.bConv = self.bConv + bConv_update

        return np.sum(np.square(dConv))

    ####################### End functions for part 3 B ########################################################

    def evaluate(self):

        mses = []
        for features, target, epoch_over in self.dp.enumerate_dataset(
                train=False, epochs=1, shuffle=False):

            if self.part != '3b':
                sample_error = self.forward_pass_3A(features, target)
            else:
                sample_error = self.forward_pass_3B(features, target)

            mses.append(sample_error)

        return pd.Series(mses).mean()

    def get_model_accuracy(self):

        predicted_train = []
        actual_train = []
        predicted_test = []
        actual_test = []

        for features, target, epoch_over in self.dp.enumerate_dataset(
                train=True, epochs=1, shuffle=False):

            if self.part != '3b':
                self.forward_pass_3A(features, target)
            else:
                self.forward_pass_3B(features, target)

            predicted_train.append(self.Y2.argmax())
            actual_train.append(target.argmax())

        for features, target, epoch_over in self.dp.enumerate_dataset(
                train=False, epochs=1, shuffle=False):

            if self.part != '3b':
                self.forward_pass_3A(features, target)
            else:
                self.forward_pass_3B(features, target)

            predicted_test.append(self.Y2.argmax())
            actual_test.append(target.argmax())

        prediction_scores_train = {
            'predicted': predicted_train,
            'actual': actual_train
        }
        prediction_scores_test = {
            'predicted': predicted_test,
            'actual': actual_test
        }

        return prediction_scores_train, prediction_scores_test

    def filter_to_C(self, WConv):
        '''Helper method: Converts a square filter to a staggered matrix C'''
        conv_vec_buff_h = np.zeros(
            (self.filter_width, self.conv_mat_S, self.num_filters))
        conv_vec = np.concatenate([WConv, conv_vec_buff_h], axis=1)
        conv_vec_buff_v = np.zeros(
            (self.conv_mat_S, conv_vec.shape[1], self.num_filters))
        conv_vec = np.concatenate([conv_vec, conv_vec_buff_v])
        conv_vec = conv_vec.reshape(1, np.power(self.img_width, 2),
                                    self.num_filters)
        conv_mat = np.tile(conv_vec, (self.conv_mat_H, 1, 1))

        #Create standard conv matrix
        for i in range(1, self.conv_mat_H):
            if i % (self.conv_mat_S + 1) == 0:
                conv_mat[i:, :, :] = np.roll(conv_mat[i:, :, :],
                                             self.filter_width,
                                             axis=1)
            else:
                conv_mat[i:, :, :] = np.roll(conv_mat[i:, :, :], 1, axis=1)

        return conv_mat

    def create_input_tiled(self, features):
        '''Creates a tiled input matrix to use for back proping weignts'''

        input_tiled = np.tile(features, (1, self.WConvC.shape[0])).T
        input_tiled = input_tiled[self.WConvC[:, :, 0] != 0].reshape(
            self.conv_mat_H, self.filter_width**2)
        return input_tiled

    def perform_deconvolution(self):

        for imclass in range(self.dp.deconv_samples.shape[0]):

            features, target = self.dp.deconv_samples[
                imclass, 1:], self.dp.deconv_samples[imclass, 0]
            print('Image class: '.format(target))
            if self.part != '3b':
                self.forward_pass_3A(features, target)
            else:
                self.forward_pass_3B(features, target)

            WConvC = self.filter_to_C(self.WConv)

            for filter in range(self.num_filters):
                dConv = self.dConv[:, filter]
                WConvCs = WConvC[:, :, filter]
                deconv_activations = dConv.dot(WConvCs)
                deconv_activations = deconv_activations.reshape(
                    self.img_width, self.img_width)
                plt.figure()
                plt.imshow(deconv_activations)
                plt.title('Activations for filter {}'.format(filter))

    def activation(self, array, sig_lambda=1, tanh_lambda=1):
        if self.activation_function == 'sigmoid':
            return 1 / (1 + np.exp(sig_lambda * -array))

        elif self.activation_function == 'tanh':
            num = (np.exp(self.tanh_lambda * array) -
                   np.exp(self.tanh_lambda * -array))
            denom = (np.exp(self.tanh_lambda * array) +
                     np.exp(self.tanh_lambda * -array))
            return num / denom

        elif self.activation_function == 'relu':
            return abs(array) * (array > 0)

    def activation_p(self, array, sig_lambda=1):

        if self.activation_function == 'sigmoid':
            return sig_lambda * array * (1.0 - array)

        elif self.activation_function == 'tanh':
            return self.tanh_lambda * 1 - np.power(array, 2)

        elif self.activation_function == 'relu':
            return np.where(array <= 0, self.relu_alpha, 1)