Example #1
def encoder(x, params, config):

    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    x = T.specify_shape(x, (128, 1, 28, 28))

    #c_1 = ConvPoolLayer(in_length = 4000, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = True, W = params['Wc_enc_1'], b = params['bc_enc_1'])

    #c_2 = ConvPoolLayer(in_length = 399, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = True, W = params['Wc_enc_2'], b = params['bc_enc_2'])

    #c_3 = ConvPoolLayer(in_length = 38, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = True, W = params['Wc_enc_3'], b = params['bc_enc_3'])

    h_out_1 = HiddenLayer(num_in = 784, num_out = num_hidden, W = params['W_enc_1'], b = params['b_enc_1'], activation = 'relu', batch_norm = True)

    h_out_2 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, W = params['W_enc_2'], b = params['b_enc_2'], activation = 'relu', batch_norm = True)

    print "x ndim", x.ndim

    #c_1_value = T.specify_shape(c_1.output(x), (128, 96, 16, 16))
    #c_2_value = c_2.output(c_1_value)
    #c_3_value = c_3.output(c_2_value)
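
    # x.flatten(2) turns the (128, 1, 28, 28) input into (128, 784),
    # matching num_in = 784 in h_out_1 above.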

    h_out_1_value = T.specify_shape(h_out_1.output(x.flatten(2)), (128, num_hidden))
    h_out_2_value = h_out_2.output(h_out_1_value)

    return {'h' : h_out_2_value}
Example #2
    def __init__(self, data, all_y_trues, neurons_in_hl=[0]):
        # Data Members
        self.data = data
        self.all_y_trues = all_y_trues
        self.hidden_layers = []

        # # Hidden Layer 1
        # self.hl = HiddenLayer(neurons_in_hl[0])

        # Build each hidden layer and store it in the hidden_layers list
        amount_of_weights = len(self.data[0])
        for hl_count in range(len(neurons_in_hl)):
            hl = HiddenLayer(neurons_in_hl[hl_count])
            for neuron in hl.neurons():
                neuron.changeProps(
                    [np.random.normal() for i in range(amount_of_weights)],
                    np.random.normal())

            amount_of_weights = neurons_in_hl[hl_count]
            self.hidden_layers.append(hl)

        # Output Neuron
        self.o1 = Neuron(
            [np.random.normal() for i in range(amount_of_weights)],
            np.random.normal())
Example #3
class MLP(object):
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        self.x = input
        self.y = label

        if rng is None:
            rng = np.random.RandomState(1234)

        # construct hidden layer
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=tanh)

        # construct log_layer

        self.log_layer = LR(input=self.hidden_layer.output,
                            label=self.y,
                            n_in=n_hidden,
                            n_out=n_out)

    def train(self):
        # forward hidden_layer
        layer_input = self.hidden_layer.forward()

        # forward & backward log_layer
        self.log_layer.train(input=layer_input)

        # backward hidden_layer
        self.hidden_layer.backward(prev_layer=self.log_layer)

    def predict(self, x):
        x = self.hidden_layer.output(input=x)
        return self.log_layer.predict(x)
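A minimal usage sketch for the MLP class above (not from the original source), assuming HiddenLayer, LR, and tanh come from the same module and that each train() call runs one forward/backward pass over the stored data:

import numpy as np

# toy XOR-style data: 4 samples, 2 features, one-hot labels over 2 classes
x = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y = np.array([[0, 1], [1, 0], [1, 0], [0, 1]])

mlp = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2,
          rng=np.random.RandomState(123))
for epoch in range(500):
    mlp.train()
print(mlp.predict(x))  # predicted class scores for each sample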
Example #4
class MLP(object):
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):

        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer (tanh or sigmoid so far)
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=numpy.tanh)

        # construct log_layer (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)

    def train(self):
        layer_input = self.hidden_layer.forward()
        self.log_layer.train(input=layer_input)
        self.hidden_layer.backward(prev_layer=self.log_layer)

    def predict(self, x):
        x = self.hidden_layer.output(x)
        return self.log_layer.predict(x)
Example #5
class MLP(object):
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):

        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer (tanh or sigmoid so far)
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=numpy.tanh)

        # construct log_layer (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)

    def train(self):
        layer_input = self.hidden_layer.forward()
        self.log_layer.train(input=layer_input)
        self.hidden_layer.backward(prev_layer=self.log_layer)
        

    def predict(self, x):
        x = self.hidden_layer.output(x)
        return self.log_layer.predict(x)
Example #6
 def __init__(self, att_number, learn_rate, epochs, n_hidden_layer, n_output_layer):
     self.epochs = epochs
     self.learn_rate = learn_rate
     self.att_number = att_number
     self.n_hidden_layer = n_hidden_layer
     self.n_output_layer = n_output_layer
     self.hidden_layer = HiddenLayer(att_number, learn_rate, n_hidden_layer)
     self.output_layer = OutputLayer(n_hidden_layer, learn_rate, n_output_layer)
Example #7
    def __init__(self,
                 N,
                 label,
                 n_hidden,
                 n_out,
                 image_size,
                 channel,
                 n_kernels,
                 kernel_sizes,
                 pool_sizes,
                 rng=None,
                 activation=ReLU):

        if rng is None:
            rng = numpy.random.RandomState(1234)

        self.N = N
        self.n_hidden = n_hidden

        self.n_kernels = n_kernels

        self.pool_sizes = pool_sizes

        self.conv_layers = []
        self.conv_sizes = []

        # construct 1st conv_layer
        conv_layer0 = ConvPoolLayer(N, image_size, channel, n_kernels[0],
                                    kernel_sizes[0], pool_sizes[0], rng,
                                    activation)
        self.conv_layers.append(conv_layer0)

        conv_size = [
            (image_size[0] - kernel_sizes[0][0] + 1) / pool_sizes[0][0],
            (image_size[1] - kernel_sizes[0][1] + 1) / pool_sizes[0][1]
        ]
        self.conv_sizes.append(conv_size)
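        # For example (illustrative values, not from the original call site):
        # a 28x28 input with a 5x5 kernel and 2x2 pooling gives
        # (28 - 5 + 1) / 2 = 12, so conv_size would be [12, 12].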

        # construct 2nd conv_layer
        conv_layer1 = ConvPoolLayer(N, conv_size, n_kernels[0], n_kernels[1],
                                    kernel_sizes[1], pool_sizes[1], rng,
                                    activation)
        self.conv_layers.append(conv_layer1)

        conv_size = [
            (conv_size[0] - kernel_sizes[1][0] + 1) / pool_sizes[1][0],
            (conv_size[1] - kernel_sizes[1][0] + 1) / pool_sizes[1][1]
        ]
        self.conv_sizes.append(conv_size)

        # construct hidden_layer
        self.hidden_layer = HiddenLayer(
            None, n_kernels[-1] * conv_size[0] * conv_size[1], n_hidden, None,
            None, rng, activation)

        # construct log_layer
        self.log_layer = LogisticRegression(None, label, n_hidden, n_out)
Example #8
def decoder(z, z_extra, params, config):

    mb_size = config['mb_size']
    num_latent = config['num_latent']
    num_hidden = config['num_hidden']

    h_out_1 = HiddenLayer(num_in = num_latent, num_out = num_hidden, W = params['W_dec_1'], b = params['b_dec_1'], activation = 'relu', batch_norm = True)

    h_out_2 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, W = params['W_dec_2'], b = params['b_dec_2'], activation = 'relu', batch_norm = True)

    h_out_3 = HiddenLayer(num_in = num_hidden, num_out = 4096, activation = 'relu', W = params['W_dec_3'], b = params['b_dec_3'], batch_norm = True)

    c1 = DeConvLayer(in_channels = 512, out_channels = 256, activation = 'relu', up_rate = 5, W = params['Wc_dec_1'], b = params['bc_dec_1'], batch_norm = True)

    c2 = DeConvLayer(in_channels = 256, out_channels = 128, activation = 'relu', up_rate = 10, W = params['Wc_dec_2'], b = params['bc_dec_2'], batch_norm = False)

    c3 = DeConvLayer(in_channels = 128, out_channels = 1, activation = None, up_rate = 10, W = params['Wc_dec_3'], b = params['bc_dec_3'], batch_norm = False)

    z = T.concatenate([z,z_extra], axis = 1)

    h_out_1_value = h_out_1.output(z)
    h_out_2_value = h_out_2.output(h_out_1_value)
    h_out_3_value = h_out_3.output(h_out_2_value)
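
    # Shape note (an assumption about DeConvLayer semantics, not stated in the
    # source): 4096 = 512 * 8, so h_out_3_value is reshaped to (128, 512, 8, 1)
    # below, and up_rate factors of 5, 10, 10 expand the length 8 -> 40 -> 400
    # -> 4000, matching the final reshape to (128, 4000).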

    c1_o = c1.output(h_out_3_value.reshape((128,512,8,1)))
    c2_o = c2.output(c1_o)
    c3_o = c3.output(c2_o)

    out = c3_o.reshape((128,4000))

    return {'h' : out}
Example #9
    def __init__(self,
                 rng,
                 input,
                 n_in,
                 n_hidden,
                 n_out,
                 srng=None,
                 dropout_rate=0,
                 activation='tanh',
                 outputActivation='softmax',
                 params=None):
        """Initialize the parameters for the multilayer perceptron

        rng: random number generator, e.g. numpy.random.RandomState(1234)

        input: theano.tensor matrix of shape (n_examples, n_in)

        n_in: int, dimensionality of input

        n_hidden: int, number of hidden units

        n_out: int, number of output units

        dropout_rate: float, if non-zero, dropout with this rate is applied to the hidden layer

        activation: string, nonlinearity to be applied in the hidden layer

        """

        hiddenLayer = HiddenLayer(rng=rng,
                                  input=input,
                                  n_in=n_in,
                                  n_out=n_hidden,
                                  activation=activation,
                                  params=maybe(lambda: params[0]))

        h = hiddenLayer.output
        if dropout_rate > 0:
            assert (srng is not None)
            h = dropout(srng, dropout_rate, h)

        outputLayer = HiddenLayer(rng=rng,
                                  input=h,
                                  n_in=n_hidden,
                                  n_out=n_out,
                                  activation=outputActivation,
                                  params=maybe(lambda: params[1]))

        self.layers = [hiddenLayer, outputLayer]
        self.params = layers_params(self.layers)
        self.L1 = layers_L1(self.layers)
        self.L2_sqr = layers_L2_sqr(self.layers)

        self.output = outputLayer.output
Example #10
    def __init__(self, n_voc, trainset, testset,dataname, classes, prefix):
        if prefix != None:
            prefix += '/'
        self.trainset = trainset
        self.testset = testset

        docs = T.imatrix()
        label = T.ivector()
        length = T.fvector()
        sentencenum = T.fvector()
        wordmask = T.fmatrix()
        sentencemask = T.fmatrix()
        maxsentencenum = T.iscalar()
        isTrain = T.iscalar()

        rng = numpy.random

        layers = []
        layers.append(EmbLayer(rng, docs, n_voc, 200, 'emblayer', dataname, prefix))
        layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 200, 200, 'wordlstmlayer', prefix)) 
        layers.append(MeanPoolLayer(layers[-1].output, length))
        layers.append(SentenceSortLayer(layers[-1].output,maxsentencenum))
        layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200, 'sentencelstmlayer', prefix))
        layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
        layers.append(HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
        layers.append(HiddenLayer(rng, layers[-1].output, 200, int(classes), 'softmaxlayer', prefix, activation=T.nnet.softmax))
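        # architecture (from the layer names): word embeddings -> word-level LSTM
        # -> mean-pool words into sentence vectors -> regroup sentences ->
        # sentence-level LSTM -> mean-pool sentences into a document vector ->
        # fully connected layer -> softmax layer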
        self.layers = layers
        
        cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
        correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
        err = T.argmax(layers[-1].output, axis=1) - label
        mse = T.sum(err * err)
        
        params = []
        for layer in layers:
            params += layer.params
        L2_rate = numpy.float32(1e-5)
        for param in params[1:]:
            cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
        gparams = [T.grad(cost, param) for param in params]

        updates = AdaUpdates(params, gparams, 0.95, 1e-6)

        self.train_model = theano.function(
            inputs=[docs, label,length,sentencenum,wordmask,sentencemask,maxsentencenum],
            outputs=cost,
            updates=updates,
        )

        self.test_model = theano.function(
            inputs=[docs, label,length,sentencenum,wordmask,sentencemask,maxsentencenum],
            outputs=[correct, mse],
        )
Example #11
def encoder(x, params, config):

    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    h_out_1 = HiddenLayer(num_in = 4000, num_out = num_hidden, W = params['W_enc_1'], b = params['b_enc_1'], activation = 'relu', batch_norm = True)

    h_out_2 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, W = params['W_enc_2'], b = params['b_enc_2'], activation = 'relu', batch_norm = True)

    h_out_1_value = h_out_1.output(x)
    h_out_2_value = h_out_2.output(h_out_1_value)

    return {'h' : h_out_2_value}
Example #12
    def __init__(self, rng, input, n_hidden, n_out, embeddingsLookups,
                 embedding_matrix_to_update):
        """Initialize the parameters for the multilayer perceptron
        """
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.ft_names = []

        #for ft in embeddingsLookups:
        #    self.ft_names.append(ft.getName())

        # First a lookup layer to map indices to their corresponding embedding vector
        self.embeddingLayer = EmbeddingLayer(input, embeddingsLookups)

        # Since we are dealing with a one hidden layer MLP, this will translate
        # into a HiddenLayer with a tanh activation function connected to the
        # LogisticRegression layer; the activation function can be replaced by
        # sigmoid or any other nonlinear function
        self.hiddenLayer = HiddenLayer(rng=rng,
                                       input=self.embeddingLayer.output,
                                       n_in=self.embeddingLayer.n_out,
                                       n_out=n_hidden,
                                       activation=T.tanh)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = SoftmaxLayer(input=self.hiddenLayer.output,
                                               n_in=n_hidden,
                                               n_out=n_out)

        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = abs(self.hiddenLayer.W).sum() \
                + abs(self.logRegressionLayer.W).sum()

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
                    + (self.logRegressionLayer.W ** 2).sum()

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layer it is
        # made out of
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params + embedding_matrix_to_update
Example #13
    def __construct_layers(self, n_ins, n_outs, hidden_layer_sizes, rng):
        for i in range(self.n_layers):
            # layer_input
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layer_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

            # ay = self.sigmoid_layers[-1].sample_h_given_v()
            # print("sigmoid_layers", ay.shape, sigmoid_layer.W.shape)
            self.lr_layer = LogisticRegression(
                input=self.sigmoid_layers[-1].sample_h_given_v(),
                label=self.y,
                n_in=hidden_layer_sizes[-1],
                n_out=n_outs)
            self.finetune_cost = self.lr_layer.negative_log_likelihood()
Example #14
    def __init__(self,
                 input=None,
                 label=None,
                 n_ins=2,
                 hidden_layer_sizes=[3, 3],
                 n_outs=2,
                 rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(
                input=layer_input,
                n_visible=input_size,
                n_hidden=hidden_layer_sizes[i],
                W=sigmoid_layer.W,  # W, b are shared
                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()
Example #15
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        """
        n_hidden: Python list giving the dimension of each hidden layer
        """

        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer
        layers_dim = numpy.hstack([n_in, n_hidden])
        self.hidden_layer = []

        for hidden_idx in xrange(len(layers_dim) - 1):
            # feed the first hidden layer with x and each later layer with the
            # previous layer's output, matching the chained layer sizes
            layer_input = self.x if hidden_idx == 0 else self.hidden_layer[-1].output
            self.hidden_layer.append(
                HiddenLayer(input=layer_input,
                            n_in=layers_dim[hidden_idx],
                            n_out=layers_dim[hidden_idx + 1],
                            rng=rng,
                            activation=tanh))

        # construct log_layer
        self.log_layer = LogisticRegression(input=self.hidden_layer[-1].output,
                                            label=self.y,
                                            n_in=n_hidden[-1],
                                            n_out=n_out)
Example #16
def decoder(z, params, config):

    mb_size = config['mb_size']
    num_latent = config['num_latent']
    num_hidden = config['num_hidden']

    h_out_1 = HiddenLayer(num_in = num_latent, num_out = num_hidden, W = params['W_dec_1'], b = params['b_dec_1'], activation = 'relu', batch_norm = True)

    h_out_2 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, W = params['W_dec_2'], b = params['b_dec_2'], activation = 'relu', batch_norm = True)

    h_out_3 = DenseLayer((mb_size, num_hidden), num_units = 4000, nonlinearity=None, W = params['W_dec_3'], b = params['b_dec_3'])

    h_out_1_value = h_out_1.output(z)
    h_out_2_value = h_out_2.output(h_out_1_value)
    h_out_3_value = h_out_3.get_output_for(h_out_2_value)

    return {'h' : h_out_3_value}
Example #17
 def build_model(self, n_classes, learning_rate):
     self.model = []
     self.model.append(ConvolutionLayer(n_filters=32, filter_size=(3,3), learning_rate=learning_rate))
     self.model.append(ConvolutionLayer(n_filters=64, filter_size=(3,3), learning_rate=learning_rate))
     self.model.append(MaxPoolLayer(window_size=(2, 2), stride=2))
     self.model.append(FlattenLayer())
     self.model.append(HiddenLayer(n_neurons=128, input_shape=294912, activation_function=relu, learning_rate=learning_rate))
     self.model.append(OutputLayer(n_classes, input_shape=128, activation_function=sigmoid, learning_rate=learning_rate))
Example #18
def discriminator(x, z, params, mb_size, num_hidden, num_latent):

    import random as rng
    srng = theano.tensor.shared_randomstreams.RandomStreams(420)

    c_1 = ConvPoolLayer(in_channels = 1, out_channels = 128, in_length = 4000, batch_size = mb_size, kernel_len = 20, stride = 10, activation = "relu", batch_norm = False, W = params['W_c_1'], b = params['b_c_1'])

    c_2 = ConvPoolLayer(in_channels = 128, out_channels = 256, in_length = 399, batch_size = mb_size, kernel_len = 20, stride = 10, activation = "relu", batch_norm = False, W = params['W_c_2'], b = params['b_c_2'])

    c_3 = ConvPoolLayer(in_channels = 256, out_channels = 512, in_length = 38, batch_size = mb_size, kernel_len = 10, stride = 5, activation = "relu", batch_norm = False, W = params['W_c_3'], b = params['b_c_3'])

    c_h_1 = HiddenLayer(num_in = 6 * 512, num_out = num_hidden, W = params['W_ch_1'], b = params['b_ch_1'], activation = 'relu', batch_norm = False)

    h_out_1 = HiddenLayer(num_in = num_hidden + num_latent, num_out = num_hidden, activation = 'relu', batch_norm = False, W = params['W_disc_1'], b = params['b_disc_1'])

    h_out_2 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, activation = 'relu', batch_norm = False, W = params['W_disc_2'], b = params['b_disc_2'])

    h_out_3 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, activation = 'relu', batch_norm = False, W = params['W_disc_3'], b = params['b_disc_3'])

    h_out_4 = HiddenLayer(num_in = num_hidden, num_out = 1, activation = None, batch_norm = False, W = params['W_disc_4'], b = params['b_disc_4'])
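
    # Length bookkeeping for the strided convolutions (assuming valid, unpadded
    # convolution where out_length = (in_length - kernel_len) // stride + 1):
    # (4000 - 20) // 10 + 1 = 399, (399 - 20) // 10 + 1 = 38, (38 - 10) // 5 + 1 = 6,
    # which is why c_h_1 takes 6 * 512 inputs and matches the shapes asserted below.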

    c_1_value = T.specify_shape(c_1.output(dropout(x, 0.8).reshape((128,1,4000))), (128,128,399))

    c_2_value = T.specify_shape(c_2.output(c_1_value), (128,256,38))

    c_3_value = T.specify_shape(c_3.output(c_2_value), (128,512,6))

    c_h_1_value = c_h_1.output(c_3_value.flatten(2))

    h_out_1_value = dropout(h_out_1.output(T.concatenate([z, c_h_1_value], axis = 1)))

    h_out_2_value = dropout(h_out_2.output(h_out_1_value), 0.2)

    h_out_3_value = dropout(h_out_3.output(h_out_2_value), 0.2)

    h_out_4_value = h_out_4.output(h_out_3_value)

    raw_y = h_out_4_value

    classification = T.nnet.sigmoid(raw_y)

    results = {'c' : classification}

    return results
Example #19
    def __init__(self,
                 input=None,
                 label=None,
                 n_ins=2,
                 hidden_layer_sizes=[3, 3],
                 n_outs=2,
                 rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.dA_layers = []

        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = np.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)

            self.sigmoid_layers.append(sigmoid_layer)

            dA_layer = dA(input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layer_sizes[i],
                          W=sigmoid_layer.W,
                          hbias=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        self.log_layer = LR(input=self.sigmoid_layers[-1].sample_h_given_v(),
                            label=self.y,
                            n_in=hidden_layer_sizes[-1],
                            n_out=n_outs)

        self.finetune_cost = self.log_layer.negative_log_likelihood()
Example #20
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        self.x = input
        self.y = label

        if rng is None:
            rng = np.random.RandomState(1234)

        # construct hidden layer
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=tanh)

        # construct log_layer

        self.log_layer = LR(input=self.hidden_layer.output,
                            label=self.y,
                            n_in=n_hidden,
                            n_out=n_out)
Example #21
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):

        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer (tanh or sigmoid so far)
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=numpy.tanh)

        # construct log_layer (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)
Example #22
    def build_model(self, input_shape, hidden_shape, output_shape, learning_rate, is_agent_mode_enabled):
        self.input_layer = InputLayer()
        self.hidden_layers = []

        last_output_shape = input_shape
        for shape in hidden_shape:
            self.hidden_layers.append(HiddenLayer(shape, last_output_shape, sigmoid, learning_rate, is_agent_mode_enabled))
            last_output_shape = shape

        last_layer_output_shape = self.hidden_layers[-1].get_output_shape()
        self.output_layer = OutputLayer(output_shape, last_layer_output_shape, sigmoid, learning_rate)
Example #23
    def __init__(self, rng=rng, input_shape=1, output_shape=1, dropout=0.7):
        
        self.nslices = 4        
        self.dropout0 = DropoutLayer(dropout, rng=rng)
        self.dropout1 = DropoutLayer(dropout, rng=rng)
        self.dropout2 = DropoutLayer(dropout, rng=rng)
        self.activation = ActivationLayer('ELU')
        
        self.W0 = HiddenLayer((self.nslices, 512, input_shape-1), rng=rng, gamma=0.01)
        self.W1 = HiddenLayer((self.nslices, 512, 512), rng=rng, gamma=0.01)
        self.W2 = HiddenLayer((self.nslices, output_shape, 512), rng=rng, gamma=0.01)
    
        self.b0 = BiasLayer((self.nslices, 512))
        self.b1 = BiasLayer((self.nslices, 512))
        self.b2 = BiasLayer((self.nslices, output_shape))

        self.layers = [
            self.W0, self.W1, self.W2,
            self.b0, self.b1, self.b2]

        self.params = sum([layer.params for layer in self.layers], [])
Example #24
def encoder(x, params, config):

    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    c_1 = ConvPoolLayer(in_channels = 1, out_channels = 128, in_length = 4000, batch_size = mb_size, kernel_len = 20, stride = 10, activation = "relu", batch_norm = True, W = params['Wc_enc_1'], b = params['bc_enc_1'])

    c_2 = ConvPoolLayer(in_channels = 128, out_channels = 256, in_length = 399, batch_size = mb_size, kernel_len = 20, stride = 10, activation = "relu", batch_norm = True, W = params['Wc_enc_2'], b = params['bc_enc_2'])

    c_3 = ConvPoolLayer(in_channels = 256, out_channels = 512, in_length = 38, batch_size = mb_size, kernel_len = 10, stride = 5, activation = "relu", batch_norm = True, W = params['Wc_enc_3'], b = params['bc_enc_3'])

    h_out_1 = HiddenLayer(num_in = 512 * 6, num_out = num_hidden, W = params['W_enc_1'], b = params['b_enc_1'], activation = 'relu', batch_norm = True)

    h_out_2 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, W = params['W_enc_2'], b = params['b_enc_2'], activation = 'relu', batch_norm = True)

    print "x ndim", x.ndim

    c_1_value = T.specify_shape(c_1.output(x.reshape((128,1,4000))), (128, 128, 399))
    c_2_value = c_2.output(c_1_value)
    c_3_value = c_3.output(c_2_value)

    h_out_1_value = h_out_1.output(c_3_value.flatten(2))
    h_out_2_value = h_out_2.output(h_out_1_value)

    return {'h' : h_out_2_value}
Example #25
    def __init__(self,
                 rng,
                 input,
                 n_in,
                 n_out,
                 layer_sizes=[],
                 dropout_rate=0,
                 srng=None,
                 activation='tanh',
                 outputActivation='softmax',
                 params=None):
        """Initialize the parameters for the multilayer perceptron

        rng: random number generator, e.g. numpy.random.RandomState(1234)

        input: theano.tensor matrix of shape (n_examples, n_in)

        n_in: int, dimensionality of input

        layer_sizes: array of ints, dimensionality of the hidden layers

        n_out: int, number of output units

        dropout_rate: float, if non-zero, dropout with this rate is applied to the hidden layer

        activation: string, nonlinearity to be applied in the hidden layer
        """

        ff = ForwardFeed(
            rng=rng,
            input=input,
            layer_sizes=[n_in] + layer_sizes,
            activation=activation,
            params=maybe(lambda: params[0]),
            dropout_rate=dropout_rate,
            srng=srng,
        )

        outputLayer = HiddenLayer(rng=rng,
                                  input=ff.output,
                                  n_in=layer_sizes[-1],
                                  n_out=n_out,
                                  activation=outputActivation,
                                  params=maybe(lambda: params[1]))

        self.layers = [ff, outputLayer]

        self.params = layers_params(self.layers)
        self.L1 = layers_L1(self.layers)
        self.L2_sqr = layers_L2_sqr(self.layers)

        self.output = outputLayer.output
Example #26
    def __init__(self, rng, input, n_hidden, n_out, embeddingsLookups, embedding_matrix_to_update):
        """Initialize the parameters for the multilayer perceptron
        """
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.ft_names = []
        
        #for ft in embeddingsLookups:
        #    self.ft_names.append(ft.getName())
        
        # First a lookup layer to map indices to their corresponding embedding vector
        self.embeddingLayer = EmbeddingLayer(input, embeddingsLookups)
        
      

        # Since we are dealing with a one hidden layer MLP, this will translate
        # into a HiddenLayer with a tanh activation function connected to the
        # LogisticRegression layer; the activation function can be replaced by
        # sigmoid or any other nonlinear function
        self.hiddenLayer = HiddenLayer(rng=rng, input=self.embeddingLayer.output,
                                       n_in=self.embeddingLayer.n_out, n_out=n_hidden,
                                       activation=T.tanh)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = SoftmaxLayer(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out)

        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = abs(self.hiddenLayer.W).sum() \
                + abs(self.logRegressionLayer.W).sum()

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
                    + (self.logRegressionLayer.W ** 2).sum()

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layer it is
        # made out of
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params + embedding_matrix_to_update
Example #27
    def __init__(self, learningrate, n_features, num_of_hidden_layers, Ws, bs):
        self.learning_rate = learningrate
        self.n_class = Ws[-1].shape[1]
        self.features = n_features
        self.L = num_of_hidden_layers + 1
        self.input_layer = InputLayer()
        self.output_layer = OutputLayer()
        self.hidden_layers = np.array([])
        self.connections = np.array([])
        for i in range(num_of_hidden_layers):
            self.hidden_layers = np.append(self.hidden_layers, HiddenLayer())
        for i in range(1, self.L + 1):
            self.connections = np.append(self.connections, Connection())
            self.connections[i - 1].set(Ws[i - 1], bs[i - 1], i - 1, i)

        return
Example #28
class MLP(object):
    def __init__(self, att_number, learn_rate, epochs, n_hidden_layer, n_output_layer):
        self.epochs = epochs
        self.learn_rate = learn_rate
        self.att_number = att_number
        self.n_hidden_layer = n_hidden_layer
        self.n_output_layer = n_output_layer
        self.hidden_layer = HiddenLayer(att_number, learn_rate, n_hidden_layer)
        self.output_layer = OutputLayer(n_hidden_layer, learn_rate, n_output_layer)

    def feedforward(self, inputs, expected):
        self.hidden_layer.run_layer(inputs)
        self.output_layer.run_layer(self.hidden_layer.outputs, expected)
        self.hidden_layer.update_layer(inputs, self.output_layer)

    def train(self, train_data, att):
        for _ in range(self.epochs):
            np.random.shuffle(train_data)
            for d in train_data:
                selected_inputs, expected = self.inputs_and_expected(d, att)
                self.feedforward(selected_inputs, expected)

    def test(self, test_data, att):
        hits = 0
        for data in test_data:
            selected_inputs, expected = self.inputs_and_expected(data, att)
            self.hidden_layer.run_layer(selected_inputs)
            outputs = self.output_layer.run_test(self.hidden_layer.outputs)
            predict = self.predict(outputs)
            hits = hits + 1 if np.array_equal(predict, expected) else hits

        return (hits / len(test_data)) * 100

    def get_predict(self, inputs):
        hiden_out = self.hidden_layer.run_layer(inputs)
        out_out = self.output_layer.run_test(hiden_out)
        
        return self.predict(out_out)

    @staticmethod
    def inputs_and_expected(d, att):
        expected = np.array([d[len(d)-1]]) if (isinstance(d[len(d)-1], np.floating)) else np.array(list(d[len(d)-1])).astype(np.int)
        selected_inputs = [d[att[i]] for i in range(len(att))]
        return selected_inputs, expected

    @staticmethod
    def predict(outputs):
        predict = [1 if output == np.amax(outputs) else 0 for output in outputs]
        return predict
Example #29
def discriminator(x, z, params, mb_size, num_hidden, num_latent):

    import random as rng
    srng = theano.tensor.shared_randomstreams.RandomStreams(420)

    #c_1 = ConvPoolLayer(in_length = 4000, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = False, W = params['W_c_1'], b = params['b_c_1'])

    #c_2 = ConvPoolLayer(in_length = 399, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = False, W = params['W_c_2'], b = params['b_c_2'])

    #c_3 = ConvPoolLayer(in_length = 38, batch_size = mb_size, stride = 2, activation = "relu", batch_norm = False, W = params['W_c_3'], b = params['b_c_3'])

    #c_h_1 = HiddenLayer(num_in = 6 * 512, num_out = num_hidden, W = params['W_ch_1'], b = params['b_ch_1'], activation = 'relu', batch_norm = False)

    h_out_1 = HiddenLayer(num_in = num_hidden + num_latent, num_out = num_hidden, activation = 'relu', batch_norm = False, W = params['W_disc_1'], b = params['b_disc_1'])

    h_out_2 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, activation = 'relu', batch_norm = False, W = params['W_disc_2'], b = params['b_disc_2'])

    h_out_3 = HiddenLayer(num_in = num_hidden, num_out = num_hidden, activation = 'relu', batch_norm = False, W = params['W_disc_3'], b = params['b_disc_3'])

    h_out_4 = HiddenLayer(num_in = num_hidden, num_out = 1, activation = None, batch_norm = False, W = params['W_disc_4'], b = params['b_disc_4'])

    #c_1_value = c_1.output(dropout(x, 0.8))

    #c_2_value = c_2.output(c_1_value)

    #c_3_value = c_3.output(c_2_value)

    #c_h_1_value = c_h_1.output(c_3_value.flatten(2))

    h_out_1_value = dropout(h_out_1.output(T.concatenate([z, dropout(noise(x.flatten(2)), 0.8)], axis = 1)), 0.5)

    h_out_2_value = dropout(h_out_2.output(h_out_1_value), 0.5)

    h_out_3_value = dropout(h_out_3.output(h_out_2_value), 0.5)

    h_out_4_value = h_out_4.output(h_out_3_value)

    raw_y = h_out_4_value

    classification = T.nnet.sigmoid(raw_y)

    results = {'c' : classification}

    return results
Example #30
    def __init__(self, input, label,\
                 n_in, hidden_layer_sizes, n_out,\
                 rng=None, activation=ReLU):

        self.x = input
        self.y = label

        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in range(self.n_layers):

            # layer_size
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x

            else:
                layer_input = self.hidden_layers[-1].output()

            # construct hidden_layer
            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)

            self.hidden_layers.append(hidden_layer)

        # layer for output using Logistic Regression (softmax)
        self.log_layer = LogisticRegression(
            input=self.hidden_layers[-1].output(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_out)
Example #31
    def __init__(self,
                 rng,
                 input,
                 layer_sizes=[],
                 dropout_rate=0,
                 srng=None,
                 params=None,
                 activation='tanh'):
        """Initialize the parameters for the forward feed

        rng: random number generator, e.g. numpy.random.RandomState(1234)

        input: theano.tensor matrix of shape (n_examples, n_in)

        layer_sizes: array of ints, dimensionality of each layer size, input to output

        activation: string, nonlinearity to be applied in the hidden layer
        """

        output = input
        layers = []
        for i in range(0, len(layer_sizes) - 1):
            hiddenLayer = HiddenLayer(rng=rng,
                                      input=output,
                                      params=maybe(lambda: params[i]),
                                      n_in=layer_sizes[i],
                                      n_out=layer_sizes[i + 1],
                                      activation=activation)

            h = hiddenLayer.output
            if dropout_rate > 0:
                assert (srng is not None)
                h = dropout(srng, dropout_rate, h)

            output = h
            layers.append(hiddenLayer)

        self.layers = layers
        self.output = output

        self.params = layers_params(self.layers)
        self.L1 = layers_L1(self.layers)
        self.L2_sqr = layers_L2_sqr(self.layers)
Example #32
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):

        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer (tanh or sigmoid so far)
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=numpy.tanh)

        # construct log_layer (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)
Example #33
    def __init__(self,
                 input,
                 label,
                 n_in,
                 hidden_layer_sizes,
                 n_out,
                 rng=None,
                 activation=ReLU):
        self.x = input
        self.y = label
        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = np.random.RandomState(1234)

        assert self.n_layers > 0

        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output()

            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)
            self.hidden_layers.append(hidden_layer)

        self.log_layer = LR(input=self.hidden_layers[-1].output(),
                            label=self.y,
                            n_in=hidden_layer_sizes[-1],
                            n_out=n_out)
Example #34
class Network:
    __A = (1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    __B = (0, 1, 0, 0, 0, 0, 0, 0, 0, 0)
    __C = (0, 0, 1, 0, 0, 0, 0, 0, 0, 0)
    __D = (0, 0, 0, 1, 0, 0, 0, 0, 0, 0)
    __E = (0, 0, 0, 0, 1, 0, 0, 0, 0, 0)
    __F = (0, 0, 0, 0, 0, 1, 0, 0, 0, 0)
    __G = (0, 0, 0, 0, 0, 0, 1, 0, 0, 0)
    __H = (0, 0, 0, 0, 0, 0, 0, 1, 0, 0)
    __I = (0, 0, 0, 0, 0, 0, 0, 0, 1, 0)
    __J = (0, 0, 0, 0, 0, 0, 0, 0, 0, 1)

    def __init__(self, training_set, test_set, option):
        self.__inLayer = InputLayer(training_set, test_set, option)
        self.__hidLayer = HiddenLayer(28 * 28, 30, option)
        self.__outLayer = OutputLayer(30, 10, option)
        self.option = option

    def set_hid_weights(self, weights):
        self.__hidLayer.set_weights(weights)
        return self

    def set_out_weights(self, weights):
        self.__outLayer.set_weights(weights)
        return self

    def set_hid_bias(self, bias):
        self.__hidLayer.set_bias(bias)
        return self

    def set_out_bias(self, bias):
        self.__outLayer.set_bias(bias)
        return self

    def train(self, last_time):
        loss = 0
        for i in range(self.__inLayer.training_set_size()):

            desired_output = self.get_desired_output(
                self.__inLayer.get_training_label(i))
            self.__outLayer.set_desired_output(desired_output)

            inp = self.__inLayer.get_image(i)
            if self.option.is_dropout():
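                # np.random.randint(0, 2, ...) yields a 0/1 mask, so each input
                # unit is dropped with probability 0.5; the hidden units get the
                # same treatment below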
                prob = np.random.randint(0, 2, (1, 784))
                inp = np.multiply(inp, prob)
            self.__hidLayer.calc(inp)

            hid = self.__hidLayer.get_output()
            if self.option.is_dropout():
                prob = np.random.randint(0, 2, (1, 30))
                hid = np.multiply(hid, prob)
            self.__outLayer.calc(hid)

            loss += self.__outLayer.loss_function()

            self.__outLayer.back_propagate(hid)
            self.__hidLayer.back_propagate(inp, self.__outLayer)
        if last_time:
            np.savez_compressed('weights',
                                hid_weights=self.__hidLayer.get_weights(),
                                out_weights=self.__outLayer.get_weights(),
                                hid_bias=self.__hidLayer.get_bias(),
                                out_bias=self.__outLayer.get_bias())
        return loss / self.__inLayer.training_set_size()

    def test(self, last_time):
        count = 0
        loss = 0
        for i in range(self.__inLayer.test_set_size()):

            desired_output = self.get_desired_output(
                self.__inLayer.get_test_label(i))
            self.__outLayer.set_desired_output(desired_output)

            inp = self.__inLayer.get_test_image(i)
            self.__hidLayer.calc(inp)
            self.__outLayer.calc(self.__hidLayer.get_output())

            loss += self.__outLayer.loss_function()

            must_be = self.__inLayer.get_test_label(i)
            y_predict = np.zeros(10, dtype=np.int)
            max_i = np.argmax(self.__outLayer.get_output())
            y_predict[max_i] = 1
            prediction = self.prediction(tuple(y_predict))
            if must_be == prediction:
                count += 1
        if last_time:
            print("accuracy: %.2f%%" %
                  (100 * count / self.__inLayer.test_set_size()))
        return loss / self.__inLayer.test_set_size()

    def get_desired_output(self, desired):
        if desired == "A":
            return Network.__A
        elif desired == "B":
            return Network.__B
        elif desired == "C":
            return Network.__C
        elif desired == "D":
            return Network.__D
        elif desired == "E":
            return Network.__E
        elif desired == "F":
            return Network.__F
        elif desired == "G":
            return Network.__G
        elif desired == "H":
            return Network.__H
        elif desired == "I":
            return Network.__I
        elif desired == "J":
            return Network.__J

    def prediction(self, output):
        if output == Network.__A:
            return "A"
        elif output == Network.__B:
            return "B"
        elif output == Network.__C:
            return "C"
        elif output == Network.__D:
            return "D"
        elif output == Network.__E:
            return "E"
        elif output == Network.__F:
            return "F"
        elif output == Network.__G:
            return "G"
        elif output == Network.__H:
            return "H"
        elif output == Network.__I:
            return "I"
        elif output == Network.__J:
            return "J"
Example #35
 def __init__(self, training_set, test_set, option):
     self.__inLayer = InputLayer(training_set, test_set, option)
     self.__hidLayer = HiddenLayer(28 * 28, 30, option)
     self.__outLayer = OutputLayer(30, 10, option)
     self.option = option
Example #36
    def __init__(self, rng, input, n_in, n_hidden_sizes, n_out, n_domains, n_domain_hidden_layer_size, a_function=T.tanh):
        self.rng = rng
        self.hidden_layer_sizes = n_hidden_sizes
        self.input = input
        self.n_in = n_in
        # the size of the last hidden layer is the size of the input to the softmax output layer
        self.n_out = n_hidden_sizes[len(n_hidden_sizes) - 1]
        self.hidden_layers = []
        self.domain_hidden_layers = []
        self.logistic_layers = []
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden_sizes: list
        :param n_hidden_sizes: list of the number of hidden units in each hidden layer

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # Since we are dealing with a one hidden layer MLP, this will translate
        # into a HiddenLayer with a tanh activation function connected to the
        # LogisticRegression layer; the activation function can be replaced by
        # sigmoid or any other nonlinear function


        #Create hidden layers for the mlp

        for i_hidden_layer, i_hidden_layer_size in enumerate(n_hidden_sizes):
            if i_hidden_layer > 0:
                n_in = n_hidden_sizes[i_hidden_layer-1]
                input = self.hidden_layers[i_hidden_layer - 1].output

            hidden_layer = HiddenLayer(rng=rng,
                                        input=input,
                                        n_in=n_in,
                                        n_out=i_hidden_layer_size,
                                        activation=a_function)
            self.hidden_layers.append(hidden_layer)

        for i in range(n_domains):
            domain_hidden_layer = HiddenLayer(rng=rng,
                                        input=hidden_layer.output,
                                        n_in=n_hidden_sizes[len(n_hidden_sizes) - 1],
                                        n_out=n_domain_hidden_layer_size,
                                        activation=a_function)
            self.domain_hidden_layers.append(domain_hidden_layer)
        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        for domain_hidden_layer in self.domain_hidden_layers:
            logRegressionLayer = LogisticRegression(
                input=domain_hidden_layer.output,
                n_in=domain_hidden_layer.n_out,
                n_out=n_out
            )
            self.logistic_layers.append(logRegressionLayer)


        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        l1_weight = shared(0)
        domain_l1_layer = shared(0)

        for hl in self.hidden_layers:
            l1_weight += abs(hl.W).sum()
        for domain_hidden_layer in self.domain_hidden_layers:
            domain_l1_layer += abs(domain_hidden_layer.W).sum()

        self.L1 = (
            l1_weight + domain_l1_layer
        )

        for logRegressionLayer in self.logistic_layers:
            l1_weight += abs(logRegressionLayer.W).sum()



        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        l2_weight = shared(0)
        domain_l2_weight = shared(0)

        for hl in self.hidden_layers:
            l2_weight += (hl.W ** 2).sum()

        for domain_hidden_layer in self.domain_hidden_layers:
            domain_l2_weight += (domain_hidden_layer.W ** 2).sum()

        self.L2_sqr = (
            l2_weight + domain_l2_weight
        )
        for logRegressionLayer in self.logistic_layers:
            l2_weight += (logRegressionLayer.W ** 2).sum()

        print(self.L2_sqr)
        print(type(self.L2_sqr))



        # the parameters of the model are the parameters of the two layer it is
        # made out of
        hidden_layer_params = []
        domain_hidden_layer_params = []

        for hl in self.hidden_layers:
            hidden_layer_params += hl.params

        for hidden_layer in self.domain_hidden_layers:
            domain_hidden_layer_params += hidden_layer.params

        self.params = hidden_layer_params + domain_hidden_layer_params

        for logRegressionLayer in self.logistic_layers:
            self.params += logRegressionLayer.params
Example #37
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[], n_outs=2,\
                 rng=None, W=None, b=None):
        '''

        :param input: the first two parameters are best left unset; data is only needed when fitting,
        :param label:
        :param n_ins:
        :param hidden_layer_sizes:
        :param n_outs:
        :param rng:
        :param W:
        :param b:
        '''

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        # number of hidden layers
        self.hidden_n_layers = len(
            hidden_layer_sizes)  # = len(self.rbm_layers)
        # print("hidden_n_layers=", self.hidden_n_layers)
        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.hidden_n_layers >= 0

        # construct multi-layer
        # layer_input=None
        for i in range(self.hidden_n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(
                # input=layer_input,
                n_in=input_size,
                n_out=hidden_layer_sizes[i],
                rng=rng,
                W=W,
                b=b,
                activation=tanh)
            self.sigmoid_layers.append(sigmoid_layer)

        # add the output layer
        input_size = None
        if self.hidden_n_layers == 0:
            input_size = n_ins
        else:
            input_size = hidden_layer_sizes[-1]

        self.log_layer = LogisticRegression(  #input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=input_size,
            n_out=n_outs,
            W=W,
            b=b,
            outputMap=
            "sigmoid"  #"softmax" #"sigmoid" #"tanh" #"identity" #"sigmoid"
        )
Example #38
def evaluate_lenet5(
        learning_rate=0.1,
        n_epochs=200,
        dataset='/Users/yigenliang/PycharmProjects/theano/assets/mnist.pkl.gz',
        nkerns=[20, 50],
        batch_size=500):
    """
 learning_rate:学习速率,随机梯度前的系数。
 n_epochs训练步数,每一步都会遍历所有batch,即所有样本
 batch_size,这里设置为500,即每遍历完500个样本,才计算梯度并更新参数
 nkerns=[20, 50],每一个LeNetConvPoolLayer卷积核的个数,第一个LeNetConvPoolLayer有
 20个卷积核,第二个有50个
    """

    rng = numpy.random.RandomState(23455)

    # load the data
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # define a few symbolic variables: index is the minibatch index, x the input
    # training data, y the corresponding labels
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # each loaded minibatch has shape (batch_size, 28 * 28), but LeNetConvPoolLayer
    # expects a 4D input, so reshape it
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # layer0 is the first LeNetConvPoolLayer
    # a single input image is (28, 28); convolution gives (28-5+1, 28-5+1) = (24, 24),
    # and max-pooling gives (24/2, 24/2) = (12, 12)
    # each batch holds batch_size images and the first LeNetConvPoolLayer has nkerns[0]
    # kernels, so layer0's output has shape (batch_size, nkerns[0], 12, 12)
    layer0 = ConvPoolLayer(rng,
                           input=layer0_input,
                           image_shape=(batch_size, 1, 28, 28),
                           filter_shape=(nkerns[0], 1, 5, 5),
                           poolsize=(2, 2))

    # layer1 is the second LeNetConvPoolLayer
    # its input is layer0's output; each feature map is (12, 12), convolution gives
    # (12-5+1, 12-5+1) = (8, 8), and max-pooling gives (8/2, 8/2) = (4, 4)
    # each batch holds batch_size images (feature maps) and the second LeNetConvPoolLayer
    # has nkerns[1] kernels, so layer1's output has shape (batch_size, nkerns[1], 4, 4)
    layer1 = ConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12,
                     12),  # the input is nkerns[0] feature maps, i.e. layer0 outputs nkerns[0] feature maps
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2))
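
    # General shape rule behind the comments above (a worked check, assuming valid
    # convolution and non-overlapping pooling as in the two layers just built):
    #   out = (in - filter + 1) / pool
    #   layer0: (28 - 5 + 1) / 2 = 12,   layer1: (12 - 5 + 1) / 2 = 4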

    # the two LeNetConvPoolLayers (layer0 and layer1) are followed by layer2, a
    # fully-connected layer equivalent to the hidden layer of an MLP, so the HiddenLayer
    # class defined for the MLP can be reused to build it.
    # layer2 expects a 2D input of shape (batch_size, num_pixels), so the feature maps that
    # the different kernels produce for the same image must be flattened into one vector:
    # layer1's output (batch_size, nkerns[1], 4, 4) is flattened to
    # (batch_size, nkerns[1]*4*4) = (500, 800) and used as layer2's input.
    # (500, 800) means 500 samples, one per row. layer2's output has shape
    # (batch_size, n_out) = (500, 500).
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # the last layer, layer3, is the classification layer, built with the softmax
    # classifier defined for logistic regression.
    # layer3's input is layer2's output (500, 500); its output has shape (batch_size, n_out) = (500, 10)
    layer3 = SoftMax(input=layer2.output, n_in=500, n_out=10)

    # cost function: the negative log-likelihood (NLL)
    cost = layer3.negative_log_likelihood(y)

    # test_model computes the test error: x and y are instantiated for the given index,
    # then layer3 is evaluated, which in turn pulls in layer2, layer1 and layer0, so
    # test_model is effectively the whole CNN; its input is x and y, and its output is
    # layer3.errors(y), i.e. the error.
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # validate_model: the validation model; same analysis as above.
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # below is train_model, which involves the optimization algorithm (SGD) and therefore
    # needs the gradients and the parameter updates
    # parameter set
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # gradients with respect to each parameter
    grads = T.grad(cost, params)

    # with this many parameters it would be tedious to write the update rules one by one,
    # so a for..in below generates the pairs (param_i, param_i - learning_rate * grad_i)
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    # train_model: same analysis as test_model, except that train_model also carries the
    # updates rule that test_model and validate_model do not have
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995

    validation_frequency = min(n_train_batches, patience / 2)
    # setting validation_frequency this way guarantees the model is evaluated on the
    # validation set at least once per epoch.
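
    # Worked example of the early-stopping bookkeeping above (hypothetical numbers):
    # with patience = 10000 and patience_increase = 2, a sufficiently large improvement
    # found at iter = 6000 sets patience = max(10000, 6000 * 2) = 12000, so training is
    # allowed to continue for at least 12000 minibatches in total.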

    best_validation_loss = numpy.inf  # best (i.e. lowest) validation loss so far
    best_iter = 0  # iteration (counted in minibatches) at which it was reached; e.g.
                   # best_iter = 10000 means the best loss was obtained after the 10000th minibatch
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    # The training loop: the outer while loop counts epochs, and one epoch sweeps over
    # every minibatch, i.e. every image. The inner for loop walks through the minibatches
    # and trains on one minibatch at a time via train_model(minibatch_index), whose
    # updates rule adjusts all parameters.
    # The loop also accumulates the number of trained minibatches, iter; whenever iter is
    # a multiple of validation_frequency the model is evaluated on the validation set.
    # If the validation loss this_validation_loss is lower than the best so far,
    # best_validation_loss and best_iter are updated and the model is also evaluated on
    # the test set; and if this_validation_loss is below
    # best_validation_loss * improvement_threshold, patience is increased as well.
    # Training stops when the maximum number of epochs n_epochs is reached or when
    # patience <= iter.
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)
            # cost_ij itself is never used afterwards; it only exists because train_model
            # is called here and train_model returns a value
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:


                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
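
# A minimal, standalone sketch of the plain-SGD update construction used by both
# evaluate_lenet5 variants in this example, factored into a helper. The helper name is
# hypothetical and not part of the original code; the import may duplicate one that the
# module already has.
import theano.tensor as T

def sgd_updates(cost, params, learning_rate):
    """Return (param, param - learning_rate * grad) pairs for theano.function(updates=...)."""
    grads = T.grad(cost, params)
    return [(param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, grads)]
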
def evaluate_lenet5(topo, learning_rate=0.005, n_epochs=500, datasetName='mnist.pkl.gz',
                    batch_size=4242, stateIn = None, stateOut = None):

    rng = numpy.random.RandomState(23455)
    theano_rng = RandomStreams(numpy.random.randint(2 ** 30))

    #Original
    #datasets = load_data(dataset)
    #n_out = 10

    datasets = Preprocessing.load_pictures()
    # pickle.dump(datasets, open( datasetName, "wb" ) ) # Attention: y is wrong
    # print("Saving the pickled data-set")

    # Loading the pickled images
    # print("Loading the pickled data-set " + str(datasetName))
    # datasets = pickle.load(open(datasetName, "r"))

    n_out = 6
    batch_size = 10
    print("       Learning rate " + str(learning_rate))


    # Images for face recognition
    #train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    print 'Number of Kernels ' + str(topo.nkerns)


    in_2 = 14      # input size in the second layer (layer1); note that topo.in_2 is what is actually used below


    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, topo.ishape[0], topo.ishape[1]))

    # Using persistent state from the last run
    w0 = w1 = b0 = b1 = wHidden = bHidden = wLogReg = bLogReg = None
    if stateIn is not None:
        print("  Loading previous state ...")
        state = pickle.load(open(stateIn, "r"))
        convValues = state.convValues
        w0 = convValues[0][0]
        b0 = convValues[0][1]
        w1 = convValues[1][0]
        b1 = convValues[1][1]
        hiddenVals = state.hiddenValues
        wHidden = hiddenVals[0]
        bHidden = hiddenVals[1]
        logRegValues = state.logRegValues
        wLogReg = logRegValues[0]
        bLogReg = logRegValues[1]
        print("Hallo Gallo")

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, topo.ishape[0],  topo.ishape[0]),
                                filter_shape=(topo.nkerns[0], 1, topo.filter_1, topo.filter_1),
                                poolsize=(topo.pool_1, topo.pool_1), wOld=w0, bOld=b0)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, topo.nkerns[0], topo.in_2, topo.in_2),
                                filter_shape=(topo.nkerns[1], topo.nkerns[0], topo.filter_2, topo.filter_2),
                                poolsize=(topo.pool_2, topo.pool_2), wOld=w1, bOld=b1)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (20,32*4*4) = (20,512)
    layer2_input = layer1.output.flatten(2)

    # Optionally some dropout for the fully connected layer
    # Note: p=1 corresponds to no dropout.
    # layer2_input = theano_rng.binomial(size=layer2_input.shape, n=1, p=1 - 0.02) * layer2_input
    # paper_6 no dropout
    # paper_14 again 0.02 dropout
    # paper_15 again no dropout
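
    # A hedged sketch of "inverted" dropout, in case the commented line above is
    # re-enabled (drop_p is a hypothetical name): scaling by 1 / (1 - drop_p) at
    # training time keeps the expected activation unchanged, so no rescaling is
    # needed at test time.
    #   drop_p = 0.02
    #   mask = theano_rng.binomial(size=layer2_input.shape, n=1, p=1 - drop_p)
    #   layer2_input = layer2_input * mask / (1 - drop_p)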

    layer2 = HiddenLayer(rng, input=layer2_input, n_in=topo.nkerns[1] * topo.hidden_input,
                         n_out=topo.numLogisticInput, activation=T.tanh, Wold = wHidden, bOld = bHidden)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=topo.numLogisticInput, n_out=n_out, Wold = wLogReg, bOld=bLogReg )

    # Some regularisation (not for the conv-Kernels)
    L2_sqr = (layer2.W ** 2).sum() + (layer3.W ** 2).sum()

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y) + 0.001 * L2_sqr
    # paper7
    # paper9 back to 0.001 again
    # paper10 no reg. 
    # paper12 back to 0.001 again

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], layer3.errors(y),
                                 givens={
                                     x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                     y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], layer3.errors(y),
                                     givens={
                                         x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))



    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000 # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # validation_frequency (set inside the epoch loop below) controls how many
    # minibatches to go through before checking the network on the validation set;
    # it is chosen so that the check happens at least once per epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    epoch_fraction = 0.0
    while (epoch < n_epochs) and (not done_looping):
        # New epoch: the training set is perturbed / re-generated again
        print("  Starting new training epoch")
        print("  Manipulating the training set")
        train_set_x, train_set_y = Preprocessing.giveMeNewTraining()
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
        validation_frequency = min(n_train_batches, patience / 2)
        print("  Compiling new function")
        learning_rate *= 0.993 #See Paper from Cican
        train_model = theano.function([index], cost, updates=updates,
                                      givens={
                                          x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                          y: train_set_y[index * batch_size: (index + 1) * batch_size]})
        print("  Finished compiling the training set")

        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):  # visit every minibatch once
            iter = (epoch - 1) * n_train_batches + minibatch_index
            epoch_fraction +=  1.0 / float(n_train_batches)
            if iter % 100 == 0:
                print 'training @ iter = ', iter, ' epoch_fraction ', epoch_fraction
            cost_ij = train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                # test it on the test set
                test_start = time.clock()
                test_losses = [test_model(i) for i in xrange(n_test_batches)]
                train_costs = [train_model(i) for i in xrange(n_test_batches)]  # note: this also applies parameter updates
                dt = time.clock() - test_start
                print 'Testing %i faces in %f sec, %f sec / image' % (batch_size * n_test_batches, dt, dt / (n_test_batches * batch_size))
                test_score = numpy.mean(test_losses)
                train_cost = numpy.mean(train_costs)
                print('%i, %f, %f, %f, %f, 0.424242' % (epoch,  this_validation_loss * 100.,test_score * 100., learning_rate, train_cost))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # # test it on the test set
                    # test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    # test_score = numpy.mean(test_losses)
                    # print(('     epoch %i, minibatch %i/%i, test error of best '
                    #        'model %f %%') %
                    #       (epoch, minibatch_index + 1, n_train_batches,
                    #        test_score * 100.))

                # if (this_validation_loss < 0.02):
                #     learning_rate /= 2
                #     print("Decreased learning rate due to low xval error to " + str(learning_rate))


            if patience <= iter:
                print("--------- Finished Looping ----- earlier ")
                done_looping = True
                break

    end_time = time.clock()
    print('----------  Optimization complete -------------------------')
    print('Res: ', str(topo.nkerns))
    print('Res: ', learning_rate)
    print('Res: Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('Res: The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # Oliver
    if not os.path.isdir("conv_images"):
        os.makedirs("conv_images")
    os.chdir("conv_images")

    # d = layer0.W.get_value() #e.g.  (20, 1, 5, 5) number of filter, num of incomming filters, dim filter
    # for i in range(0, numpy.shape(d)[0]):
    #     dd = d[i][0]
    #     rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8)
    #     img = Image.fromarray(rescaled)
    #     img.save('filter_l0' + str(i) + '.png')
    #
    # d = layer1.W.get_value() #e.g.  (20, 1, 5, 5) number of filter, num of incomming filters, dim filter
    # for i in range(0, numpy.shape(d)[0]):
    #     dd = d[i][0]
    #     rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8)
    #     img = Image.fromarray(rescaled)
    #     img.save('filter_l1' + str(i) + '.png')

    state = LeNet5State(topology=topo,
                        convValues = [layer0.getParametersAsValues(), layer1.getParametersAsValues()],
                        hiddenValues = layer2.getParametersAsValues(),
                        logRegValues = layer3.getParametersAsValues())
    print
    if stateOut is not None:
        pickle.dump(state, open(stateOut, 'wb'))
        print("Saved the pickled state")

    return learning_rate
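
# A hedged usage sketch for the second evaluate_lenet5 variant above. The topology
# container and its field values are hypothetical (chosen to match 28x28 inputs with
# 5x5 filters and 2x2 pooling); the real project presumably provides its own
# LeNet5Topology class and Preprocessing module.
if __name__ == '__main__':
    from collections import namedtuple

    Topology = namedtuple('Topology', ['nkerns', 'ishape', 'filter_1', 'filter_2',
                                       'pool_1', 'pool_2', 'in_2', 'hidden_input',
                                       'numLogisticInput'])
    topo = Topology(nkerns=[20, 50], ishape=(28, 28), filter_1=5, filter_2=5,
                    pool_1=2, pool_2=2, in_2=12, hidden_input=4 * 4,
                    numLogisticInput=500)
    # One training run, optionally resuming from / saving to a pickled state file.
    evaluate_lenet5(topo, learning_rate=0.005, n_epochs=500,
                    stateIn=None, stateOut='lenet5_state.pkl')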