Example #1
    def _build_layers(self, whiten):
        # decide whether or not to whiten the data
        if whiten:
            pca_forward = self.v[:, :self.retain] / self.stds[:self.retain]
            pca_backward = (self.v[:, :self.retain] *
                            self.stds[:self.retain]).T
        else:
            pca_forward = self.v[:, :self.retain]
            pca_backward = pca_forward.T

        # build transforming layers
        pca_forward_w = theano.shared(value=pca_forward,
                                      name='pca_fwd',
                                      borrow=True)
        pca_forward_bvis = theano.shared(value=self.mean,
                                         name='pca_fwd_bvis',
                                         borrow=True)
        self.forward_layer = NeuralizedPCALayer(n_in=self.ndim,
                                                n_out=self.retain,
                                                init_w=pca_forward_w,
                                                init_bvis=pca_forward_bvis)

        pca_backward_w = theano.shared(value=pca_backward,
                                       name='pca_bkwd',
                                       borrow=True)
        pca_backward_bvis = theano.shared(value=self.mean,
                                          name='pca_bkwd_bvis',
                                          borrow=True)
        self.backward_layer = LinearLayer(n_in=self.retain,
                                          n_out=self.ndim,
                                          init_w=pca_backward_w,
                                          init_b=pca_backward_bvis)
        self.outdim = self.retain
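
For intuition, the whitened forward map scales each principal direction to unit variance and the backward map undoes it. A minimal numpy sketch (standalone, with local stand-ins for the v, stds, and mean attributes used above) verifying that forward followed by backward reconstructs the data when all PCs are retained:

import numpy

rng = numpy.random.RandomState(0)
data = numpy.dot(rng.normal(size=(500, 10)), rng.normal(size=(10, 10)))

mean = data.mean(axis=0)
centered = data - mean
u, v = numpy.linalg.eigh(numpy.cov(centered.T, bias=True))
v = v[:, numpy.argsort(u)[::-1]]
stds = numpy.sqrt(numpy.sort(u)[::-1])
retain = 10  # keep every PC so the reconstruction is exact

pca_forward = v[:, :retain] / stds[:retain]       # whiten: unit variance per PC
pca_backward = (v[:, :retain] * stds[:retain]).T  # undo the per-PC scaling

z = numpy.dot(centered, pca_forward)
assert numpy.allclose(z.std(axis=0), 1.0)                       # whitened
assert numpy.allclose(numpy.dot(z, pca_backward) + mean, data)  # reconstructed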
Example #2
    def buildForwardGraph(self, batch_size, discriminative=False):
        """
        :param batch_size: minibatch size. Currently unused; None is used instead.
        :param discriminative: True for discriminative pretraining (creates a graph with
            zero hidden layers). Default: False (creates a graph with the specified
            hidden layers).
        """
        with tf.variable_scope('forward_variables', reuse=False):
            self.input = tf.placeholder(tf.float32, (None, self.input_dim),
                                        'input_nodes')
            self.output = tf.placeholder(tf.float32, (None, self.output_dim),
                                         'output_nodes')
            inpt = self.input
            if not discriminative:
                inpt = self.__buildFullGraph__()
                self.layers.append(
                    LinearLayer(
                        self.layer_dims[-2], self.layer_dims[-1], inpt,
                        str(len(self.layer_dims) - 2) + 'layerNet_output'))
            else:
                self.layers.append(
                    LinearLayer(self.layer_dims[0], self.layer_dims[-1], inpt,
                                '0layerNet_output'))
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.step_incr = tf.assign_add(self.global_step, 1)
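
The global_step/assign_add pair at the end is the standard TF1 idiom for a step counter that persists in the session (and in checkpoints). A self-contained sketch of just that idiom, assuming TF2 with the v1 compatibility shim (the original presumably ran on TF1 directly):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

global_step = tf.Variable(0, name='global_step', trainable=False)
step_incr = tf.assign_add(global_step, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(3):
        sess.run(step_incr)       # each run bumps the counter by one
    print(sess.run(global_step))  # -> 3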
Example #3
def test_draw_weight():
    test_model = LinearLayer(3072, 3)
    test_weight2 = numpy.ones((3072, 3))
    # light up one third of each weight column (the original slices started at
    # 1025 and 2049, skipping indices 1024 and 2048)
    test_weight2[:1024, 0] = 255
    test_weight2[1024:2048, 1] = 255
    test_weight2[2048:3072, 2] = 255
    test_model.w.set_value(test_weight2.astype(theano.config.floatX))
    test_model.draw_weight(patch_shape=(32, 32, 3))
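
draw_weight's internals aren't shown here, but with patch_shape=(32, 32, 3) it presumably renders each 3072-dimensional weight column as a 32x32 RGB patch, so the three columns above come out as three solid color patches. A hedged numpy sketch of one plausible layout (channel-major, then transposed for display):

import numpy

w_col = numpy.ones(3072)
w_col[:1024] = 255.0  # the first third of the vector -> one full color channel

# interpret the vector as (channels, height, width), then move channels last
patch = w_col.reshape(3, 32, 32).transpose(1, 2, 0)
assert patch.shape == (32, 32, 3)
assert (patch[:, :, 0] == 255.0).all()  # the entire first channel is lit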
Example #4
    def _build_layers(self, whiten):
        # decide whether or not to whiten the data
        if whiten:
            pca_forward = self.v[:, :self.retain] / self.stds[:self.retain]
            pca_backward = (self.v[:, :self.retain] * self.stds[:self.retain]).T
        else:
            pca_forward = self.v[:, :self.retain]
            pca_backward = pca_forward.T

        # build transforming layers
        pca_forward_w = theano.shared(
            value=pca_forward, name='pca_fwd', borrow=True
        )
        pca_forward_bvis = theano.shared(
            value=self.mean, name='pca_fwd_bvis', borrow=True
        )
        self.forward_layer = NeuralizedPCALayer(
            n_in=self.ndim, n_out=self.retain,
            init_w=pca_forward_w, init_bvis=pca_forward_bvis
        )

        pca_backward_w = theano.shared(
            value=pca_backward, name='pca_bkwd', borrow=True
        )
        pca_backward_bvis = theano.shared(
            value=self.mean, name='pca_bkwd_bvis', borrow=True
        )
        self.backward_layer = LinearLayer(
            n_in=self.retain, n_out=self.ndim,
            init_w=pca_backward_w, init_b=pca_backward_bvis
        )
        self.outdim = self.retain
Example #5
    def addLayer(self, idx):
        """
        :param idx: index of the layer (in the list passed to initialize the network) to
            be added. Note that 0 is the input layer.
        :return: a list of newly created variables that have to be initialized
        """
        with tf.variable_scope('forward_variables', reuse=False):
            self.layers = self.layers[:-1]
            print 'layers len', len(self.layers)
            if len(self.layers) == 0:
                inpt = self.input
            else:
                inpt = self.layers[-1].activations
            self.layers.append(
                HiddenLayer(self.layer_dims[idx - 1], self.layer_dims[idx],
                            inpt, 'layer' + str(idx)))
            self.layers.append(
                LinearLayer(self.layer_dims[-2], self.layer_dims[-1],
                            self.layers[-1].activations,
                            str(idx) + 'layerNet_output'))
            self.__buildLossGraph__()

        params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope='forward_variables/layer' + str(idx))
        params += tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='forward_variables_' + str(idx))
        params += tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='forward_variables/' + str(idx) +
                                    'layerNet_output')
        print 'params are ', params
        self.buildEvalGraph()
        self.buildSummaryGraph()
        return params
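
Together with buildForwardGraph(discriminative=True) from Example #2, this enables layerwise discriminative pretraining: start from the zero-hidden-layer net, then repeatedly insert a hidden layer, re-attach the output layer, and initialize only the variables returned here. The key step is initializing just that subset; a self-contained sketch of it (the layer names are hypothetical):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# stand-ins for the variables addLayer() would return for a newly added layer
with tf.variable_scope('forward_variables'):
    new_params = [tf.get_variable('layer3/w', shape=(32, 32)),
                  tf.get_variable('layer3/b', shape=(32,))]

with tf.Session() as sess:
    # initialize only the new layer, leaving any pretrained weights untouched
    sess.run(tf.variables_initializer(new_params))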
Example #6
    def decoder(self):
        if self.vistype == 'binary':
            return SigmoidLayer(self.n_hid,
                                self.n_in,
                                varin=self.encoder().output(),
                                init_w=self.wT,
                                init_b=self.bT)
        elif self.vistype == 'real':
            return LinearLayer(self.n_hid,
                               self.n_in,
                               varin=self.encoder().output(),
                               init_w=self.wT,
                               init_b=self.bT)
Example #7
    def encoder(self):
        if self.hidtype == 'binary':
            return SigmoidLayer(self.n_in,
                                self.n_hid,
                                varin=self.varin,
                                init_w=self.w,
                                init_b=self.b)
        elif self.hidtype == 'real':
            return LinearLayer(self.n_in,
                               self.n_hid,
                               varin=self.varin,
                               init_w=self.w,
                               init_b=self.b)
Example #8
def main():
    """
    main function
    """
    num_range = 2
    # Load the dataset
    train_set, valid_set, _ = get_data()

    train_x, train_y = get_data_range(train_set, num_range=num_range)
    valid_x, valid_y = get_data_range(valid_set, num_range=num_range)
    print "size x: %s, y: %s" % (train_x.shape, train_y.shape)

    # n_classes = np.unique(train_y).size
    model = NeutraNetwork(
        [
            LinearLayer(64, Activation('relu')),
            # LinearLayer(64, Activation('relu')),
            LinearLayer(32, Activation('relu')),
            # LinearLayer(32, Activation('relu')),
            LinearLayer(num_range, Activation('sigmoid')),
        ],
        MSECostLayer())

    # model.grad_check(valid_x[:, : 1000], valid_y[:1000])

    # Train neural network
    print 'Training neural network'
    model.train(train_x,
                train_y,
                num_epochs=50,
                batch_size=32,
                learning_rate=0.1)

    # Evaluate on validation data
    error = model.error(valid_x, valid_y)
    print 'valid error rate: %.4f' % error
Example #9
    def _build_layers(self, whiten):
        # decide whether or not to whiten the data
        if whiten:
            zca_forward = numpy.dot(
                self.v[:, :self.retain] / self.stds[:self.retain],
                self.v[:, :self.retain].T)
            zca_backward = numpy.dot(self.v[:, :self.retain],
                                     (self.v[:, :self.retain] *
                                      self.stds[:self.retain]).T)
        else:
            zca_forward = numpy.dot(self.v[:, :self.retain],
                                    self.v[:, :self.retain].T)
            zca_backward = zca_forward

        # build transforming layers
        zca_forward_w = theano.shared(value=zca_forward,
                                      name='zca_fwd',
                                      borrow=True)
        zca_forward_bvis = theano.shared(value=self.mean,
                                         name='zca_fwd_bvis',
                                         borrow=True)
        self.forward_layer = NeuralizedPCALayer(n_in=self.ndim,
                                                n_out=self.ndim,
                                                init_w=zca_forward_w,
                                                init_bvis=zca_forward_bvis)

        zca_backward_w = theano.shared(value=zca_backward,
                                       name='zca_bkwd',
                                       borrow=True)
        zca_backward_bvis = theano.shared(value=self.mean,
                                          name='zca_bkwd_bvis',
                                          borrow=True)
        self.backward_layer = LinearLayer(n_in=self.ndim,
                                          n_out=self.ndim,
                                          init_w=zca_backward_w,
                                          init_b=zca_backward_bvis)
        self.outdim = self.ndim
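
Unlike the PCA variant in Example #1, both ZCA matrices here are square (n_out equals ndim): the whitening transform is V Σ^{-1} V^T, which decorrelates the data while staying in the original coordinate system. A standalone numpy check, mirroring the expressions above, that the whitened covariance is the identity:

import numpy

rng = numpy.random.RandomState(0)
data = numpy.dot(rng.normal(size=(2000, 8)), rng.normal(size=(8, 8)))
centered = data - data.mean(axis=0)

u, v = numpy.linalg.eigh(numpy.cov(centered.T, bias=True))
stds = numpy.sqrt(u)

zca_forward = numpy.dot(v / stds, v.T)  # V diag(1/sigma) V^T
whitened = numpy.dot(centered, zca_forward)

assert numpy.allclose(numpy.cov(whitened.T, bias=True), numpy.eye(8))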
Example #10
def test_Dropout():
    npy_rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(123)
    data_x = theano.shared(
        100 * npy_rng.normal(0, 1, [1000, 50]).astype(theano.config.floatX))
    data_y = theano.shared(
        npy_rng.randint(0, 10, 1000))
    
    ae = ClassicalAutoencoder(
        50, 70, vistype='real', hidtype='binary', tie=True
    )
    sl = LinearLayer(50, 70) + LogisticRegression(70, 10)
    # sl.print_layer()
    lg = LogisticRegression(50, 10)
    # lg.print_layer()
    
    ae_recon = theano.function(
        [],
        ae.reconstruction(),
        givens={ae.varin: data_x}
    )
    sl_output = theano.function(
        [],
        sl.output(),
        givens={sl.varin: data_x}
    )
    lg_output = theano.function(
        [],
        lg.output(),
        givens={lg.varin: data_x}
    )

    recon_before_dropout = ae_recon()
    output_before_dropout = sl_output()
    lgoutput_before_dropout = lg_output()
    
    dropout_ae = Dropout(ae, [0.2, 0.5], theano_rng=theano_rng)
    dropout_sl = Dropout(sl, [0.7, 0.5], theano_rng=theano_rng)
    dropout_lg = Dropout(lg, [0.5], theano_rng=theano_rng)
    # dropout_ae.dropout_model.print_layer()
    # dropout_sl.dropout_model.print_layer()
    # dropout_lg.dropout_model.print_layer()
    
    ae_recon = theano.function(
        [],
        ae.reconstruction(),
        givens={ae.varin: data_x}
    )
    sl_output = theano.function(
        [],
        sl.output(),
        givens={sl.varin: data_x}
    )
    lg_output = theano.function(
        [],
        lg.output(),
        givens={lg.varin: data_x}
    )
    recon_after_dropout = ae_recon()
    output_after_dropout = sl_output()
    lgoutput_after_dropout = lg_output()
    
    # building Dropout wrappers must not disturb the original models' outputs
    assert numpy.allclose(recon_before_dropout, recon_after_dropout)
    assert numpy.allclose(output_before_dropout, output_after_dropout)
    assert numpy.allclose(lgoutput_before_dropout, lgoutput_after_dropout)
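
The assertions pin down the contract under test: wrapping a model in Dropout builds a separate stochastic graph and must leave the original model's deterministic outputs untouched. For reference, a minimal numpy sketch of inverted dropout itself (the convention behind the per-layer probability lists like [0.2, 0.5] above isn't shown, so treat p_keep as an assumed retention probability):

import numpy

rng = numpy.random.RandomState(123)

def dropout_train(x, p_keep):
    # sample a binary mask and rescale so the expected activation is unchanged
    mask = rng.binomial(1, p_keep, size=x.shape)
    return x * mask / p_keep

def dropout_test(x):
    return x  # inverted dropout: test time is the identity

x = rng.normal(size=(1000, 50))
y = dropout_train(x, 0.5)
assert abs(y.mean() - x.mean()) < 0.05  # expectation preserved (statistically)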
Example #11
    def encoder(self):
        return LinearLayer(self.n_in,
                           self.n_hid,
                           varin=self.varin,
                           init_w=self.w,
                           init_b=self.b)
Example #12
class PCA(object):
    """
    A theano based PCA capable of using GPU.
    """
    """
    considering making PCA a layer object

    def __init__(self, n_in, n_out, varin=None):
        pca_forward_w = theano.shared(
            value=pca_forward, name='pca_fwd', borrow=True
        )
        pca_forward_bvis = theano.shared(
            value = self.mean, name='pca_fwd_bvis', borrow=True
        )
        self.forward_layer = NeuralizedPCALayer(
            n_in=self.ndim, n_out=self.retain,
            init_w=pca_forward_w, init_bvis=pca_forward_bvis
        )

        pca_backward_w = theano.shared(
            value=pca_backward, name='pca_bkwd', borrow=True
        )
        pca_backward_bvis = theano.shared(
            value=self.mean, name='pca_bkwd_bvis', borrow=True
        )
        self.backward_layer = LinearLayer(
            n_in=self.retain, n_out=self.ndim,
            init_w=pca_backward_w, init_b=pca_backward_bvis
        )
    """

    def fit(self, data, retain=None, verbose=True, whiten=False):
        """
        Part of the code is adapted from Roland Memisevic's code.

        fit() deals with small datasets, i.e., those that can be loaded into
        memory at once. It builds two mapping layers, self.forward_layer and
        self.backward_layer, which define how the data is mapped once the PCA
        transform is learned.
        """
        self.retain = retain
        assert isinstance(data, numpy.ndarray), \
               "data has to be a numpy ndarray."
        data = data.copy().astype(theano.config.floatX)
        ncases, self.ndim = data.shape
        
        # centralizing data
        """
        If you don\'t centralize the dataset, then you are still going to get
        perfect reconstruction from the forward/backward mapping matrices, but
        1. the eigenvalues you get will no longer match the variance of each
           principle components, 
        2. the \'principle component\' you get will no longer match the
           projection of largest variance, and
        3. the output will not be centered at the initial data center, neither
           at the origin too. However, the shape of the data scatter would
           still remain intact.
        It just rotates the data by an unwanted angle and shifts the data by an
        unexpected vector.
        """
        if verbose:
            print "Centralizing data... ",
        data_variable = T.matrix('data_variable')
        np_ncases = numpy.array([ncases]).astype(theano.config.floatX)
        fun_partmean = theano.function(
            inputs=[data_variable],
            outputs=T.sum(data_variable / np_ncases, axis=0)
        )
        self.mean = numpy.zeros(self.ndim, dtype=theano.config.floatX)
        self.mean += fun_partmean(data)
        data -= self.mean
        if verbose:     print "Done."
        
        # compute covariance matrix
        if verbose:
            print "Computing covariance... ",
        covmat = theano.shared(
            value=numpy.zeros((self.ndim, self.ndim),
                              dtype=theano.config.floatX),
            name='covmat',
            borrow=True
        )
        fun_update_covmat = theano.function(
            inputs=[data_variable],
            outputs=[],
            updates={covmat: covmat + \
                             T.dot(data_variable.T, data_variable) / np_ncases}
        )
        fun_update_covmat(data)
        self.covmat = covmat.get_value()
        if verbose:     print "Done."

        # compute eigenvalue and eigenvector
        if verbose:
            print "Eigen-decomposition...",
            sys.stdout.flush()
        # u should be a real-valued vector holding the variance of the data
        # along each PC. v should be a real-valued orthogonal matrix.
        u, v_unsorted = numpy.linalg.eigh(self.covmat)
        self.v = v_unsorted[:, numpy.argsort(u)[::-1]]
        u.sort()
        u = u[::-1]
        # throw away some eigenvalues for numerical stability
        self.stds = numpy.sqrt(u[u > 0.])
        self.variance_fracs = (self.stds ** 2).cumsum() / (self.stds ** 2).sum()
        self.maxPCs = self.stds.shape[0]
        if verbose: print "Done. Maximum stable PCs: %d" % self.maxPCs
        
        # decide number of principal components.
        error_info = "Wrong \"retain\" value. Should be " + \
                     "a real number within the interval of (0, 1), " + \
                     "an integer in (0, maxPCs], None, or \'mle\'."
        if self.retain is None:
            self.retain = self.maxPCs
        elif self.retain == 'mle':
            raise NotImplementedError("Adaptive dimension matching, "
                                      "not implemented yet...")
        elif isinstance(self.retain, int):
            assert (self.retain > 0 and self.retain <= self.maxPCs), error_info
        elif isinstance(self.retain, float):
            assert (self.retain > 0 and self.retain < 1), error_info
            self.retain = numpy.sum(self.variance_fracs < self.retain) + 1
        if verbose:
            print "Number of selected PCs: %d, ratio of retained variance: %f"%\
                (self.retain, self.variance_fracs[self.retain-1])
        self._build_layers(whiten)
   
    def fit_partwise(self, data_genfun, data_resetfun, ncases, ndim,
                     retain=None, verbose=True, whiten=False):
        """
        fit_partwise() computes PCA for large datasets. The data parts are
        produced by a generator, and each yielded chunk should be a single
        2-D numpy.ndarray. The method builds two mapping layers,
        self.forward_layer and self.backward_layer, which define how the data
        is mapped once the PCA transform is learned.
        """
        self.retain = retain
        self.ndim = ndim

        # centralizing data
        if verbose:
            print "Centralizing data..."
        data_variable = T.matrix('data_variable')
        np_ncases = numpy.array([ncases]).astype(theano.config.floatX)
        fun_partmean = theano.function(
            inputs=[data_variable],
            outputs=T.sum(data_variable / np_ncases, axis=0)
        )
        self.mean = numpy.zeros(self.ndim, dtype=theano.config.floatX)
        data_resetfun()
        data_generator = data_genfun()
        for data_part in data_generator:
            assert isinstance(data_part, numpy.ndarray), (
                "data_genfun has to be a generator function yielding "
                "numpy.ndarray.")
            data_part = data_part.astype(theano.config.floatX)
            _, self.ndim = data_part.shape
            self.mean += fun_partmean(data_part)
            if verbose:
                print ".",
                sys.stdout.flush()
        if verbose:     print "Done."
        
        # compute covariance matrix
        if verbose:
            print "Computing covariance..."
        covmat = theano.shared(
            value=numpy.zeros((self.ndim, self.ndim),
                              dtype=theano.config.floatX),
            name='covmat',
            borrow=True
        )
        fun_update_covmat = theano.function(
            inputs=[data_variable],
            outputs=[],
            updates={covmat: covmat + \
                             T.dot(data_variable.T, data_variable) / np_ncases}
        )
        data_resetfun()
        data_generator = data_genfun()
        for data_part in data_generator:
            data_part = data_part.astype(theano.config.floatX) - self.mean
            fun_update_covmat(data_part)
            if verbose:
                print ".",
                sys.stdout.flush()
        self.covmat = covmat.get_value()
        if verbose:     print "Done."

        # compute eigenvalue and eigenvector
        if verbose:
            print "Eigen-decomposition...",
            sys.stdout.flush()
        # u should be a real-valued vector holding the variance of the data
        # along each PC. v should be a real-valued orthogonal matrix.
        u, v_unsorted = numpy.linalg.eigh(self.covmat)
        self.v = v_unsorted[:, numpy.argsort(u)[::-1]]
        u.sort()
        u = u[::-1]
        # throw away some eigenvalues for numerical stability
        self.stds = numpy.sqrt(u[u > 0.])
        self.variance_fracs = (self.stds ** 2).cumsum() / (self.stds ** 2).sum()
        self.maxPCs = self.stds.shape[0]
        if verbose: print "Done. Maximum stable PCs: %d" % self.maxPCs
        
        # decide number of principal components.
        error_info = "Wrong \"retain\" value. Should be " + \
                     "a real number within the interval of (0, 1), " + \
                     "an integer in (0, maxPCs], None, or \'mle\'."
        if self.retain is None:
            self.retain = self.maxPCs
        elif self.retain == 'mle':
            raise NotImplementedError("Adaptive dimension matching, "
                                      "not implemented yet...")
        elif isinstance(self.retain, int):
            assert (self.retain > 0 and self.retain <= self.maxPCs), error_info
        elif isinstance(self.retain, float):
            assert (self.retain > 0 and self.retain < 1), error_info
            self.retain = numpy.sum(self.variance_fracs < self.retain) + 1
        if verbose:
            print "Number of selected PCs: %d, ratio of retained variance: %f"%\
                (self.retain, self.variance_fracs[self.retain-1])
        self._build_layers(whiten)
 
    def _build_layers(self, whiten):
        # decide whether or not to whiten the data
        if whiten:
            pca_forward = self.v[:, :self.retain] / self.stds[:self.retain]
            pca_backward = (self.v[:, :self.retain] * self.stds[:self.retain]).T
        else:
            pca_forward = self.v[:, :self.retain]
            pca_backward = pca_forward.T

        # build transforming layers
        pca_forward_w = theano.shared(
            value=pca_forward, name='pca_fwd', borrow=True
        )
        pca_forward_bvis = theano.shared(
            value=self.mean, name='pca_fwd_bvis', borrow=True
        )
        self.forward_layer = NeuralizedPCALayer(
            n_in=self.ndim, n_out=self.retain,
            init_w=pca_forward_w, init_bvis=pca_forward_bvis
        )

        pca_backward_w = theano.shared(
            value=pca_backward, name='pca_bkwd', borrow=True
        )
        pca_backward_bvis = theano.shared(
            value=self.mean, name='pca_bkwd_bvis', borrow=True
        )
        self.backward_layer = LinearLayer(
            n_in=self.retain, n_out=self.ndim,
            init_w=pca_backward_w, init_b=pca_backward_bvis
        )
        self.outdim = self.retain

    def forward(self, data, batch_size=10000, verbose=True):
        """
        Maps the given data to PCA representation, in a batchwise manner.
        
        There is no need to do the batchwise mapping though, but this
        implementation is for the unloaded version in the future. That will
        allow us to do PCA mapping on arbitrarilly large dataset.
        
        Parameters
        ------------
        data : numpy.ndarray 
            Data to be mapped.

        
        Returns
        ------------
        numpy.ndarray object.
        """
        assert hasattr(self, 'forward_layer'), 'Please fit the model first.'
        data = data.astype(theano.config.floatX)
        ncases, ndim = data.shape
        assert ndim == self.ndim, \
            'Given data dimension doesn\'t match the learned model.'
        nbatches = (ncases + batch_size - 1) / batch_size
        map_function = theano.function(
            [self.forward_layer.varin],
            self.forward_layer.output()
        )
        if verbose:
            print "Transforming, %d dots to punch:" % nbatches,
        pcaed_data = []
        for bidx in range(nbatches):
            if verbose:
                print ".",
                sys.stdout.flush()
            start = bidx * batch_size
            end = min((bidx + 1) * batch_size, ncases)
            pcaed_data.append(map_function(data[start:end, :]))
        pcaed_data = numpy.concatenate(pcaed_data, axis=0)
        if verbose:     print "Done."
        return pcaed_data

    def backward(self, data, batch_size=10000, verbose=True):
        """
        The same to forward(), but in a reverse direction.
        
        data : numpy.ndarray 
            Data to be mapped.

        Returns
        ------------
        numpy.ndarray object.
        """
        assert hasattr(self, 'backward_layer'), 'Please fit the model first.'
        data = data.astype(theano.config.floatX)
        ncases, ndim = data.shape
        assert ndim == self.outdim, \
            'Given data dimension doesn\'t match the learned model.'
        nbatches = (ncases + batch_size - 1) / batch_size
        map_function = theano.function(
            [self.backward_layer.varin],
            self.backward_layer.output()
        )
        if verbose:
            print "Transforming, %d dots to punch:" % nbatches,
        recons_data = []
        for bidx in range(nbatches):
            if verbose:
                print ".",
                sys.stdout.flush()
            start = bidx * batch_size
            end = min((bidx + 1) * batch_size, ncases)
            recons_data.append(map_function(data[start:end, :]))
        recons_data = numpy.concatenate(recons_data, axis=0)
        if verbose:     print "Done."
        return recons_data

    def energy_dist(self):
        """
        Return the cumulative fractions of retained variance along the
        principal components, as computed during fitting.
        """
        assert hasattr(self, 'variance_fracs'), \
            "The model has not been fitted."
        return self.variance_fracs
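
Putting the class together, a typical session looks like the sketch below (hedged: it assumes Theano plus the library's NeuralizedPCALayer/LinearLayer are importable, exactly as the class itself does):

import numpy

rng = numpy.random.RandomState(0)
data = numpy.dot(rng.normal(size=(5000, 100)),
                 rng.normal(size=(100, 100))).astype('float32')

pca = PCA()
# keep enough PCs to retain 95% of the variance, and whiten them
pca.fit(data, retain=0.95, whiten=True, verbose=False)

z = pca.forward(data, verbose=False)    # shape: (5000, pca.retain)
recon = pca.backward(z, verbose=False)  # shape: (5000, 100); lossy if PCs dropped
print pca.retain, pca.energy_dist()[pca.retain - 1]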
Example #13
# assumed setup (not shown in the original snippet): sklearn's 8x8 digits dataset
import numpy as np
from sklearn.datasets import load_digits
digits = load_digits()
inputs = digits.data.astype(float)

# divide pixels by 255 (note: sklearn digits pixels only range from 0 to 16)
for x in inputs:
    x /= 255
targets = []
for num in digits.target:
    baz = np.zeros(10)
    baz[num] = 1
    targets.append(baz)
targets = np.array(targets)
from sklearn.model_selection import train_test_split
inputs, xtest, targets, ytest = train_test_split(inputs,
                                                 targets,
                                                 test_size=0.2)

np.seterr(all='raise')
net = NeuralNetwork([
    LinearLayer(inputSize=64, outputSize=16),
    LeakyRelu(),
    LinearLayer(inputSize=16, outputSize=10),
    LeakyRelu(),
    Softmax()
])

train(net,
      inputs,
      targets,
      loss=CrossEntropy(),
      num_epochs=600,
      optimizer=MBGD(learningRate=0.0001),
      showGraph=True)

net.serialize("serializedMNIST.json")
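
The split above produces a held-out set (xtest, ytest) that the snippet never evaluates. A sketch of closing that loop, assuming net.forward maps a single input to its output vector as in Example #15 below:

correct = 0
for x, y in zip(xtest, ytest):
    predicted = net.forward(x)  # assumed: per-sample softmax output
    correct += int(np.argmax(predicted) == np.argmax(y))
print("test accuracy: %.3f" % (correct / float(len(xtest))))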
Example #14
class PCA(object):
    """
    A theano based PCA capable of using GPU.
    """
    """
    considering making PCA a layer object

    def __init__(self, n_in, n_out, varin=None):
        pca_forward_w = theano.shared(
            value=pca_forward, name='pca_fwd', borrow=True
        )
        pca_forward_bvis = theano.shared(
            value = self.mean, name='pca_fwd_bvis', borrow=True
        )
        self.forward_layer = NeuralizedPCALayer(
            n_in=self.ndim, n_out=self.retain,
            init_w=pca_forward_w, init_bvis=pca_forward_bvis
        )

        pca_backward_w = theano.shared(
            value=pca_backward, name='pca_bkwd', borrow=True
        )
        pca_backward_bvis = theano.shared(
            value=self.mean, name='pca_bkwd_bvis', borrow=True
        )
        self.backward_layer = LinearLayer(
            n_in=self.retain, n_out=self.ndim,
            init_w=pca_backward_w, init_b=pca_backward_bvis
        )
    """
    def fit(self, data, retain=None, verbose=True, whiten=False):
        """
        Part of the code is adapted from Roland Memisevic's code.

        fit() deals with small datasets, i.e., those that can be loaded into
        memory at once. It builds two mapping layers, self.forward_layer and
        self.backward_layer, which define how the data is mapped once the PCA
        transform is learned.
        """
        self.retain = retain
        assert isinstance(data, numpy.ndarray), \
               "data has to be a numpy ndarray."
        data = data.copy().astype(theano.config.floatX)
        ncases, self.ndim = data.shape

        # centralizing data
        """
        If you don\'t centralize the dataset, then you are still going to get
        perfect reconstruction from the forward/backward mapping matrices, but
        1. the eigenvalues you get will no longer match the variance of each
           principle components, 
        2. the \'principle component\' you get will no longer match the
           projection of largest variance, and
        3. the output will not be centered at the initial data center, neither
           at the origin too. However, the shape of the data scatter would
           still remain intact.
        It just rotates the data by an unwanted angle and shifts the data by an
        unexpected vector.
        """
        if verbose:
            print "Centralizing data... ",
        data_variable = T.matrix('data_variable')
        np_ncases = numpy.array([ncases]).astype(theano.config.floatX)
        fun_partmean = theano.function(inputs=[data_variable],
                                       outputs=T.sum(data_variable / np_ncases,
                                                     axis=0))
        self.mean = numpy.zeros(self.ndim, dtype=theano.config.floatX)
        self.mean += fun_partmean(data)
        data -= self.mean
        if verbose: print "Done."

        # compute covariance matrix
        if verbose:
            print "Computing covariance... ",
        covmat = theano.shared(value=numpy.zeros((self.ndim, self.ndim),
                                                 dtype=theano.config.floatX),
                               name='covmat',
                               borrow=True)
        fun_update_covmat = theano.function(
            inputs=[data_variable],
            outputs=[],
            updates={covmat: covmat + \
                             T.dot(data_variable.T, data_variable) / np_ncases}
        )
        fun_update_covmat(data)
        self.covmat = covmat.get_value()
        if verbose: print "Done."

        # compute eigenvalue and eigenvector
        if verbose:
            print "Eigen-decomposition...",
            sys.stdout.flush()
        # u should be a real-valued vector holding the variance of the data
        # along each PC. v should be a real-valued orthogonal matrix.
        u, v_unsorted = numpy.linalg.eigh(self.covmat)
        self.v = v_unsorted[:, numpy.argsort(u)[::-1]]
        u.sort()
        u = u[::-1]
        # throw away some eigenvalues for numerical stability
        self.stds = numpy.sqrt(u[u > 0.])
        self.variance_fracs = (self.stds**2).cumsum() / (self.stds**2).sum()
        self.maxPCs = self.stds.shape[0]
        if verbose: print "Done. Maximum stable PCs: %d" % self.maxPCs

        # decide number of principal components.
        error_info = "Wrong \"retain\" value. Should be " + \
                     "a real number within the interval of (0, 1), " + \
                     "an integer in (0, maxPCs], None, or \'mle\'."
        if self.retain is None:
            self.retain = self.maxPCs
        elif self.retain == 'mle':
            raise NotImplementedError("Adaptive dimension matching, "
                                      "not implemented yet...")
        elif isinstance(self.retain, int):
            assert (self.retain > 0 and self.retain <= self.maxPCs), error_info
        elif isinstance(self.retain, float):
            assert (self.retain > 0 and self.retain < 1), error_info
            self.retain = numpy.sum(self.variance_fracs < self.retain) + 1
        if verbose:
            print "Number of selected PCs: %d, ratio of retained variance: %f"%\
                (self.retain, self.variance_fracs[self.retain-1])
        self._build_layers(whiten)

    def fit_partwise(self,
                     data_genfun,
                     data_resetfun,
                     ncases,
                     ndim,
                     retain=None,
                     verbose=True,
                     whiten=False):
        """
        fit_partwise() computes PCA for large datasets. The data parts are
        produced by a generator, and each yielded chunk should be a single
        2-D numpy.ndarray. The method builds two mapping layers,
        self.forward_layer and self.backward_layer, which define how the data
        is mapped once the PCA transform is learned.
        """
        self.retain = retain
        self.ndim = ndim

        # centralizing data
        if verbose:
            print "Centralizing data..."
        data_variable = T.matrix('data_variable')
        np_ncases = numpy.array([ncases]).astype(theano.config.floatX)
        fun_partmean = theano.function(inputs=[data_variable],
                                       outputs=T.sum(data_variable / np_ncases,
                                                     axis=0))
        self.mean = numpy.zeros(self.ndim, dtype=theano.config.floatX)
        data_resetfun()
        data_generator = data_genfun()
        for data_part in data_generator:
            assert isinstance(data_part, numpy.ndarray), (
                "data_genfun has to be a generator function yielding "
                "numpy.ndarray.")
            data_part = data_part.astype(theano.config.floatX)
            _, self.ndim = data_part.shape
            self.mean += fun_partmean(data_part)
            if verbose:
                print ".",
                sys.stdout.flush()
        if verbose: print "Done."

        # compute covariance matrix
        if verbose:
            print "Computing covariance..."
        covmat = theano.shared(value=numpy.zeros((self.ndim, self.ndim),
                                                 dtype=theano.config.floatX),
                               name='covmat',
                               borrow=True)
        fun_update_covmat = theano.function(
            inputs=[data_variable],
            outputs=[],
            updates={covmat: covmat + \
                             T.dot(data_variable.T, data_variable) / np_ncases}
        )
        data_resetfun()
        data_generator = data_genfun()
        for data_part in data_generator:
            data_part = data_part.astype(theano.config.floatX) - self.mean
            fun_update_covmat(data_part)
            if verbose:
                print ".",
                sys.stdout.flush()
        self.covmat = covmat.get_value()
        if verbose: print "Done."

        # compute eigenvalue and eigenvector
        if verbose:
            print "Eigen-decomposition...",
            sys.stdout.flush()
        # u should be a real-valued vector holding the variance of the data
        # along each PC. v should be a real-valued orthogonal matrix.
        u, v_unsorted = numpy.linalg.eigh(self.covmat)
        self.v = v_unsorted[:, numpy.argsort(u)[::-1]]
        u.sort()
        u = u[::-1]
        # throw away some eigenvalues for numerical stability
        self.stds = numpy.sqrt(u[u > 0.])
        self.variance_fracs = (self.stds**2).cumsum() / (self.stds**2).sum()
        self.maxPCs = self.stds.shape[0]
        if verbose: print "Done. Maximum stable PCs: %d" % self.maxPCs

        # decide number of principal components.
        error_info = "Wrong \"retain\" value. Should be " + \
                     "a real number within the interval of (0, 1), " + \
                     "an integer in (0, maxPCs], None, or \'mle\'."
        if self.retain is None:
            self.retain = self.maxPCs
        elif self.retain == 'mle':
            raise NotImplementedError("Adaptive dimension matching, "
                                      "not implemented yet...")
        elif isinstance(self.retain, int):
            assert (self.retain > 0 and self.retain <= self.maxPCs), error_info
        elif isinstance(self.retain, float):
            assert (self.retain > 0 and self.retain < 1), error_info
            self.retain = numpy.sum(self.variance_fracs < self.retain) + 1
        if verbose:
            print "Number of selected PCs: %d, ratio of retained variance: %f"%\
                (self.retain, self.variance_fracs[self.retain-1])
        self._build_layers(whiten)

    def _build_layers(self, whiten):
        # decide whether or not to whiten the data
        if whiten:
            pca_forward = self.v[:, :self.retain] / self.stds[:self.retain]
            pca_backward = (self.v[:, :self.retain] *
                            self.stds[:self.retain]).T
        else:
            pca_forward = self.v[:, :self.retain]
            pca_backward = pca_forward.T

        # build transforming layers
        pca_forward_w = theano.shared(value=pca_forward,
                                      name='pca_fwd',
                                      borrow=True)
        pca_forward_bvis = theano.shared(value=self.mean,
                                         name='pca_fwd_bvis',
                                         borrow=True)
        self.forward_layer = NeuralizedPCALayer(n_in=self.ndim,
                                                n_out=self.retain,
                                                init_w=pca_forward_w,
                                                init_bvis=pca_forward_bvis)

        pca_backward_w = theano.shared(value=pca_backward,
                                       name='pca_bkwd',
                                       borrow=True)
        pca_backward_bvis = theano.shared(value=self.mean,
                                          name='pca_bkwd_bvis',
                                          borrow=True)
        self.backward_layer = LinearLayer(n_in=self.retain,
                                          n_out=self.ndim,
                                          init_w=pca_backward_w,
                                          init_b=pca_backward_bvis)
        self.outdim = self.retain

    def forward(self, data, batch_size=10000, verbose=True):
        """
        Maps the given data to PCA representation, in a batchwise manner.
        
        There is no need to do the batchwise mapping though, but this
        implementation is for the unloaded version in the future. That will
        allow us to do PCA mapping on arbitrarilly large dataset.
        
        Parameters
        ------------
        data : numpy.ndarray 
            Data to be mapped.

        
        Returns
        ------------
        numpy.ndarray object.
        """
        assert hasattr(self, 'forward_layer'), 'Please fit the model first.'
        data = data.astype(theano.config.floatX)
        ncases, ndim = data.shape
        assert ndim == self.ndim, \
            'Given data dimension doesn\'t match the learned model.'
        nbatches = (ncases + batch_size - 1) / batch_size
        map_function = theano.function([self.forward_layer.varin],
                                       self.forward_layer.output())
        if verbose:
            print "Transforming, %d dots to punch:" % nbatches,
        pcaed_data = []
        for bidx in range(nbatches):
            if verbose:
                print ".",
                sys.stdout.flush()
            start = bidx * batch_size
            end = min((bidx + 1) * batch_size, ncases)
            pcaed_data.append(map_function(data[start:end, :]))
        pcaed_data = numpy.concatenate(pcaed_data, axis=0)
        if verbose: print "Done."
        return pcaed_data

    def backward(self, data, batch_size=10000, verbose=True):
        """
        The same to forward(), but in a reverse direction.
        
        data : numpy.ndarray 
            Data to be mapped.

        Returns
        ------------
        numpy.ndarray object.
        """
        assert hasattr(self, 'backward_layer'), 'Please fit the model first.'
        data = data.astype(theano.config.floatX)
        ncases, ndim = data.shape
        assert ndim == self.outdim, \
            'Given data dimension doesn\'t match the learned model.'
        nbatches = (ncases + batch_size - 1) / batch_size
        map_function = theano.function([self.backward_layer.varin],
                                       self.backward_layer.output())
        if verbose:
            print "Transforming, %d dots to punch:" % nbatches,
        recons_data = []
        for bidx in range(nbatches):
            if verbose:
                print ".",
                sys.stdout.flush()
            start = bidx * batch_size
            end = min((bidx + 1) * batch_size, ncases)
            recons_data.append(map_function(data[start:end, :]))
        recons_data = numpy.concatenate(recons_data, axis=0)
        if verbose: print "Done."
        return recons_data

    def energy_dist(self):
        """
        Return the cumulative fractions of retained variance along the
        principal components, as computed during fitting.
        """
        assert hasattr(self, 'variance_fracs'), \
            "The model has not been fitted."
        return self.variance_fracs
Example #15
File: xor.py  Project: jptboy/NeuralNetwork
import numpy as np  # assumed import, not shown in the original snippet

inputs = np.array([
    [0, 0],
    [1, 0],
    [0, 1],
    [1, 1]
])

targets = np.array([
    [1, 0],
    [0, 1],
    [0, 1],
    [1, 0]
])

net = NeuralNetwork([
    LinearLayer(inputSize=2, outputSize=2),
    LeakyRelu(),
    LinearLayer(inputSize=2, outputSize=2),
    Tanh()
])

train(net,
      inputs,
      targets,
      loss=MSE(),
      num_epochs=5000,
      optimizer=MBGD(learningRate=0.01),
      showGraph=True)

# net.loadParamsFromFile("/home/ayush/scratch/Net/aknet/serialized.json")

for x, y in zip(inputs, targets):
    predicted = net.forward(x)
    print(x, predicted, y)

net.serialize("serialized.json")
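
The targets one-hot encode XOR parity: [1, 0] when the two input bits match, [0, 1] when they differ. So a hard class prediction is just an argmax over the two outputs; a small hedged helper (assuming, as above, that net.forward maps one input to its output vector):

def predict_xor(net, x):
    # index 1 means "bits differ", matching the one-hot targets above
    return int(np.argmax(net.forward(np.asarray(x))))

for bits in ([0, 0], [1, 0], [0, 1], [1, 1]):
    print(bits, predict_xor(net, bits))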