def buildForwardGraph(self, batch_size, discrimivative=False):
    """
    :param batch_size: minibatch size. Currently unused; None is used instead.
    :param discrimivative: True for discriminative pretraining (creates a graph with zero
        hidden layers). Default: False (creates a graph with the specified hidden layers).
    """
    with tf.variable_scope('forward_variables', reuse=False):
        self.input = tf.placeholder(tf.float32, (None, self.input_dim), 'input_nodes')
        self.output = tf.placeholder(tf.float32, (None, self.output_dim), 'output_nodes')
        inpt = self.input
        if not discrimivative:
            inpt = self.__buildFullGraph__()
            self.layers.append(
                LinearLayer(self.layer_dims[-2], self.layer_dims[-1], inpt,
                            str(len(self.layer_dims) - 2) + 'layerNet_output'))
        else:
            self.layers.append(
                LinearLayer(self.layer_dims[0], self.layer_dims[-1], inpt,
                            '0layerNet_output'))
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.step_incr = tf.assign_add(self.global_step, 1)
def test_draw_weight():
    test_model = LinearLayer(3072, 3)
    # make each of the three output units respond to a different third of the
    # flattened 32x32x3 patch
    test_weight2 = numpy.ones((3072, 3))
    test_weight2[:1024, 0] = 255
    test_weight2[1024:2048, 1] = 255
    test_weight2[2048:3072, 2] = 255
    test_model.w.set_value(test_weight2.astype(theano.config.floatX))
    test_model.draw_weight(patch_shape=(32, 32, 3))
def _build_layers(self, whiten):
    # decide whether or not to whiten the data
    if whiten:
        pca_forward = self.v[:, :self.retain] / self.stds[:self.retain]
        pca_backward = (self.v[:, :self.retain] * self.stds[:self.retain]).T
    else:
        pca_forward = self.v[:, :self.retain]
        pca_backward = pca_forward.T

    # build transforming layers
    pca_forward_w = theano.shared(value=pca_forward, name='pca_fwd', borrow=True)
    pca_forward_bvis = theano.shared(value=self.mean, name='pca_fwd_bvis', borrow=True)
    self.forward_layer = NeuralizedPCALayer(
        n_in=self.ndim, n_out=self.retain,
        init_w=pca_forward_w, init_bvis=pca_forward_bvis)

    pca_backward_w = theano.shared(value=pca_backward, name='pca_bkwd', borrow=True)
    pca_backward_bvis = theano.shared(value=self.mean, name='pca_bkwd_bvis', borrow=True)
    self.backward_layer = LinearLayer(
        n_in=self.retain, n_out=self.ndim,
        init_w=pca_backward_w, init_b=pca_backward_bvis)

    self.outdim = self.retain
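# The whitened branch above scales each principal direction down by its standard
# deviation on the way forward and scales it back on the way backward, so
# backward(forward(x)) reproduces the projection onto the retained components.
# A minimal NumPy sketch of that identity, assuming centred data; the variable
# names are illustrative and independent of the Theano layers above.
import numpy

rng = numpy.random.RandomState(0)
data = rng.normal(size=(500, 8))
data -= data.mean(axis=0)                            # centred, as fit() does

covmat = numpy.dot(data.T, data) / data.shape[0]
u, v = numpy.linalg.eigh(covmat)
order = numpy.argsort(u)[::-1]
v, stds = v[:, order], numpy.sqrt(u[order])

retain = 5
pca_forward = v[:, :retain] / stds[:retain]          # whitening projection
pca_backward = (v[:, :retain] * stds[:retain]).T     # un-whitening map

codes = numpy.dot(data, pca_forward)
recon = numpy.dot(codes, pca_backward)
print(numpy.allclose(codes.std(axis=0), 1.0))        # roughly unit variance per PC
print(numpy.allclose(recon, numpy.dot(numpy.dot(data, v[:, :retain]), v[:, :retain].T)))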
def addLayer(self, idx):
    """
    :param idx: index of the layer (in the list passed to initialize the network) to be
        added. Note that 0 is the input layer.
    :return: a list of the newly created variables that have to be initialized
    """
    with tf.variable_scope('forward_variables', reuse=False):
        self.layers = self.layers[:-1]
        print 'layers len', len(self.layers)
        if len(self.layers) == 0:
            inpt = self.input
        else:
            inpt = self.layers[-1].activations
        self.layers.append(
            HiddenLayer(self.layer_dims[idx - 1], self.layer_dims[idx], inpt,
                        'layer' + str(idx)))
        self.layers.append(
            LinearLayer(self.layer_dims[-2], self.layer_dims[-1],
                        self.layers[-1].activations,
                        str(idx) + 'layerNet_output'))
        self.__buildLossGraph__()
    params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                               scope='forward_variables/layer' + str(idx))
    params += tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                scope='forward_variables_' + str(idx))
    params += tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                scope='forward_variables/' + str(idx) + 'layerNet_output')
    print 'params are ', params
    self.buildEvalGraph()
    self.buildSummaryGraph()
    return params
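# A hypothetical driver for discriminative (greedy layer-wise) pretraining with the
# two methods above: build the zero-hidden-layer graph first, then grow one hidden
# layer at a time and initialize only the variables that addLayer() returns. The
# wrapper class name `Network`, its constructor, and the layer sizes are
# illustrative assumptions, not taken from the source.
import tensorflow as tf

net = Network(layer_dims=[784, 512, 512, 10])        # assumed constructor
net.buildForwardGraph(batch_size=None, discrimivative=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # ... train the shallow net here ...
    for idx in range(1, len(net.layer_dims) - 1):    # hidden-layer indices
        new_params = net.addLayer(idx)
        sess.run(tf.variables_initializer(new_params))   # keep already-trained weights
        # ... continue training the deepened net ...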
def decoder(self):
    if self.vistype == 'binary':
        return SigmoidLayer(self.n_hid, self.n_in,
                            varin=self.encoder().output(),
                            init_w=self.wT, init_b=self.bT)
    elif self.vistype == 'real':
        return LinearLayer(self.n_hid, self.n_in,
                           varin=self.encoder().output(),
                           init_w=self.wT, init_b=self.bT)
def encoder(self):
    if self.hidtype == 'binary':
        return SigmoidLayer(self.n_in, self.n_hid,
                            varin=self.varin,
                            init_w=self.w, init_b=self.b)
    elif self.hidtype == 'real':
        return LinearLayer(self.n_in, self.n_hid,
                           varin=self.varin,
                           init_w=self.w, init_b=self.b)
def main():
    """ main function """
    num_range = 2

    # Load the dataset
    train_set, valid_set, _ = get_data()
    train_x, train_y = get_data_range(train_set, num_range=num_range)
    valid_x, valid_y = get_data_range(valid_set, num_range=num_range)
    print "size x: %s, y: %s" % (train_x.shape, train_y.shape)
    # n_classes = np.unique(train_y).size

    model = NeutraNetwork(
        [
            LinearLayer(64, Activation('relu')),
            # LinearLayer(64, Activation('relu')),
            LinearLayer(32, Activation('relu')),
            # LinearLayer(32, Activation('relu')),
            LinearLayer(num_range, Activation('sigmoid')),
        ],
        MSECostLayer())
    # model.grad_check(valid_x[:, : 1000], valid_y[:1000])

    # Train the neural network
    print 'Training neural network'
    model.train(train_x, train_y, num_epochs=50, batch_size=32, learning_rate=0.1)

    # Evaluate on validation data
    error = model.error(valid_x, valid_y)
    print 'valid error rate: %.4f' % error
def _build_layers(self, whiten):
    # decide whether or not to whiten the data
    if whiten:
        zca_forward = numpy.dot(
            self.v[:, :self.retain] / self.stds[:self.retain],
            self.v[:, :self.retain].T)
        zca_backward = numpy.dot(
            self.v[:, :self.retain],
            (self.v[:, :self.retain] * self.stds[:self.retain]).T)
    else:
        zca_forward = numpy.dot(self.v[:, :self.retain],
                                self.v[:, :self.retain].T)
        zca_backward = zca_forward

    # build transforming layers
    zca_forward_w = theano.shared(value=zca_forward, name='zca_fwd', borrow=True)
    zca_forward_bvis = theano.shared(value=self.mean, name='zca_fwd_bvis', borrow=True)
    self.forward_layer = NeuralizedPCALayer(
        n_in=self.ndim, n_out=self.ndim,
        init_w=zca_forward_w, init_bvis=zca_forward_bvis)

    zca_backward_w = theano.shared(value=zca_backward, name='zca_bkwd', borrow=True)
    zca_backward_bvis = theano.shared(value=self.mean, name='zca_bkwd_bvis', borrow=True)
    self.backward_layer = LinearLayer(
        n_in=self.ndim, n_out=self.ndim,
        init_w=zca_backward_w, init_b=zca_backward_bvis)

    self.outdim = self.ndim
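# Unlike the PCA variant, the ZCA mapping rotates back into the original coordinate
# system, so both directions are ndim x ndim and, with all components retained,
# whitening followed by un-whitening recovers the centred data. A small NumPy sketch
# of the two matrices, assuming full retention; the names are illustrative only.
import numpy

rng = numpy.random.RandomState(1)
data = rng.normal(size=(2000, 6))
data -= data.mean(axis=0)

covmat = numpy.dot(data.T, data) / data.shape[0]
u, v = numpy.linalg.eigh(covmat)
order = numpy.argsort(u)[::-1]
v, stds = v[:, order], numpy.sqrt(u[order])

zca_forward = numpy.dot(v / stds, v.T)       # whiten, then rotate back
zca_backward = numpy.dot(v, (v * stds).T)    # inverse mapping

whitened = numpy.dot(data, zca_forward)
print(numpy.allclose(numpy.dot(whitened.T, whitened) / data.shape[0],
                     numpy.eye(6), atol=1e-6))              # unit covariance
print(numpy.allclose(numpy.dot(whitened, zca_backward), data, atol=1e-6))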
def test_Dropout():
    npy_rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(123)
    data_x = theano.shared(
        100 * npy_rng.normal(0, 1, [1000, 50]).astype(theano.config.floatX))
    data_y = theano.shared(npy_rng.randint(0, 10, 1000))

    ae = ClassicalAutoencoder(50, 70, vistype='real', hidtype='binary', tie=True)
    sl = LinearLayer(50, 70) + LogisticRegression(70, 10)
    # sl.print_layer()
    lg = LogisticRegression(50, 10)
    # lg.print_layer()

    ae_recon = theano.function([], ae.reconstruction(), givens={ae.varin: data_x})
    sl_output = theano.function([], sl.output(), givens={sl.varin: data_x})
    lg_output = theano.function([], lg.output(), givens={lg.varin: data_x})
    recon_before_dropout = ae_recon()
    output_before_dropout = sl_output()
    lgoutput_before_dropout = lg_output()

    dropout_ae = Dropout(ae, [0.2, 0.5], theano_rng=theano_rng)
    dropout_sl = Dropout(sl, [0.7, 0.5], theano_rng=theano_rng)
    dropout_lg = Dropout(lg, [0.5], theano_rng=theano_rng)
    # dropout_ae.dropout_model.print_layer()
    # dropout_sl.dropout_model.print_layer()
    # dropout_lg.dropout_model.print_layer()

    ae_recon = theano.function([], ae.reconstruction(), givens={ae.varin: data_x})
    sl_output = theano.function([], sl.output(), givens={sl.varin: data_x})
    lg_output = theano.function([], lg.output(), givens={lg.varin: data_x})
    recon_after_dropout = ae_recon()
    output_after_dropout = sl_output()
    lgoutput_after_dropout = lg_output()

    # building the Dropout wrappers must not change the original models' outputs
    assert numpy.allclose(recon_before_dropout, recon_after_dropout)
    assert numpy.allclose(output_before_dropout, output_after_dropout)
    assert numpy.allclose(lgoutput_before_dropout, lgoutput_after_dropout)
def encoder(self):
    return LinearLayer(self.n_in, self.n_hid,
                       varin=self.varin,
                       init_w=self.w, init_b=self.b)
class PCA(object):
    """
    A Theano-based PCA implementation capable of running on the GPU.
    """
    """ considering to make PCA a layer object
    def __init__(self, n_in, n_out, varin=None):
        pca_forward_w = theano.shared(value=pca_forward, name='pca_fwd',
                                      borrow=True)
        pca_forward_bvis = theano.shared(value=self.mean, name='pca_fwd_bvis',
                                         borrow=True)
        self.forward_layer = NeuralizedPCALayer(
            n_in=self.ndim, n_out=self.retain,
            init_w=pca_forward_w, init_bvis=pca_forward_bvis)

        pca_backward_w = theano.shared(value=pca_backward, name='pca_bkwd',
                                       borrow=True)
        pca_backward_bvis = theano.shared(value=self.mean, name='pca_bkwd_bvis',
                                          borrow=True)
        self.backward_layer = LinearLayer(
            n_in=self.retain, n_out=self.ndim,
            init_w=pca_backward_w, init_b=pca_backward_bvis)
    """

    def fit(self, data, retain=None, verbose=True, whiten=False):
        """
        Part of the code is adapted from Roland Memisevic's code.

        fit() deals with small datasets, i.e., those that can be loaded into
        memory at once. It establishes 2 LinearLayer objects: PCAForwardLayer
        and PCABackwardLayer, which define how the data is mapped once the PCA
        mapping is learned.
        """
        self.retain = retain
        assert isinstance(data, numpy.ndarray), \
            "data has to be a numpy ndarray."
        data = data.copy().astype(theano.config.floatX)
        ncases, self.ndim = data.shape

        # centralizing data
        #
        # If you don't centralize the dataset, you still get perfect
        # reconstruction from the forward/backward mapping matrices, but
        # 1. the eigenvalues no longer match the variance of each principal
        #    component,
        # 2. the 'principal components' no longer match the projections of
        #    largest variance, and
        # 3. the output is centered neither at the initial data center nor at
        #    the origin.
        # The shape of the data scatter remains intact, though; it is just
        # rotated by an unwanted angle and shifted by an unexpected vector.
        if verbose:
            print "Centralizing data... ",
        data_variable = T.matrix('data_variable')
        np_ncases = numpy.array([ncases]).astype(theano.config.floatX)
        fun_partmean = theano.function(
            inputs=[data_variable],
            outputs=T.sum(data_variable / np_ncases, axis=0)
        )
        self.mean = numpy.zeros(self.ndim, dtype=theano.config.floatX)
        self.mean += fun_partmean(data)
        data -= self.mean
        if verbose:
            print "Done."

        # compute covariance matrix
        if verbose:
            print "Computing covariance... ",
        covmat = theano.shared(
            value=numpy.zeros((self.ndim, self.ndim),
                              dtype=theano.config.floatX),
            name='covmat', borrow=True
        )
        fun_update_covmat = theano.function(
            inputs=[data_variable],
            outputs=[],
            updates={covmat: covmat +
                     T.dot(data_variable.T, data_variable) / np_ncases}
        )
        fun_update_covmat(data)
        self.covmat = covmat.get_value()
        if verbose:
            print "Done."

        # compute eigenvalues and eigenvectors
        if verbose:
            print "Eigen-decomposition...",
            sys.stdout.flush()
        # u is a real valued vector holding the variance of the data along
        # each PC. v is a real valued orthogonal matrix.
        u, v_unsorted = numpy.linalg.eigh(self.covmat)
        self.v = v_unsorted[:, numpy.argsort(u)[::-1]]
        u.sort()
        u = u[::-1]

        # throw away some eigenvalues for numerical stability
        self.stds = numpy.sqrt(u[u > 0.])
        self.variance_fracs = (self.stds ** 2).cumsum() / (self.stds ** 2).sum()
        self.maxPCs = self.stds.shape[0]
        if verbose:
            print "Done. Maximum stable PCs: %d" % self.maxPCs

        # decide the number of principal components
        error_info = ("Wrong \"retain\" value. Should be a real number within "
                      "the interval of (0, 1), an integer in (0, maxPCs], "
                      "None, or 'mle'.")
        if self.retain is None:
            self.retain = self.maxPCs
        elif self.retain == 'mle':
            raise NotImplementedError("Adaptive dimension matching, "
                                      "not implemented yet...")
        elif isinstance(self.retain, int):
            assert (self.retain > 0 and self.retain <= self.maxPCs), error_info
        elif isinstance(self.retain, float):
            assert (self.retain > 0 and self.retain < 1), error_info
            self.retain = numpy.sum(self.variance_fracs < self.retain) + 1

        if verbose:
            print "Number of selected PCs: %d, ratio of retained variance: %f" % \
                (self.retain, self.variance_fracs[self.retain - 1])
        self._build_layers(whiten)

    def fit_partwise(self, data_genfun, data_resetfun, ncases, ndim,
                     retain=None, verbose=True, whiten=False):
        """
        fit_partwise() computes PCA for large datasets. The data parts are
        produced by a generator, and each yielded part should be a single 2-D
        numpy.ndarray.

        The method establishes 2 LinearLayer objects: PCAForwardLayer and
        PCABackwardLayer, which define how the data is mapped once the PCA
        mapping is learned.
        """
        self.retain = retain
        self.ndim = ndim

        # centralizing data
        if verbose:
            print "Centralizing data..."
        data_variable = T.matrix('data_variable')
        np_ncases = numpy.array([ncases]).astype(theano.config.floatX)
        fun_partmean = theano.function(
            inputs=[data_variable],
            outputs=T.sum(data_variable / np_ncases, axis=0)
        )
        self.mean = numpy.zeros(self.ndim, dtype=theano.config.floatX)
        data_resetfun()
        data_generator = data_genfun()
        for data_part in data_generator:
            assert isinstance(data_part, numpy.ndarray), (
                "data_genfun has to be a generator function yielding "
                "numpy.ndarray.")
            data_part = data_part.astype(theano.config.floatX)
            _, self.ndim = data_part.shape
            self.mean += fun_partmean(data_part)
            if verbose:
                print ".",
                sys.stdout.flush()
        if verbose:
            print "Done."

        # compute covariance matrix
        if verbose:
            print "Computing covariance..."
        covmat = theano.shared(
            value=numpy.zeros((self.ndim, self.ndim),
                              dtype=theano.config.floatX),
            name='covmat', borrow=True
        )
        fun_update_covmat = theano.function(
            inputs=[data_variable],
            outputs=[],
            updates={covmat: covmat +
                     T.dot(data_variable.T, data_variable) / np_ncases}
        )
        data_resetfun()
        data_generator = data_genfun()
        for data_part in data_generator:
            data_part = data_part.astype(theano.config.floatX) - self.mean
            fun_update_covmat(data_part)
            if verbose:
                print ".",
                sys.stdout.flush()
        self.covmat = covmat.get_value()
        if verbose:
            print "Done."

        # compute eigenvalues and eigenvectors
        if verbose:
            print "Eigen-decomposition...",
            sys.stdout.flush()
        # u is a real valued vector holding the variance of the data along
        # each PC. v is a real valued orthogonal matrix.
        u, v_unsorted = numpy.linalg.eigh(self.covmat)
        self.v = v_unsorted[:, numpy.argsort(u)[::-1]]
        u.sort()
        u = u[::-1]

        # throw away some eigenvalues for numerical stability
        self.stds = numpy.sqrt(u[u > 0.])
        self.variance_fracs = (self.stds ** 2).cumsum() / (self.stds ** 2).sum()
        self.maxPCs = self.stds.shape[0]
        if verbose:
            print "Done. Maximum stable PCs: %d" % self.maxPCs

        # decide the number of principal components
        error_info = ("Wrong \"retain\" value. Should be a real number within "
                      "the interval of (0, 1), an integer in (0, maxPCs], "
                      "None, or 'mle'.")
        if self.retain is None:
            self.retain = self.maxPCs
        elif self.retain == 'mle':
            raise NotImplementedError("Adaptive dimension matching, "
                                      "not implemented yet...")
        elif isinstance(self.retain, int):
            assert (self.retain > 0 and self.retain <= self.maxPCs), error_info
        elif isinstance(self.retain, float):
            assert (self.retain > 0 and self.retain < 1), error_info
            self.retain = numpy.sum(self.variance_fracs < self.retain) + 1

        if verbose:
            print "Number of selected PCs: %d, ratio of retained variance: %f" % \
                (self.retain, self.variance_fracs[self.retain - 1])
        self._build_layers(whiten)

    def _build_layers(self, whiten):
        # decide whether or not to whiten the data
        if whiten:
            pca_forward = self.v[:, :self.retain] / self.stds[:self.retain]
            pca_backward = (self.v[:, :self.retain] * self.stds[:self.retain]).T
        else:
            pca_forward = self.v[:, :self.retain]
            pca_backward = pca_forward.T

        # build transforming layers
        pca_forward_w = theano.shared(value=pca_forward, name='pca_fwd',
                                      borrow=True)
        pca_forward_bvis = theano.shared(value=self.mean, name='pca_fwd_bvis',
                                         borrow=True)
        self.forward_layer = NeuralizedPCALayer(
            n_in=self.ndim, n_out=self.retain,
            init_w=pca_forward_w, init_bvis=pca_forward_bvis)

        pca_backward_w = theano.shared(value=pca_backward, name='pca_bkwd',
                                       borrow=True)
        pca_backward_bvis = theano.shared(value=self.mean, name='pca_bkwd_bvis',
                                          borrow=True)
        self.backward_layer = LinearLayer(
            n_in=self.retain, n_out=self.ndim,
            init_w=pca_backward_w, init_b=pca_backward_bvis)

        self.outdim = self.retain

    def forward(self, data, batch_size=10000, verbose=True):
        """
        Maps the given data to its PCA representation, in a batchwise manner.
        The batchwise mapping is not strictly necessary here, but it prepares
        for a future out-of-core version, which would allow PCA mapping on
        arbitrarily large datasets.

        Parameters
        ------------
        data : numpy.ndarray
            Data to be mapped.

        Returns
        ------------
        numpy.ndarray object.
        """
        assert hasattr(self, 'forward_layer'), 'Please fit the model first.'
        data = data.astype(theano.config.floatX)
        ncases, ndim = data.shape
        assert ndim == self.ndim, \
            "Given data dimension doesn't match the learned model."

        nbatches = (ncases + batch_size - 1) / batch_size
        map_function = theano.function(
            [self.forward_layer.varin],
            self.forward_layer.output()
        )
        if verbose:
            print "Transforming, %d dots to punch:" % nbatches,
        pcaed_data = []
        for bidx in range(nbatches):
            if verbose:
                print ".",
                sys.stdout.flush()
            start = bidx * batch_size
            end = min((bidx + 1) * batch_size, ncases)
            pcaed_data.append(map_function(data[start:end, :]))
        pcaed_data = numpy.concatenate(pcaed_data, axis=0)
        if verbose:
            print "Done."
        return pcaed_data

    def backward(self, data, batch_size=10000, verbose=True):
        """
        The same as forward(), but in the reverse direction.

        Parameters
        ------------
        data : numpy.ndarray
            Data to be mapped.

        Returns
        ------------
        numpy.ndarray object.
        """
        assert hasattr(self, 'backward_layer'), 'Please fit the model first.'
        data = data.astype(theano.config.floatX)
        ncases, ndim = data.shape
        assert ndim == self.outdim, \
            "Given data dimension doesn't match the learned model."
        nbatches = (ncases + batch_size - 1) / batch_size
        map_function = theano.function(
            [self.backward_layer.varin],
            self.backward_layer.output()
        )
        if verbose:
            print "Transforming, %d dots to punch:" % nbatches,
        recons_data = []
        for bidx in range(nbatches):
            if verbose:
                print ".",
                sys.stdout.flush()
            start = bidx * batch_size
            end = min((bidx + 1) * batch_size, ncases)
            recons_data.append(map_function(data[start:end, :]))
        recons_data = numpy.concatenate(recons_data, axis=0)
        if verbose:
            print "Done."
        return recons_data

    def energy_dist(self):
        """
        Returns the cumulative fractions of variance retained by the leading
        principal components.
        """
        assert hasattr(self, 'variance_fracs'), \
            "The model has not been fitted."
        return self.variance_fracs
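# A hedged usage sketch for the PCA class above, assuming NeuralizedPCALayer and
# LinearLayer are importable from this codebase; the data and sizes are made up.
import numpy
import theano

rng = numpy.random.RandomState(42)
data = rng.normal(size=(1000, 20)).astype(theano.config.floatX)

pca = PCA()
pca.fit(data, retain=5, whiten=True, verbose=False)    # keep 5 whitened PCs
codes = pca.forward(data, verbose=False)               # shape (1000, 5)
recon = pca.backward(codes, verbose=False)             # back to shape (1000, 20)
print(codes.shape)
print(recon.shape)
print(pca.energy_dist()[pca.retain - 1])               # fraction of variance kept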
for x in inputs:
    x /= 255

targets = []
for num in digits.target:
    baz = np.zeros(10)
    baz[num] = 1
    targets.append(baz)
targets = np.array(targets)

from sklearn.model_selection import train_test_split
inputs, xtest, targets, ytest = train_test_split(inputs, targets, test_size=0.2)

np.seterr(all='raise')

net = NeuralNetwork([
    LinearLayer(inputSize=64, outputSize=16),
    LeakyRelu(),
    LinearLayer(inputSize=16, outputSize=10),
    LeakyRelu(),
    Softmax()
])

train(net, inputs, targets,
      loss=CrossEntropy(),
      num_epochs=600,
      optimizer=MBGD(learningRate=0.0001),
      showGraph=True)

net.serialize("serializedMNIST.json")
inputs = np.array([
    [0, 0],
    [1, 0],
    [0, 1],
    [1, 1]
])
targets = np.array([
    [1, 0],
    [0, 1],
    [0, 1],
    [1, 0]
])

net = NeuralNetwork([
    LinearLayer(inputSize=2, outputSize=2),
    LeakyRelu(),
    LinearLayer(inputSize=2, outputSize=2),
    Tanh()
])

train(net, inputs, targets,
      loss=MSE(),
      num_epochs=5000,
      optimizer=MBGD(learningRate=0.01),
      showGraph=True)

# net.loadParamsFromFile("/home/ayush/scratch/Net/aknet/serialized.json")

for x, y in zip(inputs, targets):
    predicted = net.forward(x)
    print(x, predicted, y)

net.serialize("serialized.json")