def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=(500, 500),
             corruption_levels=(0.1, 0.1)):
    """
    Build the stacked denoising autoencoder graph.

    A sigmoid HiddenLayer and a DA sharing that layer's W and b are created
    for every entry of ``hidden_layers_sizes``; a LogisticRegression layer
    is placed on top of the last hidden layer.

    :param n_in: int, the dimension of input
    :param n_out: int, the dimension of output
    :param hidden_layers_sizes: list or tuple, the hidden layer sizes
    :param corruption_levels: list or tuple, the corruption level for each layer
    """
    depth = len(hidden_layers_sizes)
    assert depth >= 1
    assert depth == len(corruption_levels)
    self.corruption_levels = corruption_levels
    self.n_layers = depth

    # Containers filled while the stack is built.
    self.layers = []      # plain sigmoid layers
    self.dA_layers = []   # denoising autoencoders, one per sigmoid layer
    self.params = []      # variables trained during finetuning

    # Placeholders for the network input and the targets.
    self.x = tf.placeholder(tf.float32, shape=[None, n_in])
    self.y = tf.placeholder(tf.float32, shape=[None, n_out])

    # Walk the stack bottom-up; each layer consumes the previous output.
    below_size, below_output = n_in, self.x
    for n_hidden in hidden_layers_sizes:
        hidden = HiddenLayer(inpt=below_output, n_in=below_size,
                             n_out=n_hidden, activation=tf.nn.sigmoid)
        self.layers.append(hidden)
        # The autoencoder reuses the sigmoid layer's weights and hidden bias.
        autoencoder = DA(inpt=below_output, n_hidden=n_hidden,
                         n_visiable=below_size, W=hidden.W, bhid=hidden.b)
        self.dA_layers.append(autoencoder)
        self.params.extend(hidden.params)
        below_size, below_output = n_hidden, hidden.output

    # Classifier on top of the last hidden layer.
    self.output_layer = LogisticRegression(inpt=below_output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_out)
    self.params.extend(self.output_layer.params)
    # Finetuning objective and accuracy, both evaluated against self.y.
    self.finetune_cost = self.output_layer.cost(self.y)
    self.accuracy = self.output_layer.accuarcy(self.y)
def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=(500, 500),
             data_path="/mnt/disk2/liuying/T-ITS/dataset/geolife/geolife_total"):
    """
    Build the DBN graph, its optimizer and saver, and fit the data scaler.

    :param n_in: int, half of the input width (the placeholder is n_in * 2 wide)
    :param n_out: int, half of the output width (the placeholder is n_out * 2 wide)
    :param hidden_layers_sizes: list or tuple, the hidden layer sizes
    :param data_path: str, text file read with np.loadtxt; columns 2:4 are
        used to fit the MinMaxScaler stored in ``self.scaler``
    :raises ValueError: if ``hidden_layers_sizes`` is empty
    """
    if len(hidden_layers_sizes) == 0:
        raise ValueError("hidden_layers_sizes must contain at least one size")
    self.n_layers = len(hidden_layers_sizes)
    # NOTE: entries are the output tensors of the sigmoid layers (the
    # previous code also appended tensors, then wrongly read `.output`).
    self.layers = []
    self.rbm_layers = []   # RBM layers sharing weights with the sigmoid layers
    self.params = []       # variables optimized by self.train_op
    self.n_in = n_in
    self.n_out = n_out

    # Define the input and output
    self.x = tf.placeholder(tf.float32, shape=[None, self.n_in * 2])
    self.y = tf.placeholder(tf.float32, shape=[None, self.n_out * 2])

    # Fit the scaler on columns 2 and 3 of the full data set.
    total_data = np.loadtxt(data_path)
    total_data = total_data[:, 2:4]
    self.scaler = MinMaxScaler().fit(total_data)
    self.checkpoint_times = 1

    # Construct the layers of the DBN
    for i in range(self.n_layers):
        if i == 0:
            layer_input = self.x
            input_size = self.n_in * 2
        else:
            layer_input = self.layers[i - 1]
            input_size = hidden_layers_sizes[i - 1]
        print("n_in:{0} n_out:{1}".format(input_size, hidden_layers_sizes[i]))
        # Use the Dense layer *object* so its kernel/bias variables can be
        # shared with the RBM; a unique name per layer avoids variable
        # collisions (the first layer keeps the name "layer_1").
        dense = tf.layers.Dense(units=hidden_layers_sizes[i],
                                activation=tf.nn.sigmoid,
                                name="layer_{0}".format(i + 1))
        self.layers.append(dense(layer_input))
        # Add the parameters for finetuning
        self.params.extend([dense.kernel, dense.bias])
        # Create the RBM layer on the same input, sharing W and hidden bias
        self.rbm_layers.append(
            RBM(inpt=layer_input, n_visiable=input_size,
                n_hidden=hidden_layers_sizes[i],
                W=dense.kernel, hbias=dense.bias))

    # We use the LogisticRegression layer as the output layer
    self.output_layer = LogisticRegression(inpt=self.layers[-1],
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_out * 2)
    self.params.extend(self.output_layer.params)
    # The finetuning cost
    self.cost = self.output_layer.cost(self.y)
    # The logistic regression output
    self.predictor = self.output_layer.output
    self.train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
        self.cost, var_list=self.params)
    self.saver = tf.train.Saver(tf.global_variables())
def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=[500, 500]):
    """
    Build the DBN graph inside TensorFlow name scopes.

    :param n_in: int, the dimension of input
    :param n_out: int, the dimension of output
    :param hidden_layers_sizes: list or tuple, the hidden layer sizes
    """
    assert len(hidden_layers_sizes) > 0
    self.n_layers = len(hidden_layers_sizes)
    self.layers = []       # sigmoid layers
    self.rbm_layers = []   # RBMs sharing W / hidden bias with the sigmoid layers
    self.params = []       # variables trained during finetuning

    # Placeholders for input and target.
    self.x = tf.placeholder(tf.float32, shape=[None, n_in])
    self.y = tf.placeholder(tf.float32, shape=[None, n_out])

    # Stack the hidden layers; each one reads the output of the one below.
    with tf.name_scope('DBN_layer'):
        below_output, below_size = self.x, n_in
        for n_hidden in hidden_layers_sizes:
            with tf.name_scope('internel_layer'):
                hidden = HiddenLayer(inpt=below_output, n_in=below_size,
                                     n_out=n_hidden,
                                     activation=tf.nn.sigmoid)
            self.layers.append(hidden)
            self.params.extend(hidden.params)
            with tf.name_scope('rbm_layer'):
                rbm = RBM(inpt=below_output, n_visiable=below_size,
                          n_hidden=n_hidden, W=hidden.W, hbias=hidden.b)
                self.rbm_layers.append(rbm)
            below_output, below_size = hidden.output, n_hidden

    # LogisticRegression classifier on top of the last hidden layer.
    with tf.name_scope('output_layer'):
        self.output_layer = LogisticRegression(
            inpt=below_output, n_in=hidden_layers_sizes[-1], n_out=n_out)
    self.params.extend(self.output_layer.params)

    # Finetuning cost.
    with tf.name_scope('output_loss'):
        self.cost = self.output_layer.cost(self.y)
    # Accuracy against the targets.
    self.accuracy = self.output_layer.accuarcy(self.y)
def __logisict_regression(self):
    """Train LogisticRegression on the managed data split and return the
    value produced by its ``prediction`` call on the test split."""
    manager = DataSetManager(self.classClassfy)
    splits = manager.get_data_X_Y_for_train_and_test()
    X_train, X_test, Y_train, Y_test = splits
    model = LogisticRegression(X_train=X_train,
                               Y_train=Y_train,
                               learning_rate=0.01)
    # The learned weights are handed back to the model for evaluation.
    weights = model.get_value_W()
    return model.prediction(X_test, weights, Y_test)
def __init__(self, rng, input, n_input, n_hidden, n_output):
    """
    Wire a tanh hidden layer into a LogisticRegression output layer.

    Also exposes the L1 norm and the squared L2 norm of both weight
    matrices (for regularization), the output layer's
    negative_log_likelihood and errors callables, and the combined
    parameter list.
    """
    self.hiddenLayer = hidden = HidedenLayer(
        rng=rng,
        input=input,
        n_input=n_input,
        n_output=n_hidden,
        activation=T.tanh,
    )
    self.logRegressionLayer = classifier = LogisticRegression(
        input=hidden.output,
        n_in=n_hidden,
        n_out=n_output,
    )

    # L1 norm of W^{(1)} and W^{(2)}; keeping it small is one
    # regularization option.
    hidden_l1 = abs(hidden.W).sum()
    classifier_l1 = abs(classifier.W).sum()
    self.L1 = hidden_l1 + classifier_l1

    # Squared L2 norm of the same matrices; the other regularization option.
    hidden_l2 = abs(hidden.W**2).sum()
    classifier_l2 = abs(classifier.W**2).sum()
    self.L2_sqr = hidden_l2 + classifier_l2

    # Cost and error count are delegated to the output layer.
    self.negative_log_likelihood = classifier.negative_log_likelihood
    self.errors = classifier.errors

    self.params = hidden.params + classifier.params
    self.input = input
def __init__(self, inpt, n_in, n_hidden, n_out):
    """
    One-hidden-layer perceptron: HiddenLayer followed by LogisticRegression.

    inpt: tf.Tensor, shape [n_examples, n_in]
    n_in: int, the dimensionality of input
    n_hidden: int, number of hidden units
    n_out: int, number of output units
    """
    self.hiddenLayer = hidden = HiddenLayer(inpt, n_in=n_in, n_out=n_hidden)
    # The logistic output layer reads the hidden activations.
    self.outputLayer = logistic = LogisticRegression(hidden.output,
                                                     n_in=n_hidden,
                                                     n_out=n_out)

    # L1 norm over both weight matrices.
    hidden_l1 = tf.reduce_sum(tf.abs(hidden.W))
    logistic_l1 = tf.reduce_sum(tf.abs(logistic.W))
    self.L1 = hidden_l1 + logistic_l1

    # Sum of squared weights over both matrices.
    hidden_l2 = tf.reduce_sum(tf.square(hidden.W))
    logistic_l2 = tf.reduce_sum(tf.square(logistic.W))
    self.L2 = hidden_l2 + logistic_l2

    # Cost and accuracy are the output layer's own (bound) functions.
    self.cost = logistic.cost
    self.accuracy = logistic.accuarcy

    self.params = hidden.params + logistic.params
    # Remember the input tensor.
    self.input = inpt
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Build a tanh hidden layer feeding a logistic-regression output layer,
    and expose the regularization terms, cost, errors and parameters."""
    # Constructing the hidden layer also initializes its weight matrix and
    # computes the hidden activations.
    self.hiddenLayer = hidden = HiddenLayer(rng=rng, input=input,
                                            n_in=n_in, n_out=n_hidden,
                                            activation=T.tanh)

    # The hidden output is the input of the final logistic regression.
    self.logRegressionLayer = log_reg = LogisticRegression(
        input=hidden.output, n_in=n_hidden, n_out=n_out)

    # Regularization terms: L1 norm and squared L2 norm of both weights.
    hidden_l1 = abs(hidden.W).sum()
    log_reg_l1 = abs(log_reg.W).sum()
    self.L1 = hidden_l1 + log_reg_l1

    hidden_sq = (hidden.W ** 2).sum()
    log_reg_sq = (log_reg.W ** 2).sum()
    self.L2_sqr = hidden_sq + log_reg_sq

    # Cost function of the logistic regression layer.
    self.negative_log_likelihood = log_reg.negative_log_likelihood
    # Error count of the logistic regression layer.
    self.errors = log_reg.errors

    # Model parameters.
    self.params = hidden.params + log_reg.params

    # Keep track of the model input.
    self.input = input
def run_logistic_regression(df, num_features):
    """
    Run five logistic-regression experiments and return the mean accuracy.

    The data frame is split into five groups; each group serves once as the
    test set while the other four are concatenated into the training set.

    :param df: The data set to run the algorithm on
    :param num_features: The number of features in this dataset; columns
        ``0:num_features`` are the features and column ``num_features`` is
        the target
    :return: mean of the five values returned by ``validate()``
    """
    print(f"Size of total dataset = {len(df)}")
    fold_a, fold_b, fold_c, fold_d, fold_e = split_into_random_stratified_groups(
        df)
    folds = [fold_a, fold_b, fold_c, fold_d, fold_e]

    accuracies = []
    for fold_no in range(len(folds)):
        print("-------------")
        print(f"Experiment #{fold_no + 1}")
        print("-------------")

        # The current fold is held out for testing.
        held_out = folds[fold_no]
        print(f"Test set size = {len(held_out)}")

        # Everything else becomes the training set.
        remaining = folds[:fold_no] + folds[fold_no + 1:]
        train_frame = pd.concat(remaining)
        print(f"Training set size = {len(train_frame)}")

        model = LogisticRegression(
            train_frame.columns[0:num_features],
            train_frame.iloc[:, 0:num_features],
            train_frame.iloc[:, num_features],
            held_out.iloc[:, 0:num_features],
            held_out.iloc[:, num_features],
        )
        model.learn()

        accuracy = model.validate()
        print(f"Logistic Regression Percent accurate: {accuracy!r}%")
        accuracies.append(accuracy)

    return statistics.mean(accuracies)
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """
    Initialize the parameters of the MLP

    :param rng: a random number generator used to initialize weights
    :type rng: numpy.random.RandomState

    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)
    :type input: theano.tensor.TensorType

    :param n_in: number of input units (the dimensionality of the space
                 in which the datapoints live)
    :type n_in: int

    :param n_hidden: number of hidden units
    :type n_hidden: int

    :param n_out: number of output units, i.e. the dimension of the space
                  the labels lie in
    :type n_out: int
    """
    # A one-hidden-layer MLP is a layer of tanh units connected to a
    # logistic regression layer.
    self.hiddenLayer = tanh_layer = HiddenLayer(
        rng=rng, input=input, n_in=n_in, n_out=n_hidden, activation=T.tanh)

    # The logistic regression layer consumes the hidden units' output.
    self.logRegressionLayer = softmax_layer = LogisticRegression(
        input=tanh_layer.output, n_in=n_hidden, n_out=n_out)

    # Regularization term 1: L1 norm of both weight matrices.
    l1_hidden = abs(tanh_layer.W).sum()
    l1_output = abs(softmax_layer.W).sum()
    self.L1 = l1_hidden + l1_output

    # Regularization term 2: squared L2 norm of both weight matrices.
    l2_hidden = (tanh_layer.W**2).sum()
    l2_output = (softmax_layer.W**2).sum()
    self.L2_sqr = l2_hidden + l2_output

    # The MLP's negative log likelihood and error count are those of the
    # logistic regression layer.
    self.negative_log_likelihood = softmax_layer.negative_log_likelihood
    self.errors = softmax_layer.errors

    # The model parameters are the parameters of its two layers.
    self.params = tanh_layer.params + softmax_layer.params
def __init__(self, layers, Neurons):
    """
    Create a grid of LogisticRegression units, one list per layer.

    :param layers: int, number of layers
    :param Neurons: sequence of int, number of units in each layer; its
        length must equal ``layers``, otherwise a message is printed and
        the program exits
    """
    self.layers = []
    self.alpha = 0.1   # passed to every LogisticRegression unit
    self.nuofLayers = layers
    self.Neuronscount = Neurons
    if layers != len(Neurons):
        print("Number of neurons in each layer is not specified")
        exit()
    self.layers = [
        [LogisticRegression(self.alpha) for _ in range(Neurons[i])]
        for i in range(layers)
    ]
    # dtype=object keeps this working when layers have different sizes:
    # recent NumPy refuses to build an array from ragged nested lists
    # unless the object dtype is requested explicitly.
    self.layers = np.array(self.layers, dtype=object)
def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=(500, 500),
             corruption_levels=(0.1, 0.1)):
    """
    Assemble the stacked denoising autoencoder.

    Every hidden size yields one sigmoid HiddenLayer plus one DA that
    shares the layer's W and b; a LogisticRegression layer sits on top.

    :param n_in: int, the dimension of input
    :param n_out: int, the dimension of output
    :param hidden_layers_sizes: list or tuple, the hidden layer sizes
    :param corruption_levels: list or tuple, the corruption level for each layer
    """
    n_hidden_layers = len(hidden_layers_sizes)
    assert n_hidden_layers >= 1
    assert n_hidden_layers == len(corruption_levels)
    self.corruption_levels = corruption_levels
    self.n_layers = n_hidden_layers

    self.layers = []      # the normal (sigmoid) layers
    self.dA_layers = []   # the dA layers
    self.params = []      # parameters collected for finetuning

    # Input and target placeholders.
    self.x = tf.placeholder(tf.float32, shape=[None, n_in])
    self.y = tf.placeholder(tf.float32, shape=[None, n_out])

    # Build the stack; the first layer reads self.x, later ones read the
    # output of the layer directly below.
    current_input = self.x
    current_width = n_in
    for width in hidden_layers_sizes:
        encoder = HiddenLayer(inpt=current_input, n_in=current_width,
                              n_out=width, activation=tf.nn.sigmoid)
        self.layers.append(encoder)
        self.dA_layers.append(
            DA(inpt=current_input, n_hidden=width, n_visiable=current_width,
               W=encoder.W, bhid=encoder.b))
        self.params.extend(encoder.params)
        current_input = encoder.output
        current_width = width

    # Output layer on top of the last hidden layer.
    self.output_layer = LogisticRegression(inpt=current_input,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_out)
    self.params.extend(self.output_layer.params)
    # Finetuning cost and accuracy against self.y.
    self.finetune_cost = self.output_layer.cost(self.y)
    self.accuracy = self.output_layer.accuarcy(self.y)
def _ask_yes_no(prompt):
    """Print *prompt* until the user answers y/n (either case) and return
    the answer as a bool; answering q (either case) quits the program."""
    while True:
        print(prompt)
        answer = input()
        if answer in ('y', 'Y'):
            return True
        if answer in ('n', 'N'):
            return False
        if answer in ('q', 'Q'):
            print("Quitting the program...")
            exit()
        print("invalid input... Please provide a valid input.")


def main():
    """
    Train and evaluate LogisticRegression on the breast-cancer data set.

    Uses the first ten feature columns, scaled by their column maxima, asks
    the user whether to shuffle the split and whether to animate gradient
    descent, holds out 15% of the samples for testing and prints the scores.
    """
    rawData = load_breast_cancer()
    trainingData = np.matrix(rawData.data, dtype='float')
    x = trainingData[:, 0:10]
    # Scale every column by its maximum value.
    x = x / x.max(axis=0)
    y = np.matrix(rawData.target, dtype='float').T
    print("Data loaded successfully...")

    shuffle = _ask_yes_no(
        "Do you want to shuffle the data while spliting for training and testing? press y or n... q for quit...")
    animation = _ask_yes_no(
        "Do you want to show the animation for gradient descent? press y or n... q for quit...")

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.15, shuffle=shuffle)
    print("Training and testing data generated...")

    logReg = LogisticRegression(x_train, y_train)
    logReg.train(animation=animation, alpha=0.001)
    # The return value is not needed here; the call is kept because
    # scores() is only given y_test and presumably relies on the
    # predictions made by test() -- TODO confirm.
    logReg.test(x_test)
    score = logReg.scores(y_test)
    print("precision: ", score['precision'])
    print("recall: ", score['recall'])
    print("f1 score: ", score['f1'])
    print("accuaracy: ", score['accuaracy'])
    print("confusionMatrix: ", score['confusionMatrix'])
def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=[500, 500]):
    """
    Build the DBN: sigmoid layers, weight-sharing RBMs and a logistic output.

    :param n_in: int, the dimension of input
    :param n_out: int, the dimension of output
    :param hidden_layers_sizes: list or tuple, the hidden layer sizes
    """
    assert len(hidden_layers_sizes) > 0
    self.n_layers = len(hidden_layers_sizes)
    self.layers = []       # sigmoid layers
    self.rbm_layers = []   # one RBM per sigmoid layer
    self.params = []       # variables trained during finetuning

    # Placeholders for input and target.
    self.x = tf.placeholder(tf.float32, shape=[None, n_in])
    self.y = tf.placeholder(tf.float32, shape=[None, n_out])

    # Build the stack bottom-up; the first layer reads self.x.
    feed, feed_size = self.x, n_in
    for n_hidden in hidden_layers_sizes:
        hidden = HiddenLayer(inpt=feed, n_in=feed_size, n_out=n_hidden,
                             activation=tf.nn.sigmoid)
        self.layers.append(hidden)
        self.params.extend(hidden.params)
        # The RBM sees the same input and shares W and the hidden bias.
        rbm = RBM(inpt=feed, n_visiable=feed_size, n_hidden=n_hidden,
                  W=hidden.W, hbias=hidden.b)
        self.rbm_layers.append(rbm)
        feed, feed_size = hidden.output, n_hidden

    # LogisticRegression serves as the output layer.
    self.output_layer = LogisticRegression(inpt=feed,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_out)
    self.params.extend(self.output_layer.params)
    # Finetuning cost and accuracy against self.y.
    self.cost = self.output_layer.cost(self.y)
    self.accuracy = self.output_layer.accuarcy(self.y)
def __init__(self, numpy_rng, n_ins, n_outs, hidden_layers_sizes,
             corruption_levels=[0.1, 0.1], theano_rng=None):
    """ Build a stacked denoising autoencoder with a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type n_ins: int
    :param n_ins: dimension of the input to the sdA

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers size, must contain
                                at least one value

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer
                              (accepted but not read by this constructor)

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `numpy_rng`
    """
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    self.n_ins = n_ins
    self.n_outs = n_outs

    # Symbolic variables for the data and the labels.
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2**30))

    for i in xrange(self.n_layers):
        # The first layer reads the SdA input; every other layer reads
        # the activation of the hidden layer below it.
        if i == 0:
            input_size, layer_input = n_ins, self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output

        hidden = HiddenLayer(rng=numpy_rng,
                             input=layer_input,
                             n_in=input_size,
                             n_out=hidden_layers_sizes[i],
                             activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(hidden)
        self.params.append(hidden.theta)

        # Denoising autoencoder sharing its parameters (theta) with the
        # sigmoid layer just built.
        autoencoder = dA(numpy_rng=numpy_rng,
                         theano_rng=theano_rng,
                         input=layer_input,
                         n_visible=input_size,
                         n_hidden=hidden_layers_sizes[i],
                         theta=hidden.theta)
        self.dA_layers.append(autoencoder)

    # Compiled function mapping raw input to the top hidden representation.
    sda_input = T.matrix('sda_input')
    self.da_layers_output_size = hidden_layers_sizes[-1]
    top_output = self.sigmoid_layers[-1].output
    self.get_da_output = theano.function(
        inputs=[sda_input],
        outputs=top_output.reshape((-1, self.da_layers_output_size)),
        givens={self.x: sda_input})

    # Logistic layer on top; it is given its own fresh RandomState, and
    # its parameters are not added to self.params.
    self.logLayer = LogisticRegression(
        rng=numpy.random.RandomState(),
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
class SdA(object):
    """
    Stacked denoising autoencoder class.

    The model is constructed by stacking several dAs. The dA layers are
    used to initialize the network; after pre-training, the SdA is similar
    to a normal MLP.
    """

    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=(500, 500),
                 corruption_levels=(0.1, 0.1)):
        """
        :param n_in: int, the dimension of input
        :param n_out: int, the dimension of output
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        :param corruption_levels: list or tuple, the corruption level for each layer
        """
        assert len(hidden_layers_sizes) >= 1
        assert len(hidden_layers_sizes) == len(corruption_levels)
        self.corruption_levels = corruption_levels
        self.n_layers = len(hidden_layers_sizes)
        # define the layers
        self.layers = []      # the normal layers
        self.dA_layers = []   # the dA layers
        self.params = []      # params trained during finetuning
        # define the input and output
        self.x = tf.placeholder(tf.float32, shape=[None, n_in])
        self.y = tf.placeholder(tf.float32, shape=[None, n_out])
        # construct the layers
        for i in range(self.n_layers):
            if i == 0:  # the input layer
                input_size = n_in
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[i - 1].output
            # create the sigmoid layer
            sigmoid_layer = HiddenLayer(inpt=layer_input, n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=tf.nn.sigmoid)
            self.layers.append(sigmoid_layer)
            # create the dA layer, sharing W and the hidden bias
            dA_layer = DA(inpt=layer_input, n_hidden=hidden_layers_sizes[i],
                          n_visiable=input_size, W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
            # collect the params
            self.params.extend(sigmoid_layer.params)
        # add the output layer
        self.output_layer = LogisticRegression(inpt=self.layers[-1].output,
                                               n_in=hidden_layers_sizes[-1],
                                               n_out=n_out)
        self.params.extend(self.output_layer.params)
        # the finetuning cost
        self.finetune_cost = self.output_layer.cost(self.y)
        # the accuracy
        self.accuracy = self.output_layer.accuarcy(self.y)

    def pretrain(self, sess, X_train, pretraining_epochs=10, batch_size=100,
                 learning_rate=0.001, display_step=1):
        """
        Pretrain the dA layers one after another.

        :param sess: tf.Session
        :param X_train: data set object; ``X_train.train.num_examples`` and
            ``X_train.train.next_batch(batch_size)`` are used
        :param pretraining_epochs: int, epochs per layer
        :param batch_size: int
        :param learning_rate: float
        :param display_step: int, print the average cost every this many epochs
        """
        print('Starting pretraining...')
        start_time = timeit.default_timer()
        batch_num = int(X_train.train.num_examples / batch_size)
        for i in range(self.n_layers):
            # pretraining layer by layer
            cost = self.dA_layers[i].get_cost(
                corruption_level=self.corruption_levels[i])
            params = self.dA_layers[i].params
            train_op = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(cost, var_list=params)
            for epoch in range(pretraining_epochs):
                avg_cost = 0.0
                for j in range(batch_num):
                    x_batch, _ = X_train.train.next_batch(batch_size)
                    # Train and read the cost in a single graph execution
                    # instead of running the forward pass twice per batch.
                    _, batch_cost = sess.run([train_op, cost],
                                             feed_dict={self.x: x_batch})
                    avg_cost += batch_cost / batch_num
                # report
                if epoch % display_step == 0:
                    print("Pretraing layer {0} Epoch {1} cost: {2}".format(
                        i, epoch, avg_cost))

        end_time = timeit.default_timer()
        print("The pretraining process ran for {0}m".format(
            (end_time - start_time) / 60))

    def finetuning(self, sess, trainSet, training_epochs=10, batch_size=100,
                   learning_rate=0.1, display_step=1):
        """
        Finetune the whole network on labelled data.

        :param sess: tf.Session
        :param trainSet: data set object; ``trainSet.train`` supplies the
            batches and ``trainSet.validation.images`` / ``.labels`` the
            validation data
        :param training_epochs: int
        :param batch_size: int
        :param learning_rate: float
        :param display_step: int, report every this many epochs
        """
        print("Start finetuning...")
        start_time = timeit.default_timer()
        train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            self.finetune_cost, var_list=self.params)
        # The number of batches does not change between epochs.
        batch_num = int(trainSet.train.num_examples / batch_size)
        for epoch in range(training_epochs):
            avg_cost = 0.0
            for i in range(batch_num):
                x_batch, y_batch = trainSet.train.next_batch(batch_size)
                # Train and read the cost in a single graph execution.
                _, batch_cost = sess.run(
                    [train_op, self.finetune_cost],
                    feed_dict={self.x: x_batch, self.y: y_batch})
                avg_cost += batch_cost / batch_num
            # report
            if epoch % display_step == 0:
                val_acc = sess.run(
                    self.accuracy,
                    feed_dict={self.x: trainSet.validation.images,
                               self.y: trainSet.validation.labels})
                print(" Epoch {0} cost: {1}, validation accuacy: {2}".format(
                    epoch, avg_cost, val_acc))

        end_time = timeit.default_timer()
        print("The finetuning process ran for {0}m".format(
            (end_time - start_time) / 60))
def learnAndPredict(Ti, C, TOList):
    """Train a small convolutional network on pairs of rows of ``Ti`` labelled
    by ``C`` and score every ordered row pair of each array in ``TOList``.

    Python 2 code (print statements, ``xrange``, integer ``/``).

    Ti     -- 2-D array, normalized IN PLACE; rows ``:1000`` and row ``1000``
              are normalized separately (presumably 1000 individual signals
              plus one aggregate signal -- TODO confirm).
    C      -- indexable as ``C[i][j]``; pairs with value 1 are positives,
              everything else negatives.
    TOList -- list of arrays, normalized IN PLACE with the statistics
              computed from ``Ti``.

    Returns a list with one (N, N) array per entry of ``TOList``, each
    divided by its own maximum.

    Relies on module-level names not visible in this block (SEED,
    learning_rate0, NN, NUM_TRAIN, L, Q, POOL, nkerns, batch_size, n_epochs,
    start_time0, ...).
    """
    rng = np.random.RandomState(SEED)
    learning_rate = learning_rate0
    print np.mean(Ti[1000,:])
    # --- normalize rows 0..999 of Ti: min-max scaling, then standardization
    aminW = np.amin(Ti[:1000,:])
    amaxW = np.amax(Ti[:1000,:])
    Ti[:1000,:] = (Ti[:1000,:] - aminW) / (amaxW - aminW)
    astdW = np.std(Ti[:1000,:])
    ameanW = np.mean(Ti[:1000,:])
    Ti[:1000,:] = (Ti[:1000,:] - ameanW) / astdW
    # --- same two steps for row 1000, with its own statistics
    aminacW = np.amin(Ti[1000,:])
    amaxacW = np.amax(Ti[1000,:])
    print aminW, amaxW, aminacW, amaxacW
    Ti[1000,:] = (Ti[1000,:] - aminacW) / (amaxacW - aminacW)
    astdacW = np.std(Ti[1000,:])
    ameanacW = np.mean(Ti[1000,:])
    Ti[1000,:] = (Ti[1000,:] - ameanacW) / astdacW
    ile__ = len(TOList)
    # NOTE(review): float array, so the column counts stored below are floats.
    ileList = np.zeros(ile__)
    # --- apply the Ti statistics to every array that will be predicted
    for titer in range(len(TOList)):
        print np.mean(TOList[titer][1000,:])
        TOList[titer][:1000,:] = (TOList[titer][:1000,:] - aminW)/(amaxW - aminW)
        TOList[titer][:1000,:] = (TOList[titer][:1000,:] - ameanW)/astdW
        TOList[titer][1000,:] = (TOList[titer][1000,:] - aminacW)/(amaxacW - aminacW)
        TOList[titer][1000,:] = (TOList[titer][1000,:] - ameanacW)/astdacW
        _, ileList[titer] = TOList[titer].shape
    _, ile = Ti.shape
    N = NN
    data = []; yyy = []; need = 1; BYL = {}; j= 0; dwa = 0; ONES = []; ZEROS = []
    # --- split all ordered pairs (i != j) into positives and negatives
    for i in range(NN):
        for j in range(NN):
            if i!= j:
                if C[i][j]==1:
                    ONES.append((i,j))
                else:
                    ZEROS.append((i,j))
    Nones = len(ONES)
    rng.shuffle(ONES)
    Nzeros = len(ZEROS)
    print Nones
    print Nzeros
    Needed = NUM_TRAIN/2
    onesPerPair = Needed / Nones + 1
    onesIter = 0
    jj = 0
    # --- build NUM_TRAIN samples, alternating negative / positive pairs
    while jj < NUM_TRAIN:
        if jj%300000 == 0:
            print jj/300000,
        need = 1 - need
        if need == 1:
            # positives are reused cyclically, each time at a new random offset
            pairNo = onesIter % Nones
            ppp = onesIter / Nones
            s,t = ONES[pairNo]
            shift = rng.randint(0, ile - L)
            onesIter += 1
        if need == 0:
            # negatives are drawn without replacement
            zer = rng.randint(Nzeros)
            s,t = ZEROS[zer]
            del ZEROS[zer]
            Nzeros -= 1
            shift = rng.randint(0, ile - L)
        # one sample = windows of length L from row s, row t and row 1000
        x = np.hstack(( Ti[s][shift:shift+L], Ti[t][shift:shift+L], Ti[1000][shift:shift+L]))
        y = C[s][t]
        data.append(x); yyy.append(y)
        jj+=1
    data = np.array(data, dtype=theano.config.floatX)
    # per 100 samples: 96 train (0), 2 test (1), 2 validation (2)
    is_train = np.array( ([0]*96 + [1,1,2,2]) * (NUM_TRAIN / 100))
    yyy = np.array(yyy)
    train_set_x0, train_set_y0 = np.array(data[is_train==0]), yyy[is_train==0]
    test_set_x, test_set_y = np.array(data[is_train==1]), yyy[is_train==1]
    valid_set_x, valid_set_y = np.array(data[is_train==2]), yyy[is_train==2]
    n_train_batches = len(train_set_y0) / batch_size
    n_valid_batches = len(valid_set_y) / batch_size
    n_test_batches = len(test_set_y) / batch_size
    epoch = T.scalar()
    index = T.lscalar()
    x = T.matrix('x')
    inone2 = T.matrix('inone2')
    y = T.ivector('y')
    print '... building the model'
    # -------- network definition --------
    # layer 0: the three stacked windows form a 1 x 3 x L image
    layer0_input = x.reshape((batch_size, 1, 3, L))
    Cx = 5
    layer0 = ConvolutionalLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 3, L), filter_shape=(nkerns[0], 1, 2, Cx), poolsize=(1, 1), fac = 0)
    # output height / width after the layer-0 convolution
    ONE = (3 - 2 + 1) / 1
    L2 = (L - Cx + 1) / 1
    # layer 1
    Cx2 = 5
    layer1 = ConvolutionalLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], ONE, L2), filter_shape=(nkerns[1], nkerns[0], 2, Cx2), poolsize=(1, 1), activation=ReLU, fac = 0)
    ONE = (ONE - 2 + 1) /1
    L3 = (L2 - Cx2 + 1) /1
    # layer 1b: 1x1 convolution followed by pooling of width POOL
    Cx3 = 1
    layer1b = ConvolutionalLayer(rng, input=layer1.output, image_shape=(batch_size, nkerns[1], ONE, L3), filter_shape=(nkerns[2], nkerns[1], 1, Cx3), poolsize=(1, POOL), activation=ReLU, fac = 0)
    ONE = (ONE - 1 + 1) /1
    L4 = (L3 - Cx3 + 1) /POOL
    REGx = 100
    # fully connected layer and two-class logistic output
    layer2_input = layer1b.output.flatten(2)
    print layer2_input.shape
    use_b = False
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[2]*L4 , n_out=REGx, activation=T.tanh, use_bias = use_b)
    layer3 = LogisticRegression(input=layer2.output, n_in=REGx, n_out=2)
    cost = layer3.negative_log_likelihood(y)
    # shared buffers filled during prediction (inone2 rebinds the symbolic
    # matrix of the same name created above)
    out_x2 = theano.shared(np.asarray(np.zeros((N,L)), dtype=theano.config.floatX))
    inone2 = theano.shared(np.asarray(np.zeros((1,L)), dtype=theano.config.floatX))
    inone3 = theano.shared(np.asarray(np.zeros((1,L)), dtype=theano.config.floatX))
    inone4 = theano.shared(np.asarray(np.zeros((1,L)), dtype=theano.config.floatX))
    # move the data sets into shared variables; labels are cast to int32
    test_set_x = theano.shared(np.asarray(test_set_x, dtype=theano.config.floatX))
    train_set_x = theano.shared(np.asarray(train_set_x0, dtype=theano.config.floatX))
    train_set_y = T.cast(theano.shared(np.asarray(train_set_y0, dtype=theano.config.floatX)), 'int32')
    test_set_y = T.cast(theano.shared(np.asarray(test_set_y, dtype=theano.config.floatX)), 'int32')
    valid_set_y = T.cast(theano.shared(np.asarray(valid_set_y, dtype=theano.config.floatX)), 'int32')
    valid_set_x = theano.shared(np.asarray(valid_set_x, dtype=theano.config.floatX))
    test_model = theano.function([index], layer3.errors(y), givens={
        x: test_set_x[index * batch_size: (index + 1) * batch_size],
        y: test_set_y[index * batch_size: (index + 1) * batch_size]})
    validate_model = theano.function([index], layer3.errors(y), givens={
        x: valid_set_x[index * batch_size: (index + 1) * batch_size],
        y: valid_set_y[index * batch_size: (index + 1) * batch_size]})
    # momentum is interpolated from mom_start to mom_end over the epochs
    mom_start = 0.5; mom_end = 0.98; mom_epoch_interval = n_epochs * 1.0
    class_params0 = [layer3, layer2, layer1, layer1b, layer0]
    class_params = [ param for layer in class_params0 for param in layer.params ]
    gparams = []
    for param in class_params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    # one momentum accumulator per parameter
    gparams_mom = []
    for param in class_params:
        gparam_mom = theano.shared(np.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)
    mom = ifelse(epoch < mom_epoch_interval, mom_start*(1.0 - epoch/mom_epoch_interval) + mom_end*(epoch/mom_epoch_interval), mom_end)
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = mom * gparam_mom - (1. - mom) * learning_rate * gparam
    for param, gparam_mom in zip(class_params, gparams_mom):
        stepped_param = param + updates[gparam_mom]
        squared_filter_length_limit = 15.0
        if param.get_value(borrow=True).ndim == 2:
            # 2-D parameters: clip the norm of every column
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param
    output = cost
    train_model = theano.function(inputs=[epoch, index], outputs=output, updates=updates, givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    keep = theano.function([index], layer3.errorsFull(y), givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]}, on_unused_input='warn')
    timer = time.clock()
    print "finished reading", (timer - start_time0) /60. , "minutes "
    # -------- train the model --------
    print '... training'
    validation_frequency = n_train_batches; best_params = None; best_validation_loss = np.inf
    best_iter = 0; test_score = 0.; epochc = 0;
    while (epochc < n_epochs):
        epochc = epochc + 1
        # NOTE(review): this rebinds a Python local after `updates` was
        # built; it looks like it cannot influence the compiled train_model
        # unless learning_rate0 is a symbolic/shared value -- confirm.
        learning_rate = learning_rate0 * (1.2 - ((1.0 * epochc)/n_epochs))
        for minibatch_index in xrange(n_train_batches):
            iter = (epochc - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(epochc, minibatch_index)
            # validate once per epoch (validation_frequency == n_train_batches)
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print(' %i) err %.2f ' % (epochc, this_validation_loss/10)), L, nkerns, REGx, "|", Cx, Cx2, Cx3, batch_size
                # evaluate on the test set on improvement and every 30th epoch
                if this_validation_loss < best_validation_loss or epochc % 30 == 0:
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') % (epochc, minibatch_index + 1, n_train_batches, test_score/10))
    timel = time.clock()
    print "finished learning", (timel - timer) /60. , "minutes "
    # -------- prediction --------
    # Each input row is [window of source row | window of candidate row |
    # window of row 1000]; y is fed only because the graph declares it.
    ppm = theano.function([index], layer3.pred_proba_mine(), givens={
        x: T.horizontal_stack(T.tile(inone2, (batch_size ,1)),
                              out_x2[index * batch_size: (index + 1) * batch_size],
                              T.tile(inone3, (batch_size ,1))),
        y: train_set_y[0 * (batch_size): (0 + 1) * (batch_size)]
        }, on_unused_input='warn')
    NONZERO = (N*N-N)
    gc.collect()
    RESList = [np.zeros((N,N)) for it in range(ile__)]
    for __net in range(ile__):
        TO = TOList[__net]
        ileO = ileList[__net]
        RES = RESList[__net]
        # NOTE(review): shift and DELTAshift are floats but are used as
        # slice bounds below; this depends on an old NumPy that accepted
        # float indices -- confirm the targeted version.
        shift = 0.1
        DELTAshift = (ileO-L) / (Q-1)
        print "DELTAshift:", DELTAshift
        # accumulate scores over Q windows spread across the recording
        for q in range (Q):
            dataO = [];
            print (q+1),"/", Q , " ",
            out_x2.set_value(np.asarray(np.array(TO[:,shift:shift+L]), dtype=theano.config.floatX))
            PARTIAL = np.zeros((N,N))
            inone3.set_value(np.asarray(np.array(TO[1000][shift:shift+L]).reshape(1,L), dtype=theano.config.floatX))
            for i in range(N):
                inone2.set_value(np.asarray(np.array(TO[i][shift:shift+L]).reshape(1,L), dtype=theano.config.floatX))
                p = [ppm(ii) for ii in xrange( N / batch_size)]
                for pos in range(N):
                    if pos != i:
                        # column 1 of the prediction is taken as the score
                        PARTIAL[i][pos] += p[pos / batch_size][pos % batch_size][1]
            for i in range(N):
                for j in range(N):
                    RES[i][j] += PARTIAL[i][j]
            shift += DELTAshift
        print "Finished", __net
        # scale by the maximum accumulated score
        RESList[__net] = RES/np.max(RES)
        gc.collect()
    end_time = time.clock()
    print "finished predicting", (end_time - timel) /60. , "minutes ", str(nkerns), "using SEED = ", SEED
    print('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time0) / 60.))
    return RESList
padding="SAME") # [?, 28, 28, 32] layer0_pool = MaxPoolLayer(layer0_conv.output, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1]) # [?, 14, 14, 32] # conv and pool layer1 layer1_conv = ConvLayer(layer0_pool.output, filter_shape=[5, 5, 32, 64], strides=[1, 1, 1, 1], activation=tf.nn.relu, padding="SAME") # [?, 14, 14, 64] layer1_pool = MaxPoolLayer(layer1_conv.output, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1]) # [?, 7, 7, 64] # flatten layer layer2_flatten = FlattenLayer(layer1_pool.output, shape=[-1, 7*7*64]) # fully-connected layer layer3_fullyconn = HiddenLayer(layer2_flatten.output, n_in=7*7*64, n_out=256, activation=tf.nn.relu) # dropout layer layer3_dropout = DropoutLayer(layer3_fullyconn.output, keep_prob=0.5) # the output layer layer4_output = LogisticRegression(layer3_dropout.output, n_in=256, n_out=10) # params for training params = layer0_conv.params + layer1_conv.params + layer3_fullyconn.params + layer4_output.params # train dicts for dropout train_dicts = layer3_dropout.train_dicts # prediction dicts for dropout pred_dicts = layer3_dropout.pred_dicts # get cost cost = layer4_output.cost(y_) # accuracy accuracy = layer4_output.accuarcy(y_) predictor = layer4_output.y_pred # 定义训练器 train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
# Mean of the second Gaussian class (mu_1 is defined before this fragment).
mu_2 = [0, 1.5]
# Both classes use the same covariance matrix.
sigma_1 = [[1, 0.75], [0.75, 1]]
sigma_2 = [[1, 0.75], [0.75, 1]]

# 500 training samples per class; class 0 is drawn from (mu_1, sigma_1),
# class 1 from (mu_2, sigma_2).
train_x_0 = np.random.multivariate_normal(mu_1, sigma_1, size=500)
train_x_1 = np.random.multivariate_normal(mu_2, sigma_2, size=500)
train_y_0 = np.zeros(500)
train_y_1 = np.ones(500)

# 250 test samples per class, drawn the same way.
test_x_0 = np.random.multivariate_normal(mu_1, sigma_1, size=250)
test_x_1 = np.random.multivariate_normal(mu_2, sigma_2, size=250)
test_y_0 = np.zeros(250)
test_y_1 = np.ones(250)

# Stack the two classes; samples stay grouped by class (not shuffled here).
train_x = np.concatenate((train_x_0, train_x_1), axis=0)
train_y = np.concatenate((train_y_0, train_y_1), axis=0)
test_x = np.concatenate((test_x_0, test_x_1), axis=0)
test_y = np.concatenate((test_y_0, test_y_1), axis=0)

# Fit and evaluate one model for every (mode, learning rate) combination.
modes = ["batch", "online"]
lrs = [1, 0.1, 0.01, 0.001]
for mode in modes:
    for lr in lrs:
        model = LogisticRegression(mode=mode, lr=lr)
        model.fit(train_x, train_y)
        preds = model.predict(test_x)
        print(
            f"Mode: {mode}, LR: {lr}, Iteration: {model.best_iter}, Accuracy: {model.accuracy(preds, test_y)}",
            end="\n\n")
        # Plot the boundary on the test set plus the loss and gradient histories.
        model.plot_boundary(test_x, test_y)
        model.plot_loss_history()
        model.plot_grad_history()
def __init__(
    self,
    numpy_rng,
    n_ins,
    n_outs,
    hidden_layers_sizes,
    corruption_levels=(0.1, 0.1),
    theano_rng=None
):
    """Build a stacked denoising autoencoder with a variable number of layers.

    Each hidden layer is a sigmoid ``HiddenLayer`` paired with a ``dA`` that
    is constructed from the same ``theta`` object, followed by a
    ``LogisticRegression`` output layer.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator passed to every hidden
                      layer and dA

    :type n_ins: int
    :param n_ins: dimension of the input to the sdA

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layer sizes, must contain at
                                least one value

    :type corruption_levels: sequence of float
    :param corruption_levels: amount of corruption to use for each layer
                              (accepted for API compatibility; not read
                              inside this constructor)

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated from a seed drawn from `numpy_rng`
    """
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    self.n_ins = n_ins
    self.n_outs = n_outs

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # `range` instead of `xrange`, so the constructor also runs on Python 3
    for i in range(self.n_layers):
        # The first layer reads the network input; every later layer reads
        # the activation of the hidden layer below it.
        if i == 0:
            input_size = n_ins
            layer_input = self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(
            rng=numpy_rng,
            input=layer_input,
            n_in=input_size,
            n_out=hidden_layers_sizes[i],
            activation=T.nnet.sigmoid
        )
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.append(sigmoid_layer.theta)

        # Construct a denoising autoencoder that is given the same theta
        # object as this layer
        dA_layer = dA(
            numpy_rng=numpy_rng,
            theano_rng=theano_rng,
            input=layer_input,
            n_visible=input_size,
            n_hidden=hidden_layers_sizes[i],
            theta=sigmoid_layer.theta
        )
        self.dA_layers.append(dA_layer)

    # Compiled function mapping raw input to the top hidden layer's output,
    # reshaped to (-1, size of the last hidden layer).
    sda_input = T.matrix('sda_input')
    self.da_layers_output_size = hidden_layers_sizes[-1]
    self.get_da_output = theano.function(
        inputs=[sda_input],
        outputs=self.sigmoid_layers[-1].output.reshape(
            (-1, self.da_layers_output_size)),
        givens={
            self.x: sda_input
        }
    )

    # NOTE(review): the output layer gets a fresh, unseeded RandomState
    # rather than `numpy_rng` -- confirm this is intended.
    self.logLayer = LogisticRegression(
        rng=numpy.random.RandomState(),
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs
    )
    # The output layer's parameters are not added to self.params here.

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Assemble a one-hidden-layer perceptron.

    The network is a tanh ``HiddenLayer`` whose output feeds a
    ``LogisticRegression`` layer.

    rng      -- numpy.random.RandomState handed to the hidden layer
    input    -- symbolic variable describing the input (one minibatch)
    n_in     -- int, number of input units
    n_hidden -- int, number of hidden units
    n_out    -- int, number of output units
    """
    # Hidden layer; tanh can be swapped for any other nonlinearity.
    hidden = HiddenLayer(
        rng=rng,
        input=input,
        n_in=n_in,
        n_out=n_hidden,
        activation=T.tanh
    )
    self.hiddenLayer = hidden

    # Output layer consumes the hidden units.
    classifier = LogisticRegression(
        input=hidden.output,
        n_in=n_hidden,
        n_out=n_out
    )
    self.logRegressionLayer = classifier

    # Regularisation terms over both weight matrices: the L1 norm and the
    # squared L2 norm, to be added to the loss by the caller.
    hidden_w = hidden.W
    output_w = classifier.W
    self.L1 = abs(hidden_w).sum() + abs(output_w).sum()
    self.L2_sqr = (hidden_w ** 2).sum() + (output_w ** 2).sum()

    # Loss and error count are delegated to the output layer.
    self.negative_log_likelihood = classifier.negative_log_likelihood
    self.errors = classifier.errors

    # Model parameters: hidden layer first, then the output layer.
    self.params = hidden.params + classifier.params

    # Keep track of model input.
    self.input = input
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Wire up the two layers of the multilayer perceptron.

    :type rng: numpy.random.RandomState
    :param rng: random number generator passed to the hidden layer

    :type input: theano.tensor.TensorType
    :param input: symbolic variable describing one minibatch of input

    :type n_in: int
    :param n_in: number of input units

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units
    """
    # tanh hidden layer followed by a logistic-regression output layer
    tanh_layer = HiddenLayer(
        rng=rng, input=input, n_in=n_in, n_out=n_hidden, activation=T.tanh
    )
    softmax_layer = LogisticRegression(
        input=tanh_layer.output, n_in=n_hidden, n_out=n_out
    )
    self.hiddenLayer = tanh_layer
    self.logRegressionLayer = softmax_layer

    # regularisation quantities: L1 norm and squared L2 norm of both
    # weight matrices
    l1_hidden = abs(tanh_layer.W).sum()
    l1_output = abs(softmax_layer.W).sum()
    self.L1 = l1_hidden + l1_output

    l2_hidden = (tanh_layer.W ** 2).sum()
    l2_output = (softmax_layer.W ** 2).sum()
    self.L2_sqr = l2_hidden + l2_output

    # the MLP's loss and error count are those of its output layer
    self.negative_log_likelihood = softmax_layer.negative_log_likelihood
    self.errors = softmax_layer.errors

    # parameters of the model = parameters of its two layers
    self.params = tanh_layer.params + softmax_layer.params

    # keep track of model input
    self.input = input
def evaluate_lenet5(datasets, learning_rate=0.1, n_epochs=10,
                    nkerns=(20, 50), batch_size=2):
    """Train and evaluate a two-stage convolutional network with SGD.

    Each sample is reshaped to a 1 x 47 single-channel "image" and passed
    through two LeNetConvPoolLayer stages (1 x 6 filters, 1 x 2 pooling), a
    500-unit tanh hidden layer and a 10-way logistic regression.

    :type datasets: sequence of three (x, y) pairs
    :param datasets: train, validation and test sets, in that order; each x
                     and y must support ``x[a:b]`` slicing and x must expose
                     ``get_value(borrow=True)``

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: sequence of two ints
    :param nkerns: number of kernels on each convolutional layer

    :type batch_size: int
    :param batch_size: number of samples per minibatch (baked into the
                       symbolic reshape, so every batch must be full)
    """
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # one flattened sample per row
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the (batch_size, 47) matrix to the 4D tensor
    # (batch_size, 1, 1, 47) expected by LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, 1, 47))

    # First convolutional pooling layer: 1 x 6 filters, 1 x 2 pooling.
    # The next layer declares its input as (batch_size, nkerns[0], 1, 21).
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 1, 47),
                                filter_shape=(nkerns[0], 1, 1, 6),
                                poolsize=(1, 2))

    # Second convolutional pooling layer: 1 x 6 filters, 1 x 2 pooling.
    # The hidden layer below declares n_in = nkerns[1] * 8, i.e. it expects
    # an output of shape (batch_size, nkerns[1], 1, 8).
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 1, 21),
                                filter_shape=(nkerns[1], nkerns[0], 1, 6),
                                poolsize=(1, 2))

    # The fully-connected layer operates on 2D matrices, so flatten
    # everything but the batch dimension.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # Plain SGD: one (param, param - lr * grad) update per parameter.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # global minibatch counter (named so it does not shadow `iter`)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
def train_logistic_regression(train, test, model_parameters=None):
    """Fit a LogisticRegression on `train` and print a report for `test`.

    :param train: training data, passed unchanged to ``LogisticRegression``
    :param test: 2-D array-like supporting ``test[:, -1]``; its last column
                 is passed to ``print_report`` as the reference values
    :param model_parameters: list of model parameters forwarded to
                             ``LogisticRegression``; defaults to ``[1e5]``
    """
    # Build the default per call so one list object is never shared between
    # calls (the model receives the list itself and could mutate it).
    if model_parameters is None:
        model_parameters = [1e5]
    lr = LogisticRegression(train, model_parameters)
    lr.lg_train()
    print_report('Logistic Regression', test[:, -1], lr.lg_predict(test))
import matplotlib.pyplot as plt
import os

# Set the current directory so the project-local modules below can be imported.
os.chdir("F:\\neuralnetworksanddeeplearning\\codes")
from logisticRegression import LogisticRegression
from generalFunctions import plot_decision_boundary

######################################################################################
## Generate a dataset and plot it
# NOTE(review): `np` and `datasets` are used here but not imported in this
# fragment -- presumably imported above; confirm.
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)
plt.scatter(x = X[:,0], y = X[:,1], c = y, cmap = plt.cm.Spectral)

# The data is not linearly separable, which means that linear classifiers such as logistic
# regression won't be able to fit it unless you hand-engineer non-linear features (such as
# polynomials). In fact, that is one of the major advantages of neural networks: you don't
# need to worry about feature engineering, because the hidden layer learns the features.

######################################################################################
## Logistic Regression
num_features = 2
LR = LogisticRegression(dim = num_features)
# The model is given features as rows (X.T) and labels as a (1, 200) row vector.
grads, costs = LR.train(X = X.T, Y = y.reshape(1, 200), print_cost = True, num_iterations = 5000, learning_rate = 0.01, plot_cost = True)
plot_decision_boundary(lambda x: LR.predict(x), X = X, y = y)
plt.title("Logistic Regression")

# The graph shows the decision boundary learned by our logistic regression classifier. It
# separates the data as well as it can using a straight line, but it is unable to capture
# the "moon shape" of our data.
padding="SAME")  # [?, 14, 14, 64]
layer1_pool = MaxPoolLayer(layer1_conv.output, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1])  # [?, 7, 7, 64]
# flatten layer
layer2_flatten = FlattenLayer(layer1_pool.output, shape=[-1, 7 * 7 * 64])
# fully-connected layer
layer3_fullyconn = HiddenLayer(layer2_flatten.output, n_in=7 * 7 * 64, n_out=256, activation=tf.nn.relu)
# dropout layer
layer3_dropout = DropoutLayer(layer3_fullyconn.output, keep_prob=0.5)
# the output layer
layer4_output = LogisticRegression(layer3_dropout.output, n_in=256, n_out=10)
# params for training (the dropout, pooling and flatten layers contribute none here)
params = layer0_conv.params + layer1_conv.params + layer3_fullyconn.params + layer4_output.params
# train dicts for dropout
train_dicts = layer3_dropout.train_dicts
# prediction dicts for dropout
pred_dicts = layer3_dropout.pred_dicts
# get cost
cost = layer4_output.cost(y_)
# accuracy
accuracy = layer4_output.accuarcy(y_)
predictor = layer4_output.y_pred
# define the trainer
# Reshape the training and test examples train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T print("train_set_x_flatten shape: " + str(train_set_x_flatten.shape)) print("train_set_y shape: " + str(train_set_y.shape)) print("test_set_x_flatten shape: " + str(test_set_x_flatten.shape)) print("test_set_y shape: " + str(test_set_y.shape)) print("sanity check after reshaping: " + str(train_set_x_flatten[0:5, 0])) # Standerdize the dataset train_set_x = train_set_x_flatten / 255. test_set_x = test_set_x_flatten / 255. ################################################################################################### # Fit Logistic Model LR = LogisticRegression(dim=12288) grads, costs = LR.train(X=train_set_x, Y=train_set_y, num_iterations=2000, learning_rate=0.005, print_cost=True, plot_cost=True) # Prediction Accuracy for training data y_pred = LR.predict(X=train_set_x) LR.accuracy_stats(train_set_y, y_pred) # Prediction Accuracy for test data y_test_pred = LR.predict(X=test_set_x) LR.accuracy_stats(test_set_y, y_test_pred)
def train_logisticRegression(learning_rate=0.13, n_epochs=1000,
                             dataset="mnist.pkl.gz", batch_size=600):
    """Train a logistic-regression classifier with minibatch SGD.

    Loads the data through ``shared_dataset``, plots three sample images,
    trains with patience-based early stopping, and pickles the classifier to
    ``best_model.pkl`` every time the validation error improves.

    :param learning_rate: float, step size of the gradient updates
    :param n_epochs: int, maximal number of passes over the training set
    :param dataset: dataset identifier handed to ``shared_dataset``
    :param batch_size: int, number of samples per minibatch
    """
    ###############################################################
    # Get Data
    ###############################################################

    # Load datasets
    datasets = shared_dataset(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Visualize some data samples
    plot_image(train_set_x.get_value(borrow=True)[10], 28, 28)
    plot_image(valid_set_x.get_value(borrow=True)[15], 28, 28)
    plot_image(test_set_x.get_value(borrow=True)[5], 28, 28)

    # Split sets into batches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ###############################################################
    # Build model
    ###############################################################

    # Allocate symbolic variables
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # Build classifier
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # Define gradient descent
    cost = classifier.negative_log_likelihood(y)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    updates = [(classifier.W, classifier.W - g_W * learning_rate),
               (classifier.b, classifier.b - g_b * learning_rate)]

    # Test function
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errorRate(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Validation function
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errorRate(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Training function
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############################################################
    # Train Model
    ###############################################################
    print("Training the model...")

    patience = 5000  # look at this many batches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # Integer division keeps the validation frequency an int on Python 3.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for batch_index in range(n_train_batches):
            train_model(batch_index)

            # global batch counter (named so it does not shadow `iter`)
            iteration = (epoch - 1) * n_train_batches + batch_index
            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, batch %i/%i, validation error rate %f %%' %
                      (epoch, batch_index + 1, n_train_batches,
                       this_validation_loss * 100))

                if this_validation_loss < best_validation_loss:
                    # extend patience only for a significant improvement
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)
                    best_validation_loss = this_validation_loss

                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(' epoch %i, batch %i/%i, test error rate %f %%' %
                          (epoch, batch_index + 1, n_train_batches,
                           test_score * 100))

                    # persist the best classifier seen so far
                    with open('best_model.pkl', 'wb') as f:
                        _pickle.dump(classifier, f)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization completed with best validation loss of %f %%,'
           'with test score of %f %%.') %
          (best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs, withiin %f seconds.' %
          (epoch, end_time - start_time))
class DBN(object):
    """
    An implementation of a deep belief network.
    The hidden layers are first pretrained as RBMs, then the DBN is treated
    as a normal MLP by adding an output layer.
    """

    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=(500, 500)):
        """
        :param n_in: int, the dimension of input
        :param n_out: int, the dimension of output
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        """
        # Number of layers
        assert len(hidden_layers_sizes) > 0
        self.n_layers = len(hidden_layers_sizes)
        self.layers = []      # normal sigmoid layers
        self.rbm_layers = []  # RBM layers
        self.params = []      # keep track of params for training

        # Define the input and output
        self.x = tf.placeholder(tf.float32, shape=[None, n_in])
        self.y = tf.placeholder(tf.float32, shape=[None, n_out])

        # Construct the layers of the DBN
        with tf.name_scope('DBN_layer'):
            for i in range(self.n_layers):
                if i == 0:
                    layer_input = self.x
                    input_size = n_in
                else:
                    layer_input = self.layers[i - 1].output
                    input_size = hidden_layers_sizes[i - 1]
                # Sigmoid layer
                with tf.name_scope('internel_layer'):
                    sigmoid_layer = HiddenLayer(inpt=layer_input,
                                                n_in=input_size,
                                                n_out=hidden_layers_sizes[i],
                                                activation=tf.nn.sigmoid)
                self.layers.append(sigmoid_layer)
                # Add the parameters for finetuning
                self.params.extend(sigmoid_layer.params)
                # Create the RBM layer; it is given the sigmoid layer's W
                # and b objects
                with tf.name_scope('rbm_layer'):
                    self.rbm_layers.append(
                        RBM(inpt=layer_input,
                            n_visiable=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b))

        # We use the LogisticRegression layer as the output layer
        with tf.name_scope('output_layer'):
            self.output_layer = LogisticRegression(
                inpt=self.layers[-1].output,
                n_in=hidden_layers_sizes[-1],
                n_out=n_out)
        self.params.extend(self.output_layer.params)
        # The finetuning cost
        with tf.name_scope('output_loss'):
            self.cost = self.output_layer.cost(self.y)
        # The accuracy
        self.accuracy = self.output_layer.accuarcy(self.y)

    def pretrain(self,
                 sess,
                 train_x,
                 batch_size=50,
                 pretraining_epochs=10,
                 lr=0.5,
                 k=1,
                 display_step=1):
        """
        Pretrain the layers (just train the RBM layers)
        :param sess: tf.Session
        :param train_x: array of training inputs, one sample per row; must
            support ``.shape[0]`` and row slicing
        :param batch_size: int
        :param pretraining_epochs: int
        :param lr: float
        :param k: int, use CD-k
        :param display_step: int, print the cost every `display_step` epochs
        """
        print('Starting pretraining...\n')
        start_time = timeit.default_timer()
        # Number of full batches; a trailing partial batch is dropped.
        batch_num = train_x.shape[0] // batch_size
        # Pretrain layer by layer
        for i in range(self.n_layers):
            cost = self.rbm_layers[i].get_reconstruction_cost()
            train_ops = self.rbm_layers[i].get_train_ops(learning_rate=lr,
                                                         k=k,
                                                         persistent=None)
            for epoch in range(pretraining_epochs):
                avg_cost = 0.0
                for step in range(batch_num):
                    # Slice by the loop index: slicing by `batch_num` would
                    # select the same out-of-range rows on every step.
                    start = step * batch_size
                    x_batch = train_x[start:start + batch_size]
                    # train
                    sess.run(train_ops, feed_dict={self.x: x_batch})
                    # accumulate the mean reconstruction cost of the epoch
                    avg_cost += sess.run(cost, feed_dict={
                        self.x: x_batch,
                    }) / batch_num
                # report
                if epoch % display_step == 0:
                    print("\tPretraing layer {0} Epoch {1} cost: {2}".format(
                        i, epoch, avg_cost))

        end_time = timeit.default_timer()
        print("\nThe pretraining process ran for {0} minutes".format(
            (end_time - start_time) / 60))

    def finetuning(self,
                   sess,
                   train_x,
                   train_y,
                   test_x,
                   test_y,
                   training_epochs=10,
                   batch_size=100,
                   lr=0.5,
                   display_step=1):
        """
        Finetune the whole network with gradient descent on ``self.cost``.
        :param sess: tf.Session
        :param train_x: training inputs, one sample per row
        :param train_y: training targets, aligned with `train_x`
        :param test_x: inputs used for the periodic accuracy report
        :param test_y: targets used for the periodic accuracy report
        :param training_epochs: int
        :param batch_size: int
        :param lr: float
        :param display_step: int, report every `display_step` epochs
        """
        print("\nStart finetuning...\n")
        start_time = timeit.default_timer()
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate=lr).minimize(self.cost)
        # Number of full batches; a trailing partial batch is dropped.
        batch_num = train_x.shape[0] // batch_size
        # merge_all() returns None when no summaries were registered.
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter("logs", sess.graph)
        try:
            for epoch in range(training_epochs):
                avg_cost = 0.0
                for step in range(batch_num):
                    # Slice by the loop index, not by `batch_num`.
                    start = step * batch_size
                    x_batch = train_x[start:start + batch_size]
                    y_batch = train_y[start:start + batch_size]
                    # train
                    sess.run(train_op,
                             feed_dict={
                                 self.x: x_batch,
                                 self.y: y_batch
                             })
                    # accumulate the mean cost of the epoch
                    avg_cost += sess.run(self.cost,
                                         feed_dict={
                                             self.x: x_batch,
                                             self.y: y_batch
                                         }) / batch_num
                # report
                if epoch % display_step == 0:
                    val_acc = sess.run(self.accuracy,
                                       feed_dict={
                                           self.x: test_x,
                                           self.y: test_y
                                       })
                    print("\tEpoch {0} cost: {1}, validation accuacy: {2}".
                          format(epoch, avg_cost, val_acc))
                    if merged is not None:
                        result = sess.run(merged,
                                          feed_dict={
                                              self.x: test_x,
                                              self.y: test_y
                                          })
                        writer.add_summary(result, epoch)
        finally:
            # Flush and release the event file even if training fails.
            writer.close()

        end_time = timeit.default_timer()
        print("\nThe finetuning process ran for {0} minutes".format(
            (end_time - start_time) / 60))
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
from logisticRegression import LogisticRegression

# Class labels emitted by make_prediction.
POSITIVE_LABEL = 1
NEGATIVE_LABEL = 0


def make_prediction(algo, query):
    """Return POSITIVE_LABEL if ``algo.score(query)`` exceeds 0.5, else NEGATIVE_LABEL."""
    prob = algo.score(query)
    return POSITIVE_LABEL if prob > 0.5 else NEGATIVE_LABEL


# Load the model from the current directory and the held-out test set.
algo = LogisticRegression(model_root='.')
testing_data = np.load('lr_test_16451.npz')
X = testing_data['X']
Y_true = testing_data['Y']

# Score every test row individually.
num_matches = len(Y_true)
Y_pred = np.zeros(num_matches)
for i, match in enumerate(X):
    Y_pred[i] = make_prediction(algo, match)

# Macro-averaged metrics; `support` is returned by the API but not reported.
prec, recall, f1, support = precision_recall_fscore_support(Y_true, Y_pred, average='macro')

# print() calls instead of Python 2 print statements, so the script runs on
# Python 3; the printed text is the same as the old statements produced.
print('Precision: ', prec)
print('Recall: ', recall)
print('F1 Score: ', f1)
class DBN(object):
    """
    Deep belief network: a stack of sigmoid hidden layers, each paired with
    an RBM used for pretraining, topped by a LogisticRegression output layer
    so the whole stack can be finetuned like an ordinary MLP.
    """

    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=[500, 500]):
        """
        :param n_in: int, the dimension of input
        :param n_out: int, the dimension of output
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        """
        assert len(hidden_layers_sizes) > 0
        self.n_layers = len(hidden_layers_sizes)
        self.layers = []      # sigmoid layers, bottom to top
        self.rbm_layers = []  # one RBM per sigmoid layer
        self.params = []      # parameters updated during finetuning

        # Input / target placeholders
        self.x = tf.placeholder(tf.float32, shape=[None, n_in])
        self.y = tf.placeholder(tf.float32, shape=[None, n_out])

        # Stack the hidden layers; each one reads the output of the layer
        # below it (the first reads the input placeholder).
        below_output, below_size = self.x, n_in
        for depth in range(self.n_layers):
            width = hidden_layers_sizes[depth]
            hidden = HiddenLayer(inpt=below_output, n_in=below_size,
                                 n_out=width, activation=tf.nn.sigmoid)
            self.layers.append(hidden)
            self.params.extend(hidden.params)
            # The RBM is handed the hidden layer's W and b objects.
            rbm = RBM(inpt=below_output, n_visiable=below_size,
                      n_hidden=width, W=hidden.W, hbias=hidden.b)
            self.rbm_layers.append(rbm)
            below_output, below_size = hidden.output, width

        # Output layer on top of the last hidden layer
        top = self.layers[-1]
        self.output_layer = LogisticRegression(inpt=top.output,
                                               n_in=hidden_layers_sizes[-1],
                                               n_out=n_out)
        self.params.extend(self.output_layer.params)
        # Finetuning cost and accuracy
        self.cost = self.output_layer.cost(self.y)
        self.accuracy = self.output_layer.accuarcy(self.y)

    def pretrain(self, sess, X_train, batch_size=50, pretraining_epochs=10,
                 lr=0.1, k=1, display_step=1):
        """
        Train the RBM layers one after another.
        :param sess: tf.Session
        :param X_train: dataset object exposing ``train.num_examples`` and
            ``train.next_batch(batch_size)``
        :param batch_size: int
        :param pretraining_epochs: int
        :param lr: float
        :param k: int, use CD-k
        :param display_step: int
        """
        print('Starting pretraining...\n')
        started = timeit.default_timer()
        batches_per_epoch = int(X_train.train.num_examples / batch_size)
        for layer_idx in range(self.n_layers):
            rbm = self.rbm_layers[layer_idx]
            recon_cost = rbm.get_reconstruction_cost()
            update_ops = rbm.get_train_ops(learning_rate=lr, k=k,
                                           persistent=None)
            for epoch in range(pretraining_epochs):
                mean_cost = 0.0
                for _ in range(batches_per_epoch):
                    batch = X_train.train.next_batch(batch_size)
                    x_batch = batch[0]
                    # one training step, then measure the cost on the batch
                    sess.run(update_ops, feed_dict={self.x: x_batch})
                    batch_cost = sess.run(recon_cost,
                                          feed_dict={self.x: x_batch})
                    mean_cost += batch_cost / batches_per_epoch
                if epoch % display_step == 0:
                    print("\tPretraing layer {0} Epoch {1} cost: {2}".format(
                        layer_idx, epoch, mean_cost))

        elapsed = timeit.default_timer() - started
        print("\nThe pretraining process ran for {0} minutes".format(
            elapsed / 60))

    def finetuning(self, sess, trainSet, training_epochs=10, batch_size=100,
                   lr=0.1, display_step=1):
        """
        Finetune the network with gradient descent over ``self.params``.
        """
        print("\nStart finetuning...\n")
        started = timeit.default_timer()
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        train_op = optimizer.minimize(self.cost, var_list=self.params)
        for epoch in range(training_epochs):
            mean_cost = 0.0
            batches_per_epoch = int(trainSet.train.num_examples / batch_size)
            for _ in range(batches_per_epoch):
                x_batch, y_batch = trainSet.train.next_batch(batch_size)
                # one training step, then measure the cost on the batch
                sess.run(train_op,
                         feed_dict={self.x: x_batch, self.y: y_batch})
                batch_cost = sess.run(
                    self.cost, feed_dict={self.x: x_batch, self.y: y_batch})
                mean_cost += batch_cost / batches_per_epoch
            if epoch % display_step == 0:
                validation_feed = {self.x: trainSet.validation.images,
                                   self.y: trainSet.validation.labels}
                val_acc = sess.run(self.accuracy, feed_dict=validation_feed)
                print("\tEpoch {0} cost: {1}, validation accuacy: {2}".format(
                    epoch, mean_cost, val_acc))

        elapsed = timeit.default_timer() - started
        print("\nThe finetuning process ran for {0} minutes".format(
            elapsed / 60))
from sklearn.datasets import load_iris
from logisticRegression import LogisticRegression
import numpy as np

if __name__ == '__main__':
    # Fit the project's LogisticRegression on the iris data and report the
    # accuracy on the same data it was trained on.
    data = load_iris()
    x = data.data
    y = data.target
    lr = LogisticRegression()
    lr.fit(x, y)
    pred = lr.predict(x)
    # fraction of predictions equal to the target
    accur = np.sum(pred == y) * 1.0 / y.size
    # print() call instead of a Python 2 print statement; a single
    # parenthesised argument prints identically on Python 2 and 3.
    print('lr accuracy:%f' % accur)
def learnAndPredict(Ti, C, TOList):
    """Train a small convolutional pair classifier on Ti/C and score TOList.

    The function normalises the inputs in place, samples NUM_TRAIN labelled
    (source, target) windows from ``Ti``, trains a three-conv-layer Theano
    network with momentum, and then scores every ordered pair of the first
    ``N`` rows of each array in ``TOList``.

    Module-level names read here: SEED, learning_rate0, NN, NUM_TRAIN, L,
    batch_size, nkerns, POOL, n_epochs, Q, start_time0 (plus the layer
    classes, ``ReLU``, ``ifelse``, ``OrderedDict``, ``theano``, ``T``).

    :param Ti: 2-D array indexed as ``Ti[row, column]``; rows ``:1000`` and
        row ``1000`` are normalised separately. MODIFIED IN PLACE.
    :param C: indexable as ``C[i][j]``; a value of 1 marks pair (i, j) as a
        positive example, any other value as a negative one.
    :param TOList: list of arrays laid out like ``Ti``; each is normalised
        IN PLACE with the statistics computed from ``Ti``.
    :return: list with one (N, N) array per entry of ``TOList``; entry
        ``[i][j]`` accumulates the class-1 output of ``pred_proba_mine`` for
        pair (i, j) over Q window positions, divided by the array's maximum.
    """
    rng = np.random.RandomState(SEED)
    learning_rate = learning_rate0
    print np.mean(Ti[1000, :])
    # --- Normalise Ti in place: min-max scaling followed by z-scoring. ---
    # Rows :1000 share one set of statistics ...
    aminW = np.amin(Ti[:1000, :])
    amaxW = np.amax(Ti[:1000, :])
    Ti[:1000, :] = (Ti[:1000, :] - aminW) / (amaxW - aminW)
    astdW = np.std(Ti[:1000, :])
    ameanW = np.mean(Ti[:1000, :])
    Ti[:1000, :] = (Ti[:1000, :] - ameanW) / astdW
    # ... and row 1000 gets its own.
    aminacW = np.amin(Ti[1000, :])
    amaxacW = np.amax(Ti[1000, :])
    print aminW, amaxW, aminacW, amaxacW
    Ti[1000, :] = (Ti[1000, :] - aminacW) / (amaxacW - aminacW)
    astdacW = np.std(Ti[1000, :])
    ameanacW = np.mean(Ti[1000, :])
    Ti[1000, :] = (Ti[1000, :] - ameanacW) / astdacW
    ile__ = len(TOList)
    # Number of columns of every TOList array. np.zeros yields floats, so
    # the values read back from ileList later are floats as well.
    ileList = np.zeros(ile__)
    # Apply the statistics computed from Ti (not their own) to every array
    # that will be scored.
    for titer in range(len(TOList)):
        print np.mean(TOList[titer][1000, :])
        TOList[titer][:1000, :] = (TOList[titer][:1000, :] -
                                   aminW) / (amaxW - aminW)
        TOList[titer][:1000, :] = (TOList[titer][:1000, :] - ameanW) / astdW
        TOList[titer][1000, :] = (TOList[titer][1000, :] -
                                  aminacW) / (amaxacW - aminacW)
        TOList[titer][1000, :] = (TOList[titer][1000, :] -
                                  ameanacW) / astdacW
        _, ileList[titer] = TOList[titer].shape
    _, ile = Ti.shape
    N = NN
    data = []
    yyy = []
    need = 1
    # BYL and dwa are never read again in this function; j is immediately
    # re-bound by the loop below.
    BYL = {}
    j = 0
    dwa = 0
    # --- Split all ordered pairs (i != j) by their label in C. ---
    ONES = []
    ZEROS = []
    for i in range(NN):
        for j in range(NN):
            if i != j:
                if C[i][j] == 1:
                    ONES.append((i, j))
                else:
                    ZEROS.append((i, j))
    Nones = len(ONES)
    rng.shuffle(ONES)
    Nzeros = len(ZEROS)
    print Nones
    print Nzeros
    # Needed / onesPerPair are computed but not used afterwards.
    Needed = NUM_TRAIN / 2
    onesPerPair = Needed / Nones + 1
    onesIter = 0
    jj = 0
    # --- Sample NUM_TRAIN examples, alternating negative / positive. ---
    # `need` is flipped before it is tested, so the first example drawn is a
    # negative one.
    while jj < NUM_TRAIN:
        if jj % 300000 == 0:
            print jj / 300000,
        need = 1 - need
        if need == 1:
            # Positives: cycle through the shuffled ONES list (pairs repeat
            # once the list is exhausted). ppp is not used afterwards.
            pairNo = onesIter % Nones
            ppp = onesIter / Nones
            s, t = ONES[pairNo]
            shift = rng.randint(0, ile - L)
            onesIter += 1
        if need == 0:
            # Negatives: drawn without replacement (the pair is deleted).
            zer = rng.randint(Nzeros)
            s, t = ZEROS[zer]
            del ZEROS[zer]
            Nzeros -= 1
            shift = rng.randint(0, ile - L)
        # One example = three length-L windows at the same offset:
        # row s, row t and row 1000, concatenated.
        x = np.hstack((Ti[s][shift:shift + L], Ti[t][shift:shift + L],
                       Ti[1000][shift:shift + L]))
        y = C[s][t]
        data.append(x)
        yyy.append(y)
        jj += 1
    data = np.array(data, dtype=theano.config.floatX)
    # Repeating mask over blocks of 100 examples:
    # 96 x 0 (train), 2 x 1 (test), 2 x 2 (validation).
    is_train = np.array(([0] * 96 + [1, 1, 2, 2]) * (NUM_TRAIN / 100))
    yyy = np.array(yyy)
    train_set_x0, train_set_y0 = np.array(
        data[is_train == 0]), yyy[is_train == 0]
    test_set_x, test_set_y = np.array(data[is_train == 1]), yyy[is_train == 1]
    valid_set_x, valid_set_y = np.array(
        data[is_train == 2]), yyy[is_train == 2]
    n_train_batches = len(train_set_y0) / batch_size
    n_valid_batches = len(valid_set_y) / batch_size
    n_test_batches = len(test_set_y) / batch_size
    # Symbolic inputs. From here on `x` and `y` are Theano variables, no
    # longer the NumPy values used while sampling. `inone2` is re-bound to a
    # shared variable further down.
    epoch = T.scalar()
    index = T.lscalar()
    x = T.matrix('x')
    inone2 = T.matrix('inone2')
    y = T.ivector('y')
    print '... building the model'
    # --- Network definition ---
    # Layer 0: each example is viewed as a 1-channel 3 x L image (the three
    # stacked windows) and convolved with 2 x Cx filters, no pooling.
    layer0_input = x.reshape((batch_size, 1, 3, L))
    Cx = 5
    layer0 = ConvolutionalLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 3, L),
                                filter_shape=(nkerns[0], 1, 2, Cx),
                                poolsize=(1, 1),
                                fac=0)
    # Output height/width after layer 0.
    ONE = (3 - 2 + 1) / 1
    L2 = (L - Cx + 1) / 1
    # Layer 1: 2 x Cx2 filters, no pooling.
    Cx2 = 5
    layer1 = ConvolutionalLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], ONE, L2),
                                filter_shape=(nkerns[1], nkerns[0], 2, Cx2),
                                poolsize=(1, 1),
                                activation=ReLU,
                                fac=0)
    ONE = (ONE - 2 + 1) / 1
    L3 = (L2 - Cx2 + 1) / 1
    # Layer 1b: 1 x Cx3 filters followed by 1 x POOL pooling.
    Cx3 = 1
    layer1b = ConvolutionalLayer(rng,
                                 input=layer1.output,
                                 image_shape=(batch_size, nkerns[1], ONE, L3),
                                 filter_shape=(nkerns[2], nkerns[1], 1, Cx3),
                                 poolsize=(1, POOL),
                                 activation=ReLU,
                                 fac=0)
    ONE = (ONE - 1 + 1) / 1
    L4 = (L3 - Cx3 + 1) / POOL
    REGx = 100
    # Layer 2: fully connected tanh layer without bias; layer 3: two-class
    # logistic regression on top of it.
    layer2_input = layer1b.output.flatten(2)
    print layer2_input.shape
    use_b = False
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[2] * L4,
                         n_out=REGx,
                         activation=T.tanh,
                         use_bias=use_b)
    layer3 = LogisticRegression(input=layer2.output, n_in=REGx, n_out=2)
    cost = layer3.negative_log_likelihood(y)
    # Shared buffers that are refilled with set_value() during prediction.
    # inone4 is allocated but not used afterwards.
    out_x2 = theano.shared(
        np.asarray(np.zeros((N, L)), dtype=theano.config.floatX))
    inone2 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    inone3 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    inone4 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    # Move the data sets into shared variables; labels are stored as floatX
    # and cast to int32 symbolically.
    test_set_x = theano.shared(
        np.asarray(test_set_x, dtype=theano.config.floatX))
    train_set_x = theano.shared(
        np.asarray(train_set_x0, dtype=theano.config.floatX))
    train_set_y = T.cast(
        theano.shared(np.asarray(train_set_y0, dtype=theano.config.floatX)),
        'int32')
    test_set_y = T.cast(
        theano.shared(np.asarray(test_set_y, dtype=theano.config.floatX)),
        'int32')
    valid_set_y = T.cast(
        theano.shared(np.asarray(valid_set_y, dtype=theano.config.floatX)),
        'int32')
    valid_set_x = theano.shared(
        np.asarray(valid_set_x, dtype=theano.config.floatX))
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # Momentum is interpolated linearly from mom_start to mom_end over
    # mom_epoch_interval epochs and then held at mom_end.
    mom_start = 0.5
    mom_end = 0.98
    mom_epoch_interval = n_epochs * 1.0
    # --- Gradients and momentum updates for every trainable parameter. ---
    class_params0 = [layer3, layer2, layer1, layer1b, layer0]
    class_params = [param for layer in class_params0 for param in layer.params]
    gparams = []
    for param in class_params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    # One zero-initialised velocity buffer per parameter.
    gparams_mom = []
    for param in class_params:
        gparam_mom = theano.shared(
            np.zeros(param.get_value(borrow=True).shape,
                     dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)
    mom = ifelse(
        epoch < mom_epoch_interval,
        mom_start * (1.0 - epoch / mom_epoch_interval) + mom_end *
        (epoch / mom_epoch_interval), mom_end)
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = mom * gparam_mom - (1. -
                                                  mom) * learning_rate * gparam
    for param, gparam_mom in zip(class_params, gparams_mom):
        stepped_param = param + updates[gparam_mom]
        squared_filter_length_limit = 15.0
        # Max-norm constraint, applied to 2-D parameters only: columns whose
        # norm exceeds sqrt(squared_filter_length_limit) are scaled back.
        if param.get_value(borrow=True).ndim == 2:
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0,
                                   T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param
    output = cost
    train_model = theano.function(
        inputs=[epoch, index],
        outputs=output,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # `keep` is compiled here but never called in this function.
    keep = theano.function(
        [index],
        layer3.errorsFull(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        on_unused_input='warn')
    timer = time.clock()
    print "finished reading", (timer - start_time0) / 60., "minutes "
    # --- Training loop ---
    print '... training'
    # Validate once per pass over the training batches.
    validation_frequency = n_train_batches
    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    epochc = 0
    while (epochc < n_epochs):
        epochc = epochc + 1
        # NOTE(review): this only re-binds the local Python name; the update
        # expressions above were already built from the earlier value, so the
        # decay appears to have no effect on train_model -- confirm intent.
        learning_rate = learning_rate0 * (1.2 - ((1.0 * epochc) / n_epochs))
        for minibatch_index in xrange(n_train_batches):
            iter = (epochc - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(epochc, minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print(' %i) err %.2f ' % (epochc, this_validation_loss / 10)
                      ), L, nkerns, REGx, "|", Cx, Cx2, Cx3, batch_size
                # The test set is also evaluated on every 30th epoch; in that
                # case best_validation_loss is overwritten even when the
                # current loss is not an improvement.
                if this_validation_loss < best_validation_loss or epochc % 30 == 0:
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print(
                        ('     epoch %i, minibatch %i/%i, test error of best '
                         'model %f %%') % (epochc, minibatch_index + 1,
                                           n_train_batches, test_score / 10))
    # --- Prediction ---
    timel = time.clock()
    print "finished learning", (timel - timer) / 60., "minutes "
    # Scoring function: the network input is assembled from three shared
    # buffers -- inone2 tiled over the batch, a batch of rows of out_x2, and
    # inone3 tiled over the batch. `y` is given a fixed slice only so that
    # the graph input is bound.
    ppm = theano.function(
        [index],
        layer3.pred_proba_mine(),
        givens={
            x: T.horizontal_stack(
                T.tile(inone2, (batch_size, 1)),
                out_x2[index * batch_size:(index + 1) * batch_size],
                T.tile(inone3, (batch_size, 1))),
            y: train_set_y[0 * (batch_size):(0 + 1) * (batch_size)]
        },
        on_unused_input='warn')
    # NONZERO is not used afterwards.
    NONZERO = (N * N - N)
    gc.collect()
    RESList = [np.zeros((N, N)) for it in range(ile__)]
    for __net in range(ile__):
        TO = TOList[__net]
        ileO = ileList[__net]
        RES = RESList[__net]
        # NOTE(review): shift starts at 0.1 and DELTAshift is a float, so the
        # slices below use float bounds; newer NumPy versions reject
        # non-integer slice indices -- confirm the NumPy version in use.
        shift = 0.1
        DELTAshift = (ileO - L) / (Q - 1)
        print "DELTAshift:", DELTAshift
        # Score Q window positions spread over the recording and sum them.
        for q in range(Q):
            dataO = []
            print(q + 1), "/", Q, "  ",
            out_x2.set_value(
                np.asarray(np.array(TO[:, shift:shift + L]),
                           dtype=theano.config.floatX))
            PARTIAL = np.zeros((N, N))
            inone3.set_value(
                np.asarray(np.array(TO[1000][shift:shift + L]).reshape(1, L),
                           dtype=theano.config.floatX))
            for i in range(N):
                # Row i is the fixed first window; every row of out_x2 is
                # tried as the second one.
                inone2.set_value(
                    np.asarray(np.array(TO[i][shift:shift + L]).reshape(1, L),
                               dtype=theano.config.floatX))
                p = [ppm(ii) for ii in xrange(N / batch_size)]
                for pos in range(N):
                    if pos != i:
                        # Column 1 of the output of pred_proba_mine().
                        PARTIAL[i][pos] += p[pos / batch_size][pos %
                                                               batch_size][1]
            for i in range(N):
                for j in range(N):
                    RES[i][j] += PARTIAL[i][j]
            shift += DELTAshift
        print "Finished", __net
        # Scale so that the largest accumulated score becomes 1.
        RESList[__net] = RES / np.max(RES)
        gc.collect()
    end_time = time.clock()
    print "finished predicting", (end_time - timel) / 60., "minutes ", str(
        nkerns), "using SEED = ", SEED
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time0) / 60.))
    return RESList
def evaluate_lenet5(
        learning_rate=0.1,
        n_epochs=200,
        dataset='mnist.pkl.gz',
        nkerns=(20, 50),
        batch_size=500):
    """Train a LeNet-style CNN with minibatch SGD and early stopping.

    The network is two convolution+pooling layers, one tanh hidden layer
    and a 10-way logistic regression, built for 28 x 28 single-channel
    inputs. Whenever the validation error improves, the test error is
    evaluated and the weights of layers 0-2 are pickled to
    'bestCNNModel.pkl' in the current directory.

    :param learning_rate: float, step size of the SGD update
    :param n_epochs: int, maximal number of epochs to run the optimizer
    :param dataset: string, path passed to load_data() (MNIST here)
    :param nkerns: sequence of two ints, number of kernels of the first and
        of the second convolutional layer
    :param batch_size: int, number of examples per minibatch
    :return: None; progress and results are printed
    """
    # Initialise random number generator (used to initialise weights)
    rng = numpy.random.RandomState(23455)

    ## ------------------------------------------------------------------
    ## Load MNIST data (using load_data() and the dataset path)
    ## ------------------------------------------------------------------
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]  # divided into training set...
    valid_set_x, valid_set_y = datasets[1]  # validation set
    test_set_x, test_set_y = datasets[2]    # and test set

    # Compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    #####################################################################
    # BUILD THE MODEL                                                   #
    #####################################################################
    print('... building the model')

    # Symbolic variables for one minibatch
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # data, presented as rasterized images
    y = T.ivector('y')   # labels, presented as 1D vector of [int] labels

    ## ------------------------------------------------------------------
    ## Define the FIRST layer
    ## ------------------------------------------------------------------
    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) to
    # a 4D tensor, compatible with LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # First convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    ## ------------------------------------------------------------------
    ## Define the SECOND layer
    ## ------------------------------------------------------------------
    # Second convolutional pooling layer:
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    ## ------------------------------------------------------------------
    ## Define the THIRD layer
    ## ------------------------------------------------------------------
    # The HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels). Flattening gives a matrix of shape
    # (batch_size, nkerns[1] * 4 * 4), i.e. (500, 800) with the defaults.
    layer2_input = layer1.output.flatten(2)

    # Fully-connected layer with tanh activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    ## ------------------------------------------------------------------
    ## Define the FOURTH layer
    ## ------------------------------------------------------------------
    # Classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    ## ------------------------------------------------------------------
    ## Define cost and test functions
    ## ------------------------------------------------------------------
    cost = layer3.negative_log_likelihood(y)

    # Theano functions computing the mistakes made on one minibatch,
    # for the test set ...
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # ... and for the validation set
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # All model parameters to be fit by gradient descent, and their gradients
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)

    # train_model updates the model parameters by SGD. Since the model has
    # many parameters, the update list is created by looping over all
    # (params[i], grads[i]) pairs instead of writing each rule by hand.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # `train_model` returns the cost and, at the same time, updates the
    # parameters of the model based on the rules defined in `updates`.
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    #####################################################################
    # TRAIN MODEL                                                       #
    #####################################################################
    print('... training the model')

    ## ------------------------------------------------------------------
    ## Early-stopping parameters
    ## ------------------------------------------------------------------
    patience = 10000               # minimum number of iterations to run
    patience_increase = 2          # wait this much longer on a new best
    improvement_threshold = 0.995  # relative improvement that counts as
                                   # significant
    # Go through this many minibatches before checking the network on the
    # validation set; with the defaults this is once per epoch.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    ## ------------------------------------------------------------------
    ## Main loop over epochs and minibatches
    ## ------------------------------------------------------------------
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        for minibatch_index in range(n_train_batches):
            # Global iteration counter (named so that it does not shadow
            # the builtin iter()).
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)

            # One SGD step; the returned cost is not needed here.
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # Zero-one loss on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # If we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # ... and the improvement is large enough, extend the
                    # patience
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                    ## --------------------------------------------------
                    ## Save model parameters using cPickle
                    ## --------------------------------------------------
                    # NOTE(review): layer3's W and b are not written, so the
                    # classifier cannot be restored from this file alone --
                    # confirm whether that is intended.
                    fname = 'bestCNNModel.pkl'
                    # `with` guarantees the file is closed even when one of
                    # the dumps raises.
                    with open(fname, 'wb') as save_file:
                        # model weights, in this fixed order
                        cPickle.dump(layer0.W, save_file)
                        cPickle.dump(layer0.b, save_file)
                        cPickle.dump(layer1.W, save_file)
                        cPickle.dump(layer1.b, save_file)
                        cPickle.dump(layer2.W, save_file)
                        cPickle.dump(layer2.b, save_file)
                        # Disabled: hyperparameters and performance
                        # cPickle.dump(learning_rate, save_file)
                        # cPickle.dump(best_validation_loss, save_file)
                        # cPickle.dump(test_score, save_file)
                        # cPickle.dump(test_losses, save_file)
                        # cPickle.dump(nkerns, save_file)
                        # cPickle.dump(n_epochs, save_file)
                        # cPickle.dump(batch_size, save_file)

            # Stop once the patience is used up
            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()

    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
# datingDataMat, datingLabels = kNN.file2matrix( # 'mechanicLearning\datingTestSet.txt') # dataLabel = [x*15 for x in datingLabels] # fig = plt.figure() # ax = fig.add_subplot(111) # ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1], # dataLabel, dataLabel) # plt.show() # kNN.datingClassTest() # vector = kNN.img2vector( # "D:\\computerScience\\python3.7\\mechanicLearning\\testDigits\\0_13.txt") # print(vector[0, 0:32]) # kNN.handwritingClassTest() # lf.test() # gdm.test(0.006) filePath = r"D:\computerScience\python3.7\mechanicLearning\ttt.txt" gd = LogisticRegression(filePath, ["x^1", "y^1"], step=1) # gd = PolynomialGradDesc(filePath, 3, step=0.2) gd.showResult() print(gd.predict([3, 3])) # print("f = ", func) # print(func.subs(sp.Symbol("x"), 0))
class DBN(object):
    """Deep belief network regressor over sequences of 2-D points.

    The network input is a flattened sequence of ``n_in`` points and the
    output a flattened sequence of ``n_out`` points (two values per point,
    hence the ``* 2`` in the placeholder shapes). Sigmoid layers share their
    weights with RBM layers for layer-wise pretraining, and a
    LogisticRegression layer on top produces the prediction.
    """

    def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=(500, 500),
                 data_path="/mnt/disk2/liuying/T-ITS/dataset/geolife/"
                           "geolife_total"):
        """Build the TensorFlow graph and fit the input scaler.

        :param n_in: int, number of points per input sequence
        :param n_out: int, number of points per output sequence
        :param hidden_layers_sizes: list or tuple, the hidden layer sizes
        :param data_path: path of a whitespace-separated numeric text file;
            columns 2 and 3 are used to fit the MinMaxScaler that
            next_batch() applies to every point
        """
        self.n_layers = len(hidden_layers_sizes)
        self.layers = []      # normal sigmoid layers
        self.rbm_layers = []  # RBM layers
        self.params = []      # parameters updated while finetuning
        self.n_in = n_in
        self.n_out = n_out

        # Define the input and output (two values per point)
        self.x = tf.placeholder(tf.float32, shape=[None, self.n_in * 2])
        self.y = tf.placeholder(tf.float32, shape=[None, self.n_out * 2])

        # Fit the scaler on columns 2:4 of the complete data file
        total_data = np.loadtxt(data_path)
        total_data = total_data[:, 2:4]
        self.scaler = MinMaxScaler().fit(total_data)
        # Evaluate on the test split every `checkpoint_times` epochs
        self.checkpoint_times = 1

        # Construct the layers of the DBN
        for i in range(self.n_layers):
            if i == 0:
                layer_input = self.x
                input_size = self.n_in * 2
            else:
                layer_input = self.layers[i - 1].output
                input_size = hidden_layers_sizes[i - 1]

            # Sigmoid layer
            print("n_in:{0} n_out:{1}".format(input_size,
                                              hidden_layers_sizes[i]))
            # NOTE(review): the code below reads .params, .W, .b and .output
            # from this object and every layer is given the same name
            # "layer_1"; confirm that tf.layers.dense really is the intended
            # constructor here (a project HiddenLayer-style class would
            # provide those attributes).
            sigmoid_layer = tf.layers.dense(inputs=layer_input,
                                            units=hidden_layers_sizes[i],
                                            activation=tf.nn.sigmoid,
                                            name="layer_1")
            self.layers.append(sigmoid_layer)
            # Add the parameters for finetuning
            self.params.extend(sigmoid_layer.params)
            # Create the RBM layer sharing weights and hidden bias
            self.rbm_layers.append(
                RBM(inpt=layer_input,
                    n_visiable=input_size,
                    n_hidden=hidden_layers_sizes[i],
                    W=sigmoid_layer.W,
                    hbias=sigmoid_layer.b))

        # We use the LogisticRegression layer as the output layer
        self.output_layer = LogisticRegression(inpt=self.layers[-1].output,
                                               n_in=hidden_layers_sizes[-1],
                                               n_out=n_out * 2)
        self.params.extend(self.output_layer.params)
        # The finetuning cost
        self.cost = self.output_layer.cost(self.y)
        # The logistic regression output
        self.predictor = self.output_layer.output
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
            self.cost, var_list=self.params)
        self.saver = tf.train.Saver(tf.global_variables())

    def mean_error(self, predict_output, real_output):
        """Return the mean Euclidean distance between paired 2-D points.

        Both arrays are reshaped to (-1, 2) and compared row by row.

        :param predict_output: numpy array of predicted values
        :param real_output: numpy array of target values, same size
        :return: float, the average point-to-point distance
        :raises ZeroDivisionError: if the arrays contain no points
        """
        predict_output = predict_output.reshape(-1, 2)
        real_output = real_output.reshape(-1, 2)
        # Vectorised replacement for the per-point Python loop.
        distances = np.sqrt(
            np.sum(np.square(predict_output - real_output), axis=1))
        # Accumulate in float64 and divide as Python numbers so an empty
        # input still raises instead of silently yielding nan.
        return float(distances.sum(dtype=np.float64)) / predict_output.shape[0]

    def process_data_for_batch(self, data):
        """Split raw sequences into model values and bookkeeping info.

        :param data: sequence of sequences of records; element 2 of every
            record is the value fed to the model
        :return: (processed_data, info_data) where processed_data[i] lists
            element 2 of every record of sequence i, and info_data[i] holds
            elements 0 and 1 of the last record of sequence i
        """
        processed_data = [[elem[2] for elem in sequence] for sequence in data]
        info_data = [[sequence[len(sequence) - 1][0],
                      sequence[len(sequence) - 1][1]] for sequence in data]
        return processed_data, info_data

    def next_batch(self, X, y, batch_size, step, forward_only=False):
        """Return the scaled (x_batch, y_batch) pair for batch `step`.

        The last full step (step == len(X) // batch_size - 1) takes the
        final `batch_size` sequences instead of the regular slice, so the
        tail of the data set is covered.

        :param X: input sequences (see process_data_for_batch)
        :param y: target sequences, aligned with X
        :param batch_size: int, number of sequences per batch
        :param step: int, zero-based batch index
        :param forward_only: unused, kept for interface compatibility
        :return: two arrays of shape (-1, n_in * 2) and (-1, n_out * 2)
        """
        # Floor division keeps the comparison integral under Python 3 too.
        if step == len(X) // batch_size - 1:
            x_rows = X[len(X) - batch_size:]
            y_rows = y[len(y) - batch_size:]
        else:
            x_rows = X[step * batch_size:(step + 1) * batch_size]
            y_rows = y[step * batch_size:(step + 1) * batch_size]
        x_batch, _ = self.process_data_for_batch(x_rows)
        y_batch, _ = self.process_data_for_batch(y_rows)

        # Scale point by point: flatten to (-1, 2), transform, restore.
        x_batch = np.array(x_batch, dtype=np.float32).reshape(-1, 2)
        y_batch = np.array(y_batch, dtype=np.float32).reshape(-1, 2)
        x_batch = self.scaler.transform(x_batch)
        y_batch = self.scaler.transform(y_batch)
        x_batch = x_batch.reshape(-1, self.n_in * 2)
        y_batch = y_batch.reshape(-1, self.n_out * 2)
        return x_batch, y_batch

    def _evaluate(self, sess, X, y, batch_size, verbose=False):
        """Return the mean of the per-batch mean errors over (X, y).

        Shared by finetuning() and test(). Raises ZeroDivisionError when
        the split holds fewer than `batch_size` sequences.
        """
        batch_num = len(X) // batch_size
        total_error = 0.0
        for step in range(batch_num):
            x_batch, y_batch = self.next_batch(X, y, batch_size, step)
            predict = sess.run(self.predictor,
                               feed_dict={
                                   self.x: x_batch,
                                   self.y: y_batch
                               })
            error = self.mean_error(predict, y_batch)
            total_error += error
            if verbose:
                print("\nTest step :{0}, mean_error:{1}".format(step, error))
        return total_error / batch_num

    def pretrain(self,
                 sess,
                 dataset,
                 batch_size=50,
                 pretraining_epochs=10,
                 lr=0.01,
                 k=1,
                 display_step=1):
        """Pretrain the layers (just train the RBM layers), one at a time.

        :param sess: tf.Session
        :param dataset: sequence whose items 0 and 1 are the training
            inputs and targets
        :param batch_size: int
        :param pretraining_epochs: int, epochs per RBM layer
        :param lr: float, learning rate passed to the RBM train ops
        :param k: int, passed to get_train_ops as `k`
        :param display_step: int, print the average cost every this many
            epochs
        """
        train_X = dataset[0]
        train_y = dataset[1]
        print('Starting pretraining...\n')
        # Floor division: range() needs an int under Python 3 as well.
        batch_num = len(train_X) // batch_size
        print("batch_num {0}".format(batch_num))
        # Pretrain layer by layer
        for i in range(self.n_layers):
            cost = self.rbm_layers[i].get_reconstruction_cost()
            train_ops = self.rbm_layers[i].get_train_ops(learning_rate=lr,
                                                         k=k,
                                                         persistent=None)
            for epoch in range(pretraining_epochs):
                avg_cost = 0.0
                for step in range(batch_num):
                    x_batch, _ = self.next_batch(train_X, train_y, batch_size,
                                                 step)
                    # train
                    sess.run(train_ops, feed_dict={self.x: x_batch})
                    # compute cost
                    tmp_cost = sess.run(cost, feed_dict={self.x: x_batch})
                    avg_cost += tmp_cost / batch_num
                    if (step + 1) % 500 == 0:
                        print(
                            "\t\t\tPretraing layer {0} Epoch {1} Step {2} cost: {3}"
                            .format((i + 1), (epoch + 1), (step + 1),
                                    tmp_cost))
                # output
                if epoch % display_step == 0:
                    print("\tPretraing layer {0} Epoch {1} cost: {2}".format(
                        (i + 1), (epoch + 1), avg_cost))

    def finetuning(self,
                   sess,
                   dataset,
                   training_epochs=10,
                   start=0,
                   batch_size=100,
                   display_step=1,
                   model_path="",
                   model_name="model",
                   load_model=0):
        """Finetune the whole network and checkpoint the best model.

        After every `self.checkpoint_times` epochs the test split is
        evaluated; when the error improves, the session is saved to
        ``<model_path>/<model_name>best_model.ckpt``. Training stops early
        after 50 evaluations in a row without improvement.

        :param sess: tf.Session
        :param dataset: sequence (train_X, train_y, test_X, test_y)
        :param training_epochs: int, epoch index at which to stop
        :param start: int, first epoch index (to resume a run)
        :param batch_size: int
        :param display_step: unused, kept for interface compatibility
        :param model_path: directory of the checkpoint file
        :param model_name: prefix of the checkpoint file name
        :param load_model: unused, kept for interface compatibility
        """
        train_X = dataset[0]
        train_y = dataset[1]
        test_X = dataset[2]
        test_y = dataset[3]

        print("\nStart finetuning...\n")
        global_test_error = 100000000
        tolerance_count = 0
        # Loop invariant; floor division keeps it an int under Python 3.
        batch_num = len(train_X) // batch_size
        checkpoint_file = os.path.join(model_path,
                                       model_name + "best_model.ckpt")
        for epoch in range(start, training_epochs):
            avg_cost = 0.0
            for step in range(batch_num):
                x_batch, y_batch = self.next_batch(train_X, train_y,
                                                   batch_size, step)
                feed = {self.x: x_batch, self.y: y_batch}
                # train
                sess.run(self.train_op, feed_dict=feed)
                # compute cost
                avg_cost += sess.run(self.cost, feed_dict=feed) / batch_num
            print("epoch: {0} loss:  {1}".format(epoch + 1, avg_cost))

            if (epoch + 1) % self.checkpoint_times == 0:
                test_error = self._evaluate(sess, test_X, test_y,
                                            batch_size) * 10000
                print("final mean error(x10000): {0}".format(test_error))
                if test_error < global_test_error:
                    tolerance_count = 0
                    global_test_error = test_error
                    self.saver.save(sess, checkpoint_file)
                else:
                    tolerance_count += 1
                print("The global min test error: {0}".format(
                    global_test_error))
                if tolerance_count >= 50:
                    break
        print('The final final final global min test error: {0}'.format(
            global_test_error))

    def test(self, sess, dataset, batch_size=100):
        """Evaluate the test split and print per-batch and overall errors.

        :param sess: tf.Session
        :param dataset: sequence whose items 2 and 3 are the test inputs
            and targets
        :param batch_size: int
        """
        test_X = dataset[2]
        test_y = dataset[3]
        final_error = self._evaluate(sess, test_X, test_y, batch_size,
                                     verbose=True)
        print("final mean error: {0}".format(final_error))