def fit(self, Xtrain, ytrain, epoch=1000, learning_rate=0.01, L2_regulation=0.0):
    '''
    Build model with full-batch gradient descent
    :param Xtrain: a set of observations
    :param ytrain: labels
    :param epoch: the number of passes over the training data
    :param learning_rate: step size for the weight updates
    :param L2_regulation: L2 regularization strength
    :return: lists of costs, iteration indices, and scores for plotting
    '''
    K = np.amax(ytrain) + 1  # number of classes
    Y = utils.convert2indicator(ytrain)
    N, D = Xtrain.shape
    self.W1, self.b1, self.W2, self.b2 = self.initializeWeights(K, D, self.M)

    l_cost = list()
    l_iterations = list()
    l_score = list()
    for i in range(0, epoch):
        # update weights
        print('Epoch ' + str(i))

        # compute score
        Yhat, Z = self.predict(Xtrain)
        yhat = np.argmax(Yhat, axis=1)
        yindex = np.argmax(Y, axis=1)
        score = np.mean(yhat == yindex)
        l_score.append(score)
        print('Score: ' + str(score))

        cost = self.cost(Yhat, Y)
        l_cost.append(cost)
        l_iterations.append(i)
        print('Cost: ' + str(cost))

        # full gradient descent
        # compute the gradient over the whole data
        startRange = 0
        endRange = N
        gradient_W2, gradient_b2 = self.updateW2andb2(
            self.W2, self.b2, Y, Yhat, Z, N, K, self.M, startRange, endRange)
        gradient_W1, gradient_b1 = self.updateW1andb1(
            self.W1, self.W2, self.b1, Xtrain, D, Y, Yhat, Z, N, K, self.M,
            startRange, endRange)

        self.W1 += learning_rate * (gradient_W1 + L2_regulation * self.W1)
        self.b1 += learning_rate * (gradient_b1 + L2_regulation * self.b1)
        self.W2 += learning_rate * (gradient_W2 + L2_regulation * self.W2)
        self.b2 += learning_rate * (gradient_b2 + L2_regulation * self.b2)
        print('\n')
    return l_cost, l_iterations, l_score
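# fit() above relies on utils.convert2indicator to turn integer labels into a
# one-hot indicator matrix (its output is consumed with np.argmax(Y, axis=1)).
# That helper is not shown in this file; the sketch below is a minimal assumed
# implementation inferred from how it is used, not the original utils code.
import numpy as np

def convert2indicator(y):
    """Map integer labels y of shape (N,) to a one-hot matrix Y of shape (N, K)."""
    N = len(y)
    K = np.amax(y) + 1
    Y = np.zeros((N, K))
    Y[np.arange(N), y] = 1  # one 1 per row, in the column of that sample's class
    return Y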
def fit(self, Xtrain, ytrain, epoch=1000):
    K = np.amax(ytrain) + 1  # get the number of classes
    Y = utils.convert2indicator(ytrain)
    N, D = Xtrain.shape
    layers = self.initializeLayers(nFeatures=D,
                                   nClasses=K,
                                   hiddenLayersSize=self.hiddenLayersSize)

    # initialize placeholders
    tf_X = tf.placeholder(dtype=tf.float32, name='X', shape=(N, D))
    tf_Y = tf.placeholder(dtype=tf.float32, name='Y', shape=(N, K))

    # define symbolic formula
    tf_Yhat = self.forward(tf_X, layers)
    tf_cost = tf.math.reduce_sum(
        -1 * tf.multiply(tf_Y, tf.math.log(tf_Yhat)))  # cross-entropy
    tf_train = tf.train.GradientDescentOptimizer(
        learning_rate=0.001).minimize(tf_cost)
    tf_yhat = tf.math.argmax(tf_Yhat, axis=1)

    scores = []
    iterations = []
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        for i in range(epoch):
            print('iteration ' + str(i))
            session.run(tf_train, feed_dict={tf_X: Xtrain, tf_Y: Y})
            yhat = session.run(tf_yhat, feed_dict={tf_X: Xtrain, tf_Y: Y})
            score = np.mean(yhat == ytrain)
            print('score: ' + str(score))
            iterations.append(i)
            scores.append(score)
            print()
    self.plot(scores, iterations)
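# self.plot is assumed to be a small matplotlib helper that charts the
# training score per iteration. A minimal sketch under that assumption (the
# labels and styling are guesses, not the original helper):
import matplotlib.pyplot as plt

def plot(self, scores, iterations):
    plt.plot(iterations, scores)
    plt.xlabel('iteration')
    plt.ylabel('training score')
    plt.show()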
def fit(self, X, y, epoch=1000):
    K = np.amax(y) + 1  # get the number of classes
    Y = utils.convert2indicator(y)
    N, D = X.shape
    tf_X, tf_Y = self.initializePlaceholder()
    tf_W1, tf_b1, tf_W2, tf_b2 = self.initializeWeights(D, self.M, K)

    # define symbolic operations
    tf_Yhat = self.forward(tf_X, tf_W1, tf_b1, tf_W2, tf_b2)
    tf_cost = tf.math.reduce_sum(
        -1 * tf.multiply(tf_Y, tf.math.log(tf_Yhat)))  # cross-entropy
    tf_train = tf.train.GradientDescentOptimizer(
        learning_rate=0.001).minimize(tf_cost)
    tf_yhat = tf.math.argmax(tf_Yhat, axis=1)

    scores = []
    iterations = []
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        for i in range(epoch):
            print('iteration ' + str(i))
            session.run(tf_train, feed_dict={tf_X: X, tf_Y: Y})
            cost = session.run(tf_cost, feed_dict={tf_X: X, tf_Y: Y})
            print("Cost = " + str(cost))
            yhat = session.run(tf_yhat, feed_dict={tf_X: X, tf_Y: Y})
            score = np.mean(yhat == y)
            print('Score = ' + str(score))
            iterations.append(i)
            scores.append(score)
            print()
    self.plot(scores, iterations)
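# For this two-layer variant, initializeWeights(D, M, K) and forward(...) are
# assumed to look roughly like the sketch below: tf.Variables drawn from a
# scaled normal, one tanh hidden layer of size M, and a softmax output. The
# weight scaling and the tanh nonlinearity are assumptions, not confirmed by
# the snippet above.
def initializeWeights(self, D, M, K):
    tf_W1 = tf.Variable(np.random.randn(D, M) / np.sqrt(D), dtype=tf.float32)
    tf_b1 = tf.Variable(np.zeros(M), dtype=tf.float32)
    tf_W2 = tf.Variable(np.random.randn(M, K) / np.sqrt(M), dtype=tf.float32)
    tf_b2 = tf.Variable(np.zeros(K), dtype=tf.float32)
    return tf_W1, tf_b1, tf_W2, tf_b2

def forward(self, tf_X, tf_W1, tf_b1, tf_W2, tf_b2):
    tf_Z = tf.nn.tanh(tf.matmul(tf_X, tf_W1) + tf_b1)      # hidden layer
    return tf.nn.softmax(tf.matmul(tf_Z, tf_W2) + tf_b2)   # class probabilities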
def fit(self, Xtrain, ytrain, learning_rate=0.001, epoch=20, batch_size=100):
    N = Xtrain.shape[0]
    self.layers = self.initializeLayers(self.D, self.K, self.hiddenLayersSize)

    # STEP 1: greedy layer-wise training of autoencoders
    input_autoencoder = Xtrain
    for layer in self.layers[:-1]:
        print('Pretraining layer = (' + str(layer.M1) + ', ' + str(layer.M2) + ')')
        layer.fit(input_autoencoder)
        input_autoencoder = layer.Z

    # STEP 2: fine-tune the whole network with supervised training
    print('Fit model')
    self.tf_X = tf.placeholder(dtype=tf.float64)
    tf_Y = tf.placeholder(dtype=tf.float64)
    Ytrain = utils.convert2indicator(ytrain)
    self.tf_Yhat = self.forward(self.tf_X)
    tf_cost = tf.math.reduce_sum(-tf_Y * tf.math.log(self.tf_Yhat))
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(tf_cost)

    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())

    iteration = 0
    iterations = []
    costs = []
    nBatches = N // batch_size  # number of full batches
    for i in range(epoch):
        for j in range(nBatches + 1):
            # mini-batch gradient descent; the last batch picks up the
            # leftover samples. Note: the original sliced by j * nBatches,
            # which mixes up the batch count and the batch size.
            start = j * batch_size
            end = N if j == nBatches else (j + 1) * batch_size
            if start >= end:
                continue  # N divisible by batch_size: no leftover batch
            iterations.append(iteration)
            _, trainingCost = self.session.run(
                (train_op, tf_cost),
                feed_dict={
                    self.tf_X: Xtrain[start:end],
                    tf_Y: Ytrain[start:end]
                })

            # just for testing; report the per-sample error of this batch
            costs.append(trainingCost)
            print("Training. Epoch " + str(i) + "/ Iteration " + str(iteration) +
                  "/ Training error = " + str(trainingCost / (end - start)))
            iteration += 1
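# Each entry in self.layers[:-1] is assumed to behave like the autoencoder
# layer sketched below: fit(X) trains the layer to reconstruct its input
# through a hidden bottleneck, and Z ends up holding the hidden codes that
# feed the next layer. The attribute names (M1, M2, Z) follow the usage
# above; the tied-weight architecture and training details are assumptions,
# not the original class.
class AutoEncoderLayer:
    def __init__(self, M1, M2):
        self.M1, self.M2 = M1, M2  # input size, hidden size
        self.W = tf.Variable(np.random.randn(M1, M2) / np.sqrt(M1))
        self.b = tf.Variable(np.zeros(M2))
        self.c = tf.Variable(np.zeros(M1))

    def fit(self, X, epoch=10, learning_rate=0.001):
        tf_X = tf.placeholder(dtype=tf.float64)
        tf_Z = tf.nn.sigmoid(tf.matmul(tf_X, self.W) + self.b)  # encode
        tf_Xhat = tf.nn.sigmoid(
            tf.matmul(tf_Z, tf.transpose(self.W)) + self.c)     # decode, tied weights
        cost = tf.math.reduce_sum(tf.math.square(tf_X - tf_Xhat))  # reconstruction error
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        with tf.Session() as session:
            # initialize only this layer's variables
            session.run(tf.variables_initializer([self.W, self.b, self.c]))
            for _ in range(epoch):
                session.run(train_op, feed_dict={tf_X: X})
            self.Z = session.run(tf_Z, feed_dict={tf_X: X})  # cache hidden codes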
def fit(self, Xtrain, ytrain, strategy):
    '''
    Build model with stochastic gradient descent
    :param Xtrain: a set of observations
    :param ytrain: labels
    :param strategy: dict selecting the update rule ('name') and its
        hyperparameters: 'epoch', 'learning_rate', 'L2_regulation', plus
        rule-specific keys ('step' and 'factor' for STEP_DECAY, 'epsilon'
        for ADAGRAD and RMSPROP, 'decay_rate' for RMSPROP, 'mu' for MOMENTUM)
    :return: lists of costs, iteration indices, and scores for plotting
    '''
    epoch = strategy['epoch']
    learning_rate = strategy['learning_rate']
    L2_regulation = strategy['L2_regulation']

    Y = utils.convert2indicator(ytrain)
    K = np.amax(ytrain) + 1  # get number of classes
    N, D = Xtrain.shape
    self.W1, self.b1, self.W2, self.b2 = self.initializeWeights(K, D, self.M)

    # for logging
    l_cost = list()
    l_iterations = list()
    l_score = list()
    iteration = -1

    self.cache_W2 = 1  # adagrad, rmsprop
    self.cache_b2 = 1  # adagrad, rmsprop
    self.cache_W1 = 1  # adagrad, rmsprop
    self.cache_b1 = 1  # adagrad, rmsprop
    self.v_W1 = 0  # momentum
    self.v_b1 = 0  # momentum
    self.v_W2 = 0  # momentum
    self.v_b2 = 0  # momentum

    for i in range(0, epoch):  # update weights
        for j in range(0, N):  # for each epoch, run over all samples separately
            print('Epoch ' + str(i))
            iteration += 1
            print('Iteration ' + str(iteration))
            print('Learning rate: ' + str(learning_rate))

            # compute score
            Yhat, Z = self.predict(Xtrain)
            yhat = np.argmax(Yhat, axis=1)
            yindex = np.argmax(Y, axis=1)
            score = np.mean(yhat == yindex)
            l_score.append(score)
            print('Score: ' + str(score))

            cost = self.cost(Yhat, Y)
            l_cost.append(cost)
            l_iterations.append(iteration)
            print('Cost: ' + str(cost))

            # compute the gradient at a single observation
            startRange = j
            endRange = j + 1
            print('Choose observation ' + str(startRange))
            gradient_W2, gradient_b2 = self.updateW2andb2(
                self.W2, self.b2, Y, Yhat, Z, N, K, self.M, startRange, endRange)
            gradient_W1, gradient_b1 = self.updateW1andb1(
                self.W1, self.W2, self.b1, Xtrain, D, Y, Yhat, Z, N, K, self.M,
                startRange, endRange)

            print('Update weights')
            # update learning rate
            if strategy['name'] == 'STEP_DECAY':
                if iteration >= 1 and iteration % strategy['step'] == 0:
                    learning_rate = learning_rate / strategy['factor']
                self.W1 += learning_rate * (gradient_W1 + L2_regulation * self.W1)
                self.b1 += learning_rate * (gradient_b1 + L2_regulation * self.b1)
                self.W2 += learning_rate * (gradient_W2 + L2_regulation * self.W2)
                self.b2 += learning_rate * (gradient_b2 + L2_regulation * self.b2)
            elif strategy['name'] == 'ADAGRAD':
                self.cache_b1 += gradient_b1 * gradient_b1
                self.b1 += learning_rate * (gradient_b1 + L2_regulation * self.b1) / (
                    np.sqrt(self.cache_b1) + strategy['epsilon'])
                self.cache_W1 += gradient_W1 * gradient_W1
                self.W1 += learning_rate * (gradient_W1 + L2_regulation * self.W1) / (
                    np.sqrt(self.cache_W1) + strategy['epsilon'])
                self.cache_b2 += gradient_b2 * gradient_b2
                self.b2 += learning_rate * (gradient_b2 + L2_regulation * self.b2) / (
                    np.sqrt(self.cache_b2) + strategy['epsilon'])
                self.cache_W2 += gradient_W2 * gradient_W2
                self.W2 += learning_rate * (gradient_W2 + L2_regulation * self.W2) / (
                    np.sqrt(self.cache_W2) + strategy['epsilon'])
            elif strategy['name'] == 'CONSTANT':
                self.W1 += learning_rate * (gradient_W1 + L2_regulation * self.W1)
                self.b1 += learning_rate * (gradient_b1 + L2_regulation * self.b1)
                self.W2 += learning_rate * (gradient_W2 + L2_regulation * self.W2)
                self.b2 += learning_rate * (gradient_b2 + L2_regulation * self.b2)
            elif strategy['name'] == 'RMSPROP':
                self.cache_b1 = strategy['decay_rate'] * self.cache_b1 + (
                    1 - strategy['decay_rate']) * gradient_b1 * gradient_b1
                self.b1 += learning_rate * (gradient_b1 + L2_regulation * self.b1) / (
                    np.sqrt(self.cache_b1) + strategy['epsilon'])
                self.cache_W1 = strategy['decay_rate'] * self.cache_W1 + (
                    1 - strategy['decay_rate']) * gradient_W1 * gradient_W1
                self.W1 += learning_rate * (gradient_W1 + L2_regulation * self.W1) / (
                    np.sqrt(self.cache_W1) + strategy['epsilon'])
                self.cache_b2 = strategy['decay_rate'] * self.cache_b2 + (
                    1 - strategy['decay_rate']) * gradient_b2 * gradient_b2
                self.b2 += learning_rate * (gradient_b2 + L2_regulation * self.b2) / (
                    np.sqrt(self.cache_b2) + strategy['epsilon'])
                self.cache_W2 = strategy['decay_rate'] * self.cache_W2 + (
                    1 - strategy['decay_rate']) * gradient_W2 * gradient_W2
                self.W2 += learning_rate * (gradient_W2 + L2_regulation * self.W2) / (
                    np.sqrt(self.cache_W2) + strategy['epsilon'])
            elif strategy['name'] == 'MOMENTUM':
                self.v_W1 = strategy['mu'] * self.v_W1 + learning_rate * (
                    gradient_W1 + L2_regulation * self.W1)
                self.W1 += self.v_W1
                self.v_b1 = strategy['mu'] * self.v_b1 + learning_rate * (
                    gradient_b1 + L2_regulation * self.b1)
                self.b1 += self.v_b1
                self.v_W2 = strategy['mu'] * self.v_W2 + learning_rate * (
                    gradient_W2 + L2_regulation * self.W2)
                self.W2 += self.v_W2
                self.v_b2 = strategy['mu'] * self.v_b2 + learning_rate * (
                    gradient_b2 + L2_regulation * self.b2)
                self.b2 += self.v_b2
            print('\n')
    return l_cost, l_iterations, l_score
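# The strategy dict drives the update rule above; the keys each branch
# consumes are visible in the code. Example dicts (the numeric values are
# illustrative only, not tuned settings from the original):
strategy_constant = {'name': 'CONSTANT', 'epoch': 10,
                     'learning_rate': 0.001, 'L2_regulation': 0.01}
strategy_step_decay = {'name': 'STEP_DECAY', 'epoch': 10,
                       'learning_rate': 0.01, 'L2_regulation': 0.01,
                       'step': 1000, 'factor': 2}
strategy_adagrad = {'name': 'ADAGRAD', 'epoch': 10,
                    'learning_rate': 0.01, 'L2_regulation': 0.01,
                    'epsilon': 1e-8}
strategy_rmsprop = {'name': 'RMSPROP', 'epoch': 10,
                    'learning_rate': 0.001, 'L2_regulation': 0.01,
                    'decay_rate': 0.9, 'epsilon': 1e-8}
strategy_momentum = {'name': 'MOMENTUM', 'epoch': 10,
                     'learning_rate': 0.001, 'L2_regulation': 0.01,
                     'mu': 0.9}
# e.g. l_cost, l_iterations, l_score = model.fit(Xtrain, ytrain, strategy_rmsprop)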
def main():
    Xtrain, ytrain = utils.readTrainingDigitRecognizer(
        '../data/digit-recognizer/train.csv')
    Ytrain = utils.convert2indicator(ytrain)
    build_model(Xtrain, Ytrain)
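# utils.readTrainingDigitRecognizer is assumed to parse Kaggle's
# digit-recognizer train.csv, whose first column is the digit label and whose
# remaining 784 columns are pixel intensities. A minimal sketch under that
# assumption (the scaling to [0, 1] is a guess, not confirmed by this file):
import numpy as np

def readTrainingDigitRecognizer(path):
    data = np.loadtxt(path, delimiter=',', skiprows=1)  # skip the header row
    ytrain = data[:, 0].astype(np.int32)   # first column: digit label
    Xtrain = data[:, 1:] / 255.0           # remaining columns: pixels, scaled
    return Xtrain, ytrain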
def fit(self, Xtrain, ytrain, Xval, yval, epoch=20, learning_rate=0.001, batch_size=30):
    """
    train model
    :param Xtrain: observations' input
    :param ytrain: observations' label
    :param Xval: validation input
    :param yval: validation label
    :param epoch: the number of epochs for training
    :param learning_rate: step size for gradient descent
    :param batch_size: the number of samples per mini-batch
    :return:
    """
    K = np.amax(ytrain) + 1  # get the number of classes
    Ytrain = utils.convert2indicator(ytrain)
    Yval = utils.convert2indicator(yval)
    N, D = Xtrain.shape
    layers = self.initializeLayers(nFeatures=D,
                                   nClasses=K,
                                   hiddenLayersSize=self.hiddenLayersSize)

    # initialize placeholders
    tf_X = tf.placeholder(dtype=tf.float32, name='X')
    tf_Y = tf.placeholder(dtype=tf.float32, name='Y')

    # define symbolic formula
    tf_Yhat_training = self.forward_train(
        tf_X, layers, self.pkeep)  # forward pass with dropout during training
    tf_cost_training = tf.math.reduce_sum(-1 * tf.multiply(
        tf_Y, tf.math.log(tf_Yhat_training + 1e-4)))  # cross-entropy
    tf_train = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(tf_cost_training)

    # we do not use dropout when testing
    tf_Yhat_testing = self.forward_test(
        tf_X, layers)  # forward pass during testing
    tf_cost_testing = tf.math.reduce_sum(
        -1 * tf.multiply(tf_Y, tf.math.log(tf_Yhat_testing + 1e-4)))  # cross-entropy
    # predictions come from the no-dropout graph (the original took the argmax
    # of tf_Yhat_training, which leaves dropout switched on at evaluation time)
    tf_yhat = tf.math.argmax(tf_Yhat_testing, axis=1)

    # just for plotting
    trainingErrors = []
    validationErrors = []
    trainingAccuracies = []
    validationAccuracies = []
    iterations = []

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        iteration = 0
        nBatches = N // batch_size  # number of full batches
        for i in range(epoch):
            for j in range(nBatches + 1):
                print('iteration ' + str(iteration))
                iterations.append(iteration)
                iteration += 1

                # mini-batch gradient descent; the last batch picks up the
                # leftover samples. Note: the original sliced by j * nBatches,
                # which mixes up the batch count and the batch size.
                start = j * batch_size
                end = N if j == nBatches else (j + 1) * batch_size
                session.run(tf_train,
                            feed_dict={
                                tf_X: Xtrain[start:end],
                                tf_Y: Ytrain[start:end]
                            })

                yhat = session.run(tf_yhat, feed_dict={tf_X: Xtrain, tf_Y: Ytrain})
                accuracy = np.mean(yhat == ytrain)
                print("training accuracy: " + str(accuracy))
                trainingAccuracies.append(accuracy)

                yhat = session.run(tf_yhat, feed_dict={tf_X: Xval, tf_Y: Yval})
                accuracy = np.mean(yhat == yval)
                print("validation accuracy: " + str(accuracy))
                validationAccuracies.append(accuracy)

                trainingError = session.run(tf_cost_testing,
                                            feed_dict={
                                                tf_X: Xtrain,
                                                tf_Y: Ytrain
                                            }) / len(Xtrain)
                print('training error: ' + str(trainingError))
                trainingErrors.append(trainingError)

                validationError = session.run(tf_cost_testing,
                                              feed_dict={
                                                  tf_X: Xval,
                                                  tf_Y: Yval
                                              }) / len(Xval)
                print('validation error: ' + str(validationError))
                validationErrors.append(validationError)
                print()

    self.plotError(trainingErrors, validationErrors, iterations)
    self.plotAccuracy(trainingAccuracies, validationAccuracies, iterations)
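# forward_train and forward_test are assumed to share weights and differ only
# in dropout: during training each hidden activation is kept with probability
# pkeep, while the test graph skips dropout entirely. A minimal sketch under
# that assumption (the sigmoid nonlinearity and the layer attributes W and b
# are guesses consistent with the rest of the file):
def forward_train(self, tf_X, layers, pkeep):
    Z = tf_X
    for layer in layers[:-1]:
        Z = tf.nn.sigmoid(tf.matmul(Z, layer.W) + layer.b)
        Z = tf.nn.dropout(Z, keep_prob=pkeep)  # drop units only while training
    last = layers[-1]
    return tf.nn.softmax(tf.matmul(Z, last.W) + last.b)

def forward_test(self, tf_X, layers):
    Z = tf_X
    for layer in layers[:-1]:
        Z = tf.nn.sigmoid(tf.matmul(Z, layer.W) + layer.b)  # no dropout at test time
    last = layers[-1]
    return tf.nn.softmax(tf.matmul(Z, last.W) + last.b)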