def main():
    X, Y = get_ecommerce(user_action=None)
    X, Y = shuffle(X, Y)

    N, D = X.shape
    Y = Y.astype(np.int32)
    K = len(np.unique(Y))

    # hold out the last 100 samples for testing (no overlap with the training set)
    Ntest = 100
    Ntrain = N - Ntest
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Ytrain_ind = y2indicator(Ytrain, K)
    Xtest, Ytest = X[-Ntest:, :], Y[-Ntest:]
    Ytest_ind = y2indicator(Ytest, K)

    # params
    lr = 5e-3
    max_iteration = 10000
    W = np.random.randn(D, K) / np.sqrt(D + K)
    b = np.zeros(K)

    train_costs = []
    test_costs = []
    for i in range(max_iteration):
        pYtrain, pYtest = forward(W, b, Xtrain), forward(W, b, Xtest)

        ctrain = cross_entropy(Ytrain_ind, pYtrain)
        ctest = cross_entropy(Ytest_ind, pYtest)
        train_costs.append(ctrain)
        test_costs.append(ctest)

        # full-batch gradient descent on the softmax cross-entropy
        W -= lr * Xtrain.T.dot(pYtrain - Ytrain_ind)
        b -= lr * (pYtrain - Ytrain_ind).sum(axis=0)
        if i % 1000 == 0:
            print("i=%d\ttrain cost=%.3f\ttest cost=%.3f" % (i, ctrain, ctest))

    print("i=%d\ttrain cost=%.3f\ttest cost=%.3f" % (max_iteration, ctrain, ctest))
    print("Final train classification rate", classification_rate(Ytrain, predict(pYtrain)))
    print("Final test classification rate", classification_rate(Ytest, predict(pYtest)))

    plt.title('logistic regression + softmax')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(test_costs, label='test cost')
    plt.legend([legend1, legend2])
    plt.show()
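# Hedged sketch (assumption, not from the source): main() above relies on helper
# functions from a shared utils module that is not shown here. Plausible
# implementations, inferred only from how they are called (forward returns
# softmax probabilities, cross_entropy takes one-hot targets):
import numpy as np

def forward(W, b, X):
    # linear activation followed by a numerically stable softmax
    A = X.dot(W) + b
    expA = np.exp(A - A.max(axis=1, keepdims=True))
    return expA / expA.sum(axis=1, keepdims=True)

def cross_entropy(T, pY):
    # total categorical cross-entropy for one-hot targets T
    return -(T * np.log(pY)).sum()

def predict(pY):
    return np.argmax(pY, axis=1)

def classification_rate(Y, P):
    return np.mean(Y == P)

def y2indicator(y, K):
    # one-hot encode integer labels y into an N x K indicator matrix
    ind = np.zeros((len(y), K))
    ind[np.arange(len(y)), y] = 1
    return ind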
def fit(self, X, Y, learning_rate=10e-8, reg=10e-8, epochs=10000, show_figure=False):
    X, Y = shuffle(X, Y)
    K = len(set(Y))

    # hold out the last 1000 samples for validation
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    Tvalid = y2indicator(Yvalid, K)
    X, Y = X[:-1000], Y[:-1000]

    N, D = X.shape
    T = y2indicator(Y, K)
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M + K)
    self.b2 = np.zeros(K)

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        pY, Z = self.forward(X)

        # gradient descent step with L2 regularization
        self.W2 -= learning_rate * (Z.T.dot(pY - T) + reg * self.W2)
        self.b2 -= learning_rate * ((pY - T).sum(axis=0) + reg * self.b2)
        dZ = (pY - T).dot(self.W2.T) * Z * (1 - Z)  # sigmoid hidden layer
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        if i % 10 == 0:
            pYvalid, Zvalid = self.forward(Xvalid)
            c = cost(Tvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print("i:", i, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e

    print("best_validation_error:", best_validation_error)
    if show_figure:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=10e-6, reg=10e-7, epochs=1000, show_figure=False):
    X, Y = shuffle(X, Y)

    # hold out the last 10 samples for validation
    x_valid = X[-10:]
    y_valid = Y[-10:]
    t_valid = utils.y2indicator(y_valid)
    x = X[:-10]
    y = Y[:-10]
    t = utils.y2indicator(y)

    N, D = x.shape
    K = len(set(y))
    self.W1 = np.random.randn(D, self.M)
    self.b1 = np.random.randn(self.M)
    self.W2 = np.random.randn(self.M, K)
    self.b2 = np.random.randn(K)

    costs = []
    for i in range(epochs):
        pY, Z = self.forward(x)

        # updating weights (delta renamed so it no longer shadows the feature count D,
        # and the bias gradients are summed over the sample axis)
        delta = pY - t
        self.W2 -= learning_rate * (Z.T.dot(delta) + reg * self.W2)
        self.b2 -= learning_rate * (delta.sum(axis=0) + reg * self.b2)
        dZ = delta.dot(self.W2.T) * Z * (1 - Z)
        self.W1 -= learning_rate * (x.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

        if i % 10 == 0:
            pY_valid, _ = self.forward(x_valid)
            c = utils.cost(t_valid, pY_valid)
            costs.append(c)
            e = utils.error_rate(y_valid, np.argmax(pY_valid, axis=1))
            print("i:", i, "cost:", c, "error:", e)

    if show_figure:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=0.01, epochs=1000, show_figure=False):
    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = Y.astype(np.int32)

    # hold out the last 10 samples for validation
    X_valid = X[-10:]
    Y_valid = Y[-10:]
    T_valid = utils.y2indicator(Y_valid)
    X = X[:-10]
    Y = Y[:-10]
    T = utils.y2indicator(Y)

    N, D = X.shape
    K = len(set(Y))
    tfX = tf.placeholder(tf.float32, [None, D])
    tfY = tf.placeholder(tf.float32, [None, K])

    self.W1 = self.init_weights([D, self.M])
    self.b1 = self.init_weights([self.M])
    self.W2 = self.init_weights([self.M, K])
    self.b2 = self.init_weights([K])

    # build the graph on the placeholder, not on the numpy array
    py_x = self.forward(tfX)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tfY, logits=py_x))
    tf.summary.scalar('cost', cost)
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    predict_op = tf.argmax(py_x, 1)

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(epochs):
        sess.run(train_op, feed_dict={tfX: X, tfY: T})
        prediction = sess.run(predict_op, feed_dict={tfX: X_valid})
        if i % 10 == 0:
            # accuracy is measured on the held-out samples
            print("i:", i, "accuracy:", np.mean(Y_valid == prediction))
def fit(self, X, Y):
    X, Y = shuffle(X, Y)
    totalSampleCount, _ = X.shape
    testTrainSeperationIndex = int(self.testTrainSeperatingFactor * totalSampleCount)
    Xvalid, Yvalid = X[-testTrainSeperationIndex:], Y[-testTrainSeperationIndex:]
    X, Y = X[:-testTrainSeperationIndex], Y[:-testTrainSeperationIndex]

    numberOfSamples, featureVectorSize = X.shape
    classesCount = len(set(Y))
    target = y2indicator(Y)

    # input to hidden layer weights and biases
    self.W1 = np.random.randn(
        featureVectorSize, self.numberOfHiddenLayerNeurons) / np.sqrt(
            featureVectorSize + self.numberOfHiddenLayerNeurons)
    self.b1 = np.zeros(self.numberOfHiddenLayerNeurons)

    # hidden layer to output weights and biases
    self.W2 = np.random.randn(
        self.numberOfHiddenLayerNeurons, classesCount) / np.sqrt(
            self.numberOfHiddenLayerNeurons + classesCount)
    self.b2 = np.zeros(classesCount)

    costs = []
    bestValidationError = 1
    for i in range(self.epochs):
        # forward propagation and cost calculation
        output, hiddenLayerOutput = self.forward(X)

        # gradient ascent step on (target - output); the L2 penalty is subtracted
        distance = target - output
        self.W2 += self.learningRate * (hiddenLayerOutput.T.dot(distance) - self.reg * self.W2)
        self.b2 += self.learningRate * (distance.sum(axis=0) - self.reg * self.b2)
        dOutput = distance.dot(self.W2.T) * (hiddenLayerOutput > 0)  # relu
        self.W1 += self.learningRate * (X.T.dot(dOutput) - self.reg * self.W1)
        self.b1 += self.learningRate * (dOutput.sum(axis=0) - self.reg * self.b1)

        if i % 10 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            costs.append(c)
            e = errorRate(Yvalid, np.argmax(pYvalid, axis=1))
            print("i:", i, "cost:", c, "error:", e)
            if e < bestValidationError:
                bestValidationError = e

    print("bestValidationError:", bestValidationError)
    if self.showFigure:
        plt.plot(costs)
        plt.show()
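# Hedged sketch (assumption, not from the source): cost2 and errorRate are not
# defined in this snippet; from how they are called, cost2 appears to take integer
# class labels plus predicted probabilities, and errorRate compares labels against
# argmax predictions:
import numpy as np

def cost2(Y, pY):
    # mean negative log-likelihood of the correct class
    N = len(Y)
    return -np.log(pY[np.arange(N), Y]).mean()

def errorRate(targets, predictions):
    return np.mean(targets != predictions)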
def main():
    X, Y = get_ecommerce(user_action=None)
    X, Y = shuffle(X, Y)

    # Running variables
    learning_rate = 5e-4
    max_iterations = 10000

    # Define dimensions
    N, D = X.shape
    M = 5
    K = len(np.unique(Y))

    Ntrain = N - 100
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Ytrain_ind = y2indicator(Ytrain, K)
    Ntest = 100
    Xtest, Ytest = X[-Ntest:, :], Y[-Ntest:]
    Ytest_ind = y2indicator(Ytest, K)

    W1_init = np.random.randn(D, M) / np.sqrt(M + D)
    b1_init = np.random.randn(M) / np.sqrt(M)
    W2_init = np.random.randn(M, K) / np.sqrt(M + K)
    b2_init = np.random.randn(K) / np.sqrt(K)

    # Define theano shared variables
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')

    # Define symbolic inputs
    thX = T.matrix('X')
    thT = T.matrix('T')

    # Define cost (T.log, not np.log, so the expression stays symbolic)
    thZ = sigmoid(thX.dot(W1) + b1)
    thY = softmax(thZ.dot(W2) + b2)
    cost = -(thT * T.log(thY) + (1 - thT) * T.log(1 - thY)).sum()
    prediction = T.argmax(thY, axis=1)

    # Define updates
    W1_update = W1 - learning_rate * T.grad(cost, W1)
    b1_update = b1 - learning_rate * T.grad(cost, b1)
    W2_update = W2 - learning_rate * T.grad(cost, W2)
    b2_update = b2 - learning_rate * T.grad(cost, b2)

    train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, W1_update), (b1, b1_update), (W2, W2_update), (b2, b2_update)],
    )
    predict = theano.function(
        inputs=[thX, thT],
        outputs=[cost, prediction],
    )

    train_errors = []
    test_errors = []
    train_costs = []
    test_costs = []
    for i in range(max_iterations):
        train(Xtrain, Ytrain_ind)
        if i % 10 == 0:
            c, pYtrain = predict(Xtrain, Ytrain_ind)
            err = error_rate(Ytrain, pYtrain)
            train_costs.append(c)
            train_errors.append(err)

            c, pYtest = predict(Xtest, Ytest_ind)
            err = error_rate(Ytest, pYtest)
            test_costs.append(c)
            test_errors.append(err)
            print("i=%d\tc=%.3f\terr=%.3f" % (i, c, err))

    print("i=%d\tc=%.3f\terr=%.3f" % (max_iterations, c, err))
    print("Final train classification rate", classification_rate(Ytrain, pYtrain))
    print("Final test classification rate", classification_rate(Ytest, pYtest))

    plt.title('Multi layer perceptron: Costs')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(test_costs, label='test cost')
    plt.legend([legend1, legend2])
    plt.show()

    plt.title('Multi layer perceptron: Error rates')
    plt.xlabel('iterations')
    plt.ylabel('error rates')
    legend1, = plt.plot(train_errors, label='train error')
    legend2, = plt.plot(test_errors, label='test error')
    plt.legend([legend1, legend2])
    plt.show()
def fit(self, X, Y, learning_rate=10e-5, epochs=200, reg=10e-8, batch_sz=200,
        show_fig=False, activation=tf.tanh):
    X, Y = shuffle(X, Y)
    K = len(np.unique(Y))
    T = y2indicator(Y, K).astype(np.float32)

    # hold out the last 1000 samples for validation
    Xvalid, Yvalid, Tvalid = X[-1000:, :], Y[-1000:], T[-1000:, :]
    Xtrain, Ytrain, Ttrain = X[:-1000, :], Y[:-1000], T[:-1000, :]
    N, D = Xtrain.shape

    # Variable initialization
    W1, b1 = init_weight_and_bias(D, self.M)
    W2, b2 = init_weight_and_bias(self.M, K)
    self.W1 = tf.Variable(W1.astype(np.float32), name='W1')
    self.b1 = tf.Variable(b1.astype(np.float32), name='b1')
    self.W2 = tf.Variable(W2.astype(np.float32), name='W2')
    self.b2 = tf.Variable(b2.astype(np.float32), name='b2')
    self.params = [self.W1, self.b1, self.W2, self.b2]

    # Define placeholders (named separately so they do not shadow the data arrays)
    tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
    tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')

    Z = activation(tf.matmul(tfX, self.W1) + self.b1)
    Yish = tf.matmul(Z, self.W2) + self.b2

    rcost = reg * tf.reduce_sum([tf.nn.l2_loss(p) for p in self.params])
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=tfT, logits=Yish)) + rcost
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    self.predict_op = tf.argmax(Yish, 1)

    n_batches = N // batch_sz
    costs = []
    errors = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in range(epochs):
            # shuffle the indicator targets together with the inputs
            Xtrain, Ytrain, Ttrain = shuffle(Xtrain, Ytrain, Ttrain)
            for j in range(n_batches):
                Xbatch = Xtrain[j * batch_sz:(j + 1) * batch_sz, :]
                Tbatch = Ttrain[j * batch_sz:(j + 1) * batch_sz, :]
                session.run(train_op, feed_dict={tfX: Xbatch, tfT: Tbatch})

                if j % 10 == 0:
                    c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Tvalid})
                    pYvalid = session.run(self.predict_op, feed_dict={tfX: Xvalid})
                    err = error_rate(Yvalid, pYvalid)
                    print("i:%d\tj:%d\tc:%.3f\terr:%.3f" % (i, j, c, err))
                    costs.append(c)
                    errors.append(err)

    if show_fig:
        plt.title('costs')
        plt.plot(costs)
        plt.show()
        plt.title('error rate')
        plt.plot(errors)
        plt.show()
def fit(self, Xin, Yin, learning_rate=10e-7, reg=10e-8, epochs=10000, show_figure=False):
    Nvalid = 500
    N, D = Xin.shape
    K = len(np.unique(Yin))

    Xin, Yin = shuffle(Xin, Yin)
    # train on everything except the last Nvalid samples, validate on the rest
    Xtrain, Ytrain = Xin[:-Nvalid, :], Yin[:-Nvalid]
    Xvalid, Yvalid = Xin[-Nvalid:, :], Yin[-Nvalid:]
    Ttrain, Tvalid = y2indicator(Ytrain, K), y2indicator(Yvalid, K)

    # Initialize Wi, bi
    W1_init = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    b1_init = np.random.randn(self.M) / np.sqrt(self.M)
    W2_init = np.random.randn(self.M, K) / np.sqrt(K + self.M)
    b2_init = np.random.randn(K) / np.sqrt(K)

    # Theano shared variables
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')

    # Theano symbolic variables
    thX = T.matrix('X')
    thT = T.matrix('T')
    thZ = sigmoid(thX.dot(W1) + b1)
    thY = T.nnet.softmax(thZ.dot(W2) + b2)

    # Theano updates (T.log keeps the cost symbolic)
    costs = -(thT * T.log(thY) + (1 - thT) * T.log(1 - thY)).sum()
    prediction = T.argmax(thY, axis=1)
    W1_update = W1 - learning_rate * (T.grad(costs, W1) + reg * W1)
    b1_update = b1 - learning_rate * (T.grad(costs, b1) + reg * b1)
    W2_update = W2 - learning_rate * (T.grad(costs, W2) + reg * W2)
    b2_update = b2 - learning_rate * (T.grad(costs, b2) + reg * b2)

    self._train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, W1_update), (b1, b1_update), (W2, W2_update), (b2, b2_update)],
    )
    self._predict = theano.function(
        inputs=[thX, thT],
        outputs=[costs, prediction],
    )

    train_costs = []
    train_errors = []
    valid_costs = []
    valid_errors = []
    for i in range(epochs):
        self._train(Xtrain, Ttrain)
        if i % 10 == 0:
            ctrain, pYtrain = self._predict(Xtrain, Ttrain)
            err = error_rate(Ytrain, pYtrain)
            train_costs.append(ctrain)
            train_errors.append(err)

            cvalid, pYvalid = self._predict(Xvalid, Tvalid)
            err = error_rate(Yvalid, pYvalid)
            valid_costs.append(cvalid)
            valid_errors.append(err)
            print("i=%d\tc=%.3f\terr=%.3f" % (i, cvalid, err))

    # final evaluation on the validation set
    cvalid, pYvalid = self._predict(Xvalid, Tvalid)
    err = error_rate(Yvalid, pYvalid)
    valid_costs.append(cvalid)
    valid_errors.append(err)
    print("i=%d\tc=%.3f\terr=%.3f" % (epochs, cvalid, err))
    print("Final train classification rate", classification_rate(Ytrain, pYtrain))
    print("Final valid classification rate", classification_rate(Yvalid, pYvalid))

    plt.title('Multi layer perceptron: Costs')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(valid_costs, label='valid cost')
    plt.legend([legend1, legend2])
    plt.show()

    plt.title('Multi layer perceptron: Error rates')
    plt.xlabel('iterations')
    plt.ylabel('error rates')
    legend1, = plt.plot(train_errors, label='train error')
    legend2, = plt.plot(valid_errors, label='valid error')
    plt.legend([legend1, legend2])
    plt.show()
def main():
    X, Y = get_ecommerce(user_action=None)
    X, Y = shuffle(X, Y)

    # Define dimensions
    N, D = X.shape
    M = 5
    K = len(np.unique(Y))

    # hold out the last 100 samples for testing (no overlap with the training set)
    Ntest = 100
    Ntrain = N - Ntest
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Ytrain_ind = y2indicator(Ytrain, K)
    Xtest, Ytest = X[-Ntest:, :], Y[-Ntest:]
    Ytest_ind = y2indicator(Ytest, K)

    W1 = np.random.randn(D, M) / np.sqrt(M + D)
    b1 = np.random.randn(M) / np.sqrt(M)
    W2 = np.random.randn(M, K) / np.sqrt(M + K)
    b2 = np.random.randn(K) / np.sqrt(K)

    # Running variables
    lr = 5e-4
    max_iteration = 100000

    train_costs = []
    test_costs = []
    train_errors = []
    test_errors = []
    for i in range(max_iteration):
        pYtrain, Ztrain = forward(W1, b1, W2, b2, Xtrain)
        pYtest, Ztest = forward(W1, b1, W2, b2, Xtest)

        ctrain = cross_entropy(Ytrain_ind, pYtrain)
        ctest = cross_entropy(Ytest_ind, pYtest)
        etrain = error_rate(predict(pYtrain), Ytrain)
        etest = error_rate(predict(pYtest), Ytest)
        train_costs.append(ctrain)
        test_costs.append(ctest)
        train_errors.append(etrain)
        test_errors.append(etest)
        if i % 1000 == 0:
            print("i=%d\ttrain cost=%d\ttest cost=%d\ttrain error=%0.3f" % (
                i, int(ctrain), int(ctest), etrain))

        # backpropagation: sigmoid hidden layer, softmax output
        W2 -= lr * Ztrain.T.dot(pYtrain - Ytrain_ind)
        b2 -= lr * (pYtrain - Ytrain_ind).sum(axis=0)
        dZ = (pYtrain - Ytrain_ind).dot(W2.T) * Ztrain * (1 - Ztrain)
        W1 -= lr * Xtrain.T.dot(dZ)
        b1 -= lr * dZ.sum(axis=0)

    print("i=%d\ttrain cost=%.3f\ttest cost=%.3f" % (max_iteration, ctrain, ctest))
    print("Final train classification rate", classification_rate(Ytrain, predict(pYtrain)))
    print("Final test classification rate", classification_rate(Ytest, predict(pYtest)))

    plt.title('Multi layer perceptron: Costs')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(test_costs, label='test cost')
    plt.legend([legend1, legend2])
    plt.show()

    plt.title('Multi layer perceptron: Error rates')
    plt.xlabel('iterations')
    plt.ylabel('error rates')
    legend1, = plt.plot(train_errors, label='train error')
    legend2, = plt.plot(test_errors, label='test error')
    plt.legend([legend1, legend2])
    plt.show()
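# Hedged sketch (assumption, not from the source): the two-layer forward pass used
# by main() above, with a sigmoid hidden layer (implied by the Ztrain * (1 - Ztrain)
# term in the backprop step) and a softmax output:
import numpy as np

def forward(W1, b1, W2, b2, X):
    Z = 1 / (1 + np.exp(-(X.dot(W1) + b1)))      # sigmoid hidden activations
    A = Z.dot(W2) + b2
    expA = np.exp(A - A.max(axis=1, keepdims=True))
    pY = expA / expA.sum(axis=1, keepdims=True)  # softmax class probabilities
    return pY, Z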
def fit(self, X, Y, learning_rate=10e-8, mu=0.99, decay=0.99, reg=10e-8,
        epochs=400, batch_sz=100, show_figure=False):
    X, Y = shuffle(X, Y)
    K = len(np.unique(Y))
    Y = y2indicator(Y, K).astype(np.float32)

    # hold out the last 1000 samples for validation
    Xvalid, Yvalid = X[-1000:, :], Y[-1000:]
    Yvalid_flat = np.argmax(Yvalid, axis=1)
    Xtrain, Ytrain = X[:-1000, :], Y[:-1000]
    N, D = Xtrain.shape

    # Build hidden layers
    M1 = D
    self.hidden_layers = []
    self.params = []
    for an_id, M2 in enumerate(self.hidden_layer_sizes):
        h = HiddenLayer(M1, M2, an_id)
        self.hidden_layers.append(h)
        self.params += h.params
        M1 = M2

    # output layer weights and biases
    M2 = K
    an_id = len(self.hidden_layer_sizes)
    W, b = init_weight_and_bias(M1, M2)
    self.W = tf.Variable(W.astype(np.float32), name='W%d' % an_id)
    self.b = tf.Variable(b.astype(np.float32), name='b%d' % an_id)
    self.params += [self.W, self.b]

    # placeholders (named separately so they do not shadow the data arrays)
    tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
    tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
    Yish = self.forward(tfX)

    # cost functions: softmax cross-entropy plus L2 regularization
    rcost = reg * tf.reduce_sum([tf.nn.l2_loss(p) for p in self.params])
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=tfY, logits=Yish)) + rcost

    train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)
    predict_op = tf.argmax(Yish, 1)

    LL = []
    n_batches = N // batch_sz
    best_validation_error = 1
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in range(epochs):
            Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
            for j in range(n_batches):
                Xbatch = Xtrain[j * batch_sz:(j + 1) * batch_sz, :]
                Ybatch = Ytrain[j * batch_sz:(j + 1) * batch_sz, :]
                session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                if j % 100 == 0:
                    pY = session.run(predict_op, feed_dict={tfX: Xvalid})
                    c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
                    err = error_rate(Yvalid_flat, pY)
                    LL.append(c)
                    print("i:%d\tj:%d\tnb:%d\tc:%.3f\te:%.3f" % (i, j, n_batches, c, err))
                    if err < best_validation_error:
                        best_validation_error = err

    print("best_validation_error:", best_validation_error)
    if show_figure:
        plt.plot(LL)
        plt.show()