import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle


def main():
    X, Y = get_ecommerce(user_action=None)
    X, Y = shuffle(X, Y)
    N, D = X.shape
    Y = Y.astype(np.int32)
    K = len(np.unique(Y))

    # Hold out the last 100 rows for testing.
    Ntest = 100
    Ntrain = N - Ntest
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Ytrain_ind = y2indicator(Ytrain, K)
    Xtest, Ytest = X[-Ntest:, :], Y[-Ntest:]
    Ytest_ind = y2indicator(Ytest, K)

    # Hyperparameters.
    lr = 5e-3
    max_iteration = 10000
    W = np.random.randn(D, K) / np.sqrt(D + K)
    b = np.zeros(K)

    train_costs = []
    test_costs = []
    for i in range(max_iteration):
        pYtrain = forward(W, b, Xtrain)
        pYtest = forward(W, b, Xtest)

        ctrain = cross_entropy(Ytrain_ind, pYtrain)
        ctest = cross_entropy(Ytest_ind, pYtest)
        train_costs.append(ctrain)
        test_costs.append(ctest)

        # Gradient descent on the softmax cross-entropy.
        W -= lr * Xtrain.T.dot(pYtrain - Ytrain_ind)
        b -= lr * (pYtrain - Ytrain_ind).sum(axis=0)

        if i % 1000 == 0:
            print("i=%d\ttrain cost=%.3f\ttest cost=%.3f" % (i, ctrain, ctest))

    print("i=%d\ttrain cost=%.3f\ttest cost=%.3f" % (max_iteration, ctrain, ctest))
    print("Final train classification rate", classification_rate(Ytrain, predict(pYtrain)))
    print("Final test classification rate", classification_rate(Ytest, predict(pYtest)))

    plt.title('logistic regression + softmax')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(test_costs, label='test cost')
    plt.legend([legend1, legend2])
    plt.show()
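# These scripts lean on a handful of project helpers that are not defined in
# this section (get_ecommerce, y2indicator, forward, cross_entropy, predict,
# classification_rate, error_rate). A minimal sketch of plausible
# implementations follows -- the course's actual utilities may differ:

def y2indicator(y, K):
    # One-hot encode an integer label vector into an N x K indicator matrix.
    N = len(y)
    ind = np.zeros((N, K))
    ind[np.arange(N), y] = 1
    return ind

def softmax(a):
    expA = np.exp(a - a.max(axis=1, keepdims=True))  # shift for stability
    return expA / expA.sum(axis=1, keepdims=True)

def forward(W, b, X):
    # Softmax output of a linear model.
    return softmax(X.dot(W) + b)

def cross_entropy(T_ind, pY):
    # Mean categorical cross-entropy between one-hot targets and predictions.
    return -np.mean(T_ind * np.log(pY))

def predict(pY):
    # Most probable class per row.
    return np.argmax(pY, axis=1)

def classification_rate(Y, P):
    return np.mean(Y == P)

def error_rate(Y, P):
    return np.mean(Y != P)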
# Fragment from inside the training loop; `i` is the epoch counter of the
# enclosing loop (not shown here).
for j in range(num_batches):
    batch_start = batch_size * j
    batch_end = batch_size * (j + 1)
    X_batch = X_train[batch_start:batch_end, :]
    Y_batch = Y_train_E[batch_start:batch_end, :]
    pred, cost, _ = sess.run([Y_hat, loss, opt],
                             feed_dict={X: X_batch, Y: Y_batch})

if i % 50 == 0:
    # Checkpoint, then evaluate on the dev split.
    saver.save(sess, './' + test_name + '/training', global_step=i)
    pred_dev = sess.run(Y_hat, feed_dict={X: X_dev, Y: Y_dev_E})
    pred_dev = np.argmax(pred_dev, axis=1)
    misclassified, class_rate = classification_rate(Y_dev, pred_dev)
    print('--------------------')
    print('---| iter ' + str(i))
    print('---| cost ' + str(cost))
    print('---| dev class ' + str(class_rate))
    print('---| dev misclassified ' + str(misclassified))

if i % 250 == 0:
    # Evaluate on the train-dev split without taking a training step:
    # running `opt` here would update the weights on train-dev data.
    pred_train_dev, cost = sess.run([Y_hat, loss],
                                    feed_dict={X: X_train_dev,
                                               Y: Y_train_dev_E})
    pred_train_dev = np.argmax(pred_train_dev, axis=1)
    misclassified, class_rate = classification_rate(Y_train_dev, pred_train_dev)
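# The fragment above assumes a TensorFlow 1.x graph and session built earlier
# in the file (X, Y, Y_hat, loss, opt, saver, sess), along with the data
# splits. A minimal sketch of a plausible setup -- the single softmax layer
# and learning rate are assumptions; the project's actual model may be deeper:

import numpy as np
import tensorflow as tf

D = X_train.shape[1]    # input dimensionality
K = Y_train_E.shape[1]  # number of classes (targets are one-hot)

X = tf.placeholder(tf.float32, shape=(None, D), name='X')
Y = tf.placeholder(tf.float32, shape=(None, K), name='Y')
W = tf.Variable(tf.random_normal([D, K], stddev=0.01))
b = tf.Variable(tf.zeros([K]))
logits = tf.matmul(X, W) + b
Y_hat = tf.nn.softmax(logits)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits))
opt = tf.train.GradientDescentOptimizer(learning_rate=1e-3).minimize(loss)
saver = tf.train.Saver()

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Note: classification_rate() in this file returns a pair
# (misclassified_count, accuracy), unlike the single-value helper above:
def classification_rate(Y_true, Y_pred):
    misclassified = int(np.sum(Y_true != Y_pred))
    return misclassified, np.mean(Y_true == Y_pred)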
import theano
import theano.tensor as T


def main():
    X, Y = get_ecommerce(user_action=None)
    X, Y = shuffle(X, Y)

    # Hyperparameters.
    learning_rate = 5e-4
    max_iterations = 10000

    # Dimensions.
    N, D = X.shape
    M = 5  # hidden units
    K = len(np.unique(Y))

    Ntest = 100
    Ntrain = N - Ntest
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Ytrain_ind = y2indicator(Ytrain, K)
    Xtest, Ytest = X[-Ntest:, :], Y[-Ntest:]
    Ytest_ind = y2indicator(Ytest, K)

    W1_init = np.random.randn(D, M) / np.sqrt(D + M)
    b1_init = np.random.randn(M) / np.sqrt(M)
    W2_init = np.random.randn(M, K) / np.sqrt(M + K)
    b2_init = np.random.randn(K) / np.sqrt(K)

    # Theano shared variables.
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')

    # Symbolic inputs.
    thX = T.matrix('X')
    thT = T.matrix('T')

    # Forward pass and cost. Note T.log, not np.log: the graph is symbolic.
    # This is the summed binary cross-entropy applied elementwise to the
    # softmax outputs; the plain categorical form would be
    # -(thT * T.log(thY)).sum().
    thZ = T.nnet.sigmoid(thX.dot(W1) + b1)
    thY = T.nnet.softmax(thZ.dot(W2) + b2)
    cost = -(thT * T.log(thY) + (1 - thT) * T.log(1 - thY)).sum()
    prediction = T.argmax(thY, axis=1)

    # Gradient descent updates.
    W1_update = W1 - learning_rate * T.grad(cost, W1)
    b1_update = b1 - learning_rate * T.grad(cost, b1)
    W2_update = W2 - learning_rate * T.grad(cost, W2)
    b2_update = b2 - learning_rate * T.grad(cost, b2)

    train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, W1_update), (b1, b1_update),
                 (W2, W2_update), (b2, b2_update)],
    )
    predict = theano.function(
        inputs=[thX, thT],
        outputs=[cost, prediction],
    )

    train_errors, test_errors = [], []
    train_costs, test_costs = [], []
    for i in range(max_iterations):
        train(Xtrain, Ytrain_ind)
        if i % 10 == 0:
            c, pYtrain = predict(Xtrain, Ytrain_ind)
            train_costs.append(c)
            train_errors.append(error_rate(Ytrain, pYtrain))
            c, pYtest = predict(Xtest, Ytest_ind)
            err = error_rate(Ytest, pYtest)
            test_costs.append(c)
            test_errors.append(err)
            print("i=%d\tc=%.3f\terr=%.3f" % (i, c, err))

    print("i=%d\tc=%.3f\terr=%.3f" % (max_iterations, c, err))
    print("Final train classification rate", classification_rate(Ytrain, pYtrain))
    print("Final test classification rate", classification_rate(Ytest, pYtest))

    plt.title('Multi layer perceptron: Costs')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(test_costs, label='test cost')
    plt.legend([legend1, legend2])
    plt.show()

    plt.title('Multi layer perceptron: Error rates')
    plt.xlabel('iterations')
    plt.ylabel('error rates')
    legend1, = plt.plot(train_errors, label='train error')
    legend2, = plt.plot(test_errors, label='test error')
    plt.legend([legend1, legend2])
    plt.show()
def fit(self, Xin, Yin, learning_rate=10e-7, reg=10e-8, epochs=10000,
        show_figure=False):
    N, D = Xin.shape
    K = len(np.unique(Yin))
    Xin, Yin = shuffle(Xin, Yin)

    # Hold out the last Nvalid rows for validation; train on the rest.
    Nvalid = 500
    Xtrain, Ytrain = Xin[:-Nvalid, :], Yin[:-Nvalid]
    Xvalid, Yvalid = Xin[-Nvalid:, :], Yin[-Nvalid:]
    Ttrain, Tvalid = y2indicator(Ytrain, K), y2indicator(Yvalid, K)

    # Initialize weights and biases.
    W1_init = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    b1_init = np.random.randn(self.M) / np.sqrt(self.M)
    W2_init = np.random.randn(self.M, K) / np.sqrt(self.M + K)
    b2_init = np.random.randn(K) / np.sqrt(K)

    # Theano shared variables.
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')

    # Symbolic graph (T.log, not np.log: these are symbolic tensors).
    thX = T.matrix('X')
    thT = T.matrix('T')
    thZ = T.nnet.sigmoid(thX.dot(W1) + b1)
    thY = T.nnet.softmax(thZ.dot(W2) + b2)
    costs = -(thT * T.log(thY) + (1 - thT) * T.log(1 - thY)).sum()
    prediction = T.argmax(thY, axis=1)

    # Gradient descent updates with L2 regularization.
    W1_update = W1 - learning_rate * (T.grad(costs, W1) + reg * W1)
    b1_update = b1 - learning_rate * (T.grad(costs, b1) + reg * b1)
    W2_update = W2 - learning_rate * (T.grad(costs, W2) + reg * W2)
    b2_update = b2 - learning_rate * (T.grad(costs, b2) + reg * b2)

    self._train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, W1_update), (b1, b1_update),
                 (W2, W2_update), (b2, b2_update)],
    )
    self._predict = theano.function(
        inputs=[thX, thT],
        outputs=[costs, prediction],
    )

    train_costs, train_errors = [], []
    valid_costs, valid_errors = [], []
    for i in range(epochs):
        self._train(Xtrain, Ttrain)
        if i % 10 == 0:
            ctrain, pYtrain = self._predict(Xtrain, Ttrain)
            train_costs.append(ctrain)
            train_errors.append(error_rate(Ytrain, pYtrain))
            cvalid, pYvalid = self._predict(Xvalid, Tvalid)
            err = error_rate(Yvalid, pYvalid)
            valid_costs.append(cvalid)
            valid_errors.append(err)
            print("i=%d\tc=%.3f\terr=%.3f" % (i, cvalid, err))

    # Final evaluation after the last epoch.
    cvalid, pYvalid = self._predict(Xvalid, Tvalid)
    err = error_rate(Yvalid, pYvalid)
    valid_costs.append(cvalid)
    valid_errors.append(err)
    print("i=%d\tc=%.3f\terr=%.3f" % (epochs, cvalid, err))
    print("Final train classification rate", classification_rate(Ytrain, pYtrain))
    print("Final valid classification rate", classification_rate(Yvalid, pYvalid))

    if show_figure:
        plt.title('Multi layer perceptron: Costs')
        plt.xlabel('iterations')
        plt.ylabel('costs')
        legend1, = plt.plot(train_costs, label='train cost')
        legend2, = plt.plot(valid_costs, label='valid cost')
        plt.legend([legend1, legend2])
        plt.show()

        plt.title('Multi layer perceptron: Error rates')
        plt.xlabel('iterations')
        plt.ylabel('error rates')
        legend1, = plt.plot(train_errors, label='train error')
        legend2, = plt.plot(valid_errors, label='valid error')
        plt.legend([legend1, legend2])
        plt.show()
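# Assuming this fit() lives on a small wrapper class (call it ANN) whose
# constructor takes the hidden layer size M, usage would look roughly like:
#
#   model = ANN(M=5)
#   model.fit(X, Y, show_figure=True)
#   print("score:", model.score(Xtest, Ytest))
#
# The class name and constructor signature are assumptions; only fit() and
# score() appear in this section.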
def score(self, X, Y):
    # Mean classification accuracy; assumes the class defines predict().
    pY = self.predict(X)
    return classification_rate(Y, pY)
def score(self, size):
    # Evaluate KNN accuracy on the first `size` stored samples.
    print("Measuring KNN performance... It will take a while.")
    # perform_knn is assumed to return (predicted labels, probabilities).
    y, p = self.perform_knn(self.X[:size, :])
    print("classification rate: ", classification_rate(self.Y[:size], y))
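# perform_knn() is not shown in this section. A minimal brute-force sketch
# under the assumed contract above -- it classifies the query rows against the
# stored self.X / self.Y and returns (predicted_labels, class_probabilities);
# k=5 and numpy imported as np are assumptions:

def perform_knn(self, Xq, k=5):
    # Pairwise squared distances between queries and stored training points.
    d2 = ((Xq[:, None, :] - self.X[None, :, :]) ** 2).sum(axis=2)
    idx = np.argsort(d2, axis=1)[:, :k]   # indices of the k nearest neighbours
    neighbour_labels = self.Y[idx]        # shape (Nq, k)
    K = len(np.unique(self.Y))
    # Vote counts per class, normalized to probabilities.
    p = np.stack([(neighbour_labels == c).mean(axis=1) for c in range(K)],
                 axis=1)
    y = p.argmax(axis=1)
    return y, p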
def main():
    X, Y = get_ecommerce(user_action=None)
    X, Y = shuffle(X, Y)

    # Dimensions.
    N, D = X.shape
    M = 5  # hidden units
    K = len(np.unique(Y))

    Ntest = 100
    Ntrain = N - Ntest
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Ytrain_ind = y2indicator(Ytrain, K)
    Xtest, Ytest = X[-Ntest:, :], Y[-Ntest:]
    Ytest_ind = y2indicator(Ytest, K)

    W1 = np.random.randn(D, M) / np.sqrt(D + M)
    b1 = np.random.randn(M) / np.sqrt(M)
    W2 = np.random.randn(M, K) / np.sqrt(M + K)
    b2 = np.random.randn(K) / np.sqrt(K)

    # Hyperparameters.
    lr = 5e-4
    max_iteration = 100000

    train_costs, test_costs = [], []
    train_errors, test_errors = [], []
    for i in range(max_iteration):
        pYtrain, Ztrain = forward(W1, b1, W2, b2, Xtrain)
        pYtest, Ztest = forward(W1, b1, W2, b2, Xtest)

        ctrain = cross_entropy(Ytrain_ind, pYtrain)
        ctest = cross_entropy(Ytest_ind, pYtest)
        etrain = error_rate(Ytrain, predict(pYtrain))
        etest = error_rate(Ytest, predict(pYtest))
        train_costs.append(ctrain)
        test_costs.append(ctest)
        train_errors.append(etrain)
        test_errors.append(etest)

        if i % 1000 == 0:
            print("i=%d\ttrain cost=%.3f\ttest cost=%.3f\ttrain error=%.3f" % (i, ctrain, ctest, etrain))

        # Backpropagation. Compute all gradients before updating any weights,
        # so the hidden-layer gradient uses the pre-update W2.
        delta2 = pYtrain - Ytrain_ind
        dZ = delta2.dot(W2.T) * Ztrain * (1 - Ztrain)  # sigmoid derivative
        W2 -= lr * Ztrain.T.dot(delta2)
        b2 -= lr * delta2.sum(axis=0)
        W1 -= lr * Xtrain.T.dot(dZ)
        b1 -= lr * dZ.sum(axis=0)

    print("i=%d\ttrain cost=%.3f\ttest cost=%.3f" % (max_iteration, ctrain, ctest))
    print("Final train classification rate", classification_rate(Ytrain, predict(pYtrain)))
    print("Final test classification rate", classification_rate(Ytest, predict(pYtest)))

    plt.title('Multi layer perceptron: Costs')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(test_costs, label='test cost')
    plt.legend([legend1, legend2])
    plt.show()

    plt.title('Multi layer perceptron: Error rates')
    plt.xlabel('iterations')
    plt.ylabel('error rates')
    legend1, = plt.plot(train_errors, label='train error')
    legend2, = plt.plot(test_errors, label='test error')
    plt.legend([legend1, legend2])
    plt.show()
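# forward() here differs from the single-layer version earlier: it takes both
# layers' weights and returns the output probabilities together with the
# hidden activations needed for backprop. A plausible sketch:

def forward(W1, b1, W2, b2, X):
    Z = 1 / (1 + np.exp(-(X.dot(W1) + b1)))          # sigmoid hidden layer
    A = Z.dot(W2) + b2
    expA = np.exp(A - A.max(axis=1, keepdims=True))  # stable softmax
    pY = expA / expA.sum(axis=1, keepdims=True)
    return pY, Z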