m = len(y)  # number of samples
for x in range(max_iteration):
    # call the functions for gradient descent method
    new_theta = Gradient_Descent(X, y, theta, m, alpha)
    theta = new_theta
    if x % 200 == 0:
        # calculate the cost function with the present theta
        cost = Cost_Function(X, y, theta, m)
        print('theta ', theta)
        print('cost is ', cost)

score = 0
for i in range(len(testX)):
    prediction = round(Prediction(testX[i], theta))
    answer = testY[i]
    if prediction == answer:
        score += 1
gdScore = float(score) / float(len(testX))

print('Coefficients from sklearn:', clf.coef_)
print('Coefficients from gradient descent:', theta)
print('Score from sklearn: ', clf.score(testX, testY))
print('Score from gradient descent: ', gdScore)
######################PLACEHOLDER2 #end #########################

# step 3: Use the model to get class labels of testing samples.
######################PLACEHOLDER3 #start#########################
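# The training loop and the prediction step rely on three helpers defined in
# an omitted part of the script. Below is a minimal sketch of plausible
# logistic-regression versions; these Prediction, Cost_Function, and
# Gradient_Descent bodies are hypothetical reconstructions, not the script's
# actual definitions.
import math

def Prediction(xi, theta):
    # sigmoid of the dot product theta . xi
    z = sum(t * x for t, x in zip(theta, xi))
    return 1.0 / (1.0 + math.exp(-z))

def Cost_Function(X, y, theta, m):
    # average cross-entropy loss over the m training samples
    total = 0.0
    for xi, yi in zip(X, y):
        h = Prediction(xi, theta)
        total += -yi * math.log(h) - (1 - yi) * math.log(1 - h)
    return total / m

def Gradient_Descent(X, y, theta, m, alpha):
    # one batch update: theta_j -= (alpha / m) * sum_i (h(x_i) - y_i) * x_ij
    new_theta = list(theta)
    for j in range(len(theta)):
        grad = sum((Prediction(xi, theta) - yi) * xi[j]
                   for xi, yi in zip(X, y))
        new_theta[j] = theta[j] - (alpha / m) * grad
    return new_theta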
    parser.add_argument('classifier', type=argparse.FileType('r'),
                        help='Input pickle file with classifier to re-use')
    parser.add_argument('predictions', type=argparse.FileType('r'),
                        help='Input file with predicted words and locations')
    return parser


if __name__ == "__main__":
    args = opts().parse_args()

    print >>sys.stderr, "Loading test data"
    X = load(args.data)
    X = np.asarray(X, dtype=np.float32)
    X = np.nan_to_num(X)

    print >>sys.stderr, "Loading classifier"
    clf = load_classifier(args.classifier)

    print >>sys.stderr, "Predicting decisions"
    d = clf.predict(X)

    print >>sys.stderr, "Performing decisions on stdin"
    for di, line, pred in izip(d, sys.stdin, args.predictions):
        pred = Prediction.parse(pred)
        words = tokenize_words(line)
        if di == 0:
            # do nothing
            pass
        elif di == 1:
            # insert space
            words.insert(pred.location, ' ')
        else:
            # insert word
            words.insert(pred.location, pred.word)
        print ' '.join(words)
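# load_classifier and Prediction.parse are defined elsewhere in this project.
# A minimal sketch of what they might look like (hypothetical; the
# "location<TAB>word" line format is an assumption, not confirmed by the
# source):
import pickle

def load_classifier(f):
    # f is an already-open file handle from argparse.FileType('r')
    return pickle.load(f)

class Prediction(object):
    def __init__(self, location, word):
        self.location = location
        self.word = word

    @classmethod
    def parse(cls, line):
        # one prediction per line: an insertion index and the word to insert
        location, word = line.rstrip('\n').split('\t', 1)
        return cls(int(location), word)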
# calculate the cost function with the present theta
cost = Cost_Function(X, Y, theta, m)
print 'theta ', theta
print 'cost is ', cost

########################################################################
################# Step-5: comparing two models                 #########
########################################################################
## comparing accuracies of the two models
score = 0
winner = ""
# accuracy for sklearn
scikit_score = clf.score(X_test, Y_test)
length = len(X_test)
for i in xrange(length):
    prediction = round(Prediction(X_test[i], theta))
    answer = Y_test[i]
    if prediction == answer:
        score += 1
my_score = float(score) / float(length)

if my_score > scikit_score:
    print 'You won!'
elif my_score == scikit_score:
    print "It's a tie!"
else:
    print 'Scikit won.. :('
print 'Your score: ', my_score
print "Scikit's score: ", scikit_score
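# The elementwise accuracy loop above can be written more compactly with
# numpy (a sketch; assumes X_test is indexable row-wise and Prediction
# returns a probability in [0, 1]):
import numpy as np

preds = np.array([round(Prediction(x, theta)) for x in X_test])
my_score = float(np.mean(preds == np.asarray(Y_test)))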
print('For the Scikit Linear Regression:\n\tMeanSquaredError: {}\n\tRootMeanSquaredError: {}'.format(
    mean_squared_error(testY, scikit_y_pred),
    np.sqrt(mean_squared_error(testY, scikit_y_pred))))
func_calConfusionMatrix(scikit_y_pred, testY)

# Use the developed model
score = 0
theta = [0, 0]        # initial model parameters
alpha = 0.1           # learning rate
max_iteration = 1000  # maximal number of iterations
m = len(y)            # number of samples
for x in range(max_iteration):
    # Call the functions for gradient descent method
    new_theta = Gradient_Descent(X, y, theta, m, alpha)
    theta = new_theta
    if x % 200 == 0:
        # Calculate the cost function with the present theta
        cost = Cost_Function(X, y, theta, m)
        print('theta {}'.format(theta))
        print('cost is {}'.format(cost))

length = len(testX)
develop_y_pred = [0] * length  # predictions on the test set for the developed model
# Run predictions using the final theta
for i in range(length):
    develop_y_pred[i] = round(Prediction(testX[i], theta))

print('For the Developed Linear Regression:\n\tMeanSquaredError: {}\n\tRootMeanSquaredError: {}'.format(
    mean_squared_error(testY, develop_y_pred),
    np.sqrt(mean_squared_error(testY, develop_y_pred))))
func_calConfusionMatrix(develop_y_pred, testY)
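# func_calConfusionMatrix is defined in an omitted part of the script. A
# minimal sketch of a plausible implementation built on sklearn
# (hypothetical, shown only so the snippet is self-contained):
from sklearn.metrics import accuracy_score, confusion_matrix

def func_calConfusionMatrix(predY, trueY):
    # print the overall accuracy and the full confusion matrix
    print('Accuracy: {}'.format(accuracy_score(trueY, predY)))
    print('Confusion matrix:\n{}'.format(confusion_matrix(trueY, predY)))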