# Pretty-printer retained from the original script (not used in this section).
pp = pprint.PrettyPrinter(depth=6)


def get_best_k_score_noscale(x, y):
    """Return ``(k, score)`` for the best k in 1..50 by mean 5-fold CV accuracy.

    The CV splitter is seeded (random_state=42) so results are reproducible.
    Selection uses a stable ascending sort and takes the last entry, exactly
    as the original did, so tie-breaking behavior is unchanged.
    """
    folds = KFold(len(x), n_folds=5, shuffle=True, random_state=42)
    # Mean cross-validated accuracy for every candidate neighbour count.
    cv_mean = {
        n_neighbors: mean(
            cross_val_score(KNeighborsClassifier(n_neighbors=n_neighbors), x, y, cv=folds)
        )
        for n_neighbors in range(1, 51)
    }
    ranked = sorted(cv_mean.items(), key=operator.itemgetter(1))
    return ranked[-1]


# First column of wine.data is the class label; columns 1..13 are the features.
wine_data = pandas.read_csv('Data/wine.data', header=None)
datay = wine_data[0]
datax = wine_data[wine_data.columns[1:14]]

# questions 1 and 2: best k and its score on the raw (unscaled) features.
k, score = get_best_k_score_noscale(datax, datay)
save_to_file("Submissions/question1.txt", str(k))
save_to_file("Submissions/question2.txt", str(score))

# questions 3 and 4: the same search after standardising every feature.
datax_scale = scale(datax)
k, score = get_best_k_score_noscale(datax_scale, datay)
save_to_file("Submissions/question3.txt", str(k))
save_to_file("Submissions/question4.txt", str(score))
import operator  # FIX: operator.itemgetter is used below but was never imported
import pprint    # FIX: pprint.PrettyPrinter is used below but was never imported
from statistics import mean

import numpy as np
import sklearn
import sklearn.datasets
from sklearn.cross_validation import KFold, cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import scale

from Common.common_io import save_to_file

pp = pprint.PrettyPrinter(depth=6)

# Boston housing data; features are standardised, target is left as-is.
boston = sklearn.datasets.load_boston()
x = scale(boston.data)
y = boston.target

# Reproducible 5-fold CV splitter shared by every candidate p.
kf = KFold(len(x), n_folds=5, shuffle=True, random_state=42)

# Grid-search the Minkowski metric power p over 200 points in [1, 10] for a
# distance-weighted 5-NN regressor, keeping the mean CV score for each p.
# NOTE(review): in this sklearn API the 'mean_squared_error' scorer follows the
# greater-is-better convention (sign-flipped), so the maximum — last after the
# ascending sort — corresponds to the smallest error; confirm for this version.
result = dict()
for p in np.linspace(1, 10, num=200):
    regressor = KNeighborsRegressor(n_neighbors=5, weights='distance', p=p)
    scores = cross_val_score(regressor, x, y, cv=kf, scoring='mean_squared_error')
    result[p] = mean(scores)

sorted_x = sorted(result.items(), key=operator.itemgetter(1))
pp.pprint(sorted_x)
# Persist the best p (the one with the highest mean score).
save_to_file("Submissions/question1.txt", str(sorted_x[-1][0]))
# Sanity check on the training data loaded above this section.
print(len(train_data))


def my_acc_score(test, pred):
    """Return the fraction of positions where ``test`` and ``pred`` agree.

    Hand-rolled equivalent of ``accuracy_score``; defined here but unused
    below. The denominator is ``len(pred)``, matching the original.
    """
    # Idiomatic rewrite of the original manual index-and-count loop.
    matches = sum(1 for actual, predicted in zip(test, pred) if actual == predicted)
    return matches / len(pred)


def get_accuracy_score(perc, trainx, trainy, testx, testy):
    """Fit ``perc`` on the training split and return its test-set accuracy."""
    perc.fit(trainx, trainy)
    y_pred = perc.predict(testx)
    return accuracy_score(testy, y_pred)


# A single estimator instance is reused: fit() re-initialises it per call.
perc1 = Perceptron(random_state=241)

# non-normalized
score_nonnorm = get_accuracy_score(perc1, x_train, y_train, x_test, y_test)

# normalized
score_norm = get_accuracy_score(perc1, x_train_scaled, y_train, x_test_scaled, y_test)

# diff: how much feature scaling improved test accuracy.
print("Score non-normalized: {0}".format(score_nonnorm))
print("Score normalized: {0}".format(score_norm))
diff = score_norm - score_nonnorm
print("Score advancement: {0}".format(diff))
save_to_file("Submissions/question1.txt", str(diff))
# NOTE(review): this span is the TAIL of a gradient-descent helper — it is
# called below as get_score(X, y, reg=...) — but its `def` line and the
# initialization of w1, w2 and the step size k sit above this view.
# Indentation below is reconstructed from the collapsed source; confirm
# against the full file before relying on it.
    # L2 regularization strength, applied only when `reg` is True.
    C = 10
    # Gradient steps on the two logistic-regression weights, at most 10001
    # iterations; each update averages the per-sample gradient over all of y.
    for _ in range(10001):
        w1_temp = w1 + k * (1.0 / len(y)) * sum([y[i]*X[i][0] * (1 - (1.0 / (1 + exp(-y[i] * (w1 * X[i][0] + w2 * X[i][1]))))) for i in range(len(y))])
        w2_temp = w2 + k * (1.0 / len(y)) * sum([y[i]*X[i][1] * (1 - (1.0 / (1 + exp(-y[i] * (w1 * X[i][0] + w2 * X[i][1]))))) for i in range(len(y))])
        if reg:
            # Ridge penalty term k*C*w subtracted from each proposed weight.
            w1_temp -= k*C*w1
            w2_temp -= k*C*w2
        # Converged when the Euclidean step length drops below 1e-5.
        # NOTE(review): the break fires BEFORE w1/w2 are updated, so the final
        # w*_temp step is discarded — the previous iteration's weights are kept.
        if sqrt((w1_temp-w1)**2 + (w2_temp-w2)**2) < 0.00001:
            break
        w1 = w1_temp
        w2 = w2_temp
    # Sigmoid score for every sample, then AUC against the true labels.
    a = [(1 / (1 + exp(-w1*X[i][0] - w2 *X[i][1]))) for i in range(len(y))]
    return roc_auc_score(y, a)


# Module level: column 0 of the CSV is the label; columns 1 and 2 the features.
train_data = genfromtxt('Data/data-logistic.csv', delimiter=',')
y = train_data[:, 0]
X = train_data[:, [1, 2]]

# AUC without and with L2 regularization; both rounded to 3 places and saved.
score_noreg = get_score(X,y,reg=False)
score_reg = get_score(X,y,reg=True)
answer = "{0} {1}".format(round(score_noreg,3), round(score_reg,3))
save_to_file("Submissions/question1.txt", answer)
print(answer)