# --- Example 1 ---
# Pretty-printer for debug output, capped at 6 nesting levels.
# NOTE(review): `pprint` is not imported in this snippet, and `pp` is never
# used in this example — confirm against the full file.
pp = pprint.PrettyPrinter(depth=6)


def get_best_k_score_noscale(x, y, k_max=50):
    """Find the best ``n_neighbors`` for a KNN classifier by cross-validation.

    Evaluates k = 1 .. k_max with 5-fold CV (fixed seed, so results are
    reproducible) and returns the winner.

    Args:
        x: feature matrix (not scaled by this function, hence the name).
        y: target labels.
        k_max: largest neighbor count to try; defaults to 50, matching the
            original hard-coded ``range(1, 51)``.

    Returns:
        Tuple ``(best_k, best_mean_score)``.
    """
    kf = KFold(len(x), n_folds=5, shuffle=True, random_state=42)
    result = dict()
    for k in range(1, k_max + 1):
        knn = KNeighborsClassifier(n_neighbors=k)
        scores = cross_val_score(knn, x, y, cv=kf)
        result[k] = mean(scores)
    # Ascending sort by mean score; the last entry is the best.  sorted() is
    # stable, so on ties the largest tied k wins — same as the original code.
    sorted_x = sorted(result.items(), key=operator.itemgetter(1))
    return sorted_x[-1]




# Wine dataset: column 0 holds the class label, columns 1-13 the features.
wine_data = pandas.read_csv('Data/wine.data', header=None)
datay = wine_data[0]
datax = wine_data[wine_data.columns[1:14]]

# Questions 1 & 2: best k and its CV score on the raw (unscaled) features.
best_k, best_score = get_best_k_score_noscale(datax, datay)
save_to_file("Submissions/question1.txt", str(best_k))
save_to_file("Submissions/question2.txt", str(best_score))

# Questions 3 & 4: repeat the search after standardizing the features.
datax_scale = scale(datax)
best_k, best_score = get_best_k_score_noscale(datax_scale, datay)
save_to_file("Submissions/question3.txt", str(best_k))
save_to_file("Submissions/question4.txt", str(best_score))
# --- Example 2 ---
import operator
import pprint
from statistics import mean

import numpy as np
import sklearn
import sklearn.datasets
# NOTE: sklearn.cross_validation was removed in modern scikit-learn; the
# replacements live in sklearn.model_selection.
from sklearn.cross_validation import KFold, cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import scale

from Common.common_io import save_to_file

# Pretty-printer for dumping the full (p, score) ranking.
pp = pprint.PrettyPrinter(depth=6)

# Boston housing data: standardize the features, leave the target as-is.
boston = sklearn.datasets.load_boston()
x = scale(boston.data)
y = boston.target

# One reproducible 5-fold split shared by every candidate p.
kf = KFold(len(x), n_folds=5, shuffle=True, random_state=42)

# Sweep the Minkowski power parameter p over 200 points in [1, 10] for a
# distance-weighted 5-NN regressor, keeping the mean CV score for each p.
result = {
    p: mean(cross_val_score(
        KNeighborsRegressor(n_neighbors=5, weights='distance', p=p),
        x, y, cv=kf, scoring='mean_squared_error'))
    for p in np.linspace(1, 10, num=200)
}

# NOTE(review): sklearn's 'mean_squared_error' scorer negates MSE so that
# higher is better, which is why the maximum (last after ascending sort)
# is taken — confirm with the installed sklearn version.
sorted_x = sorted(result.items(), key=operator.itemgetter(1))
pp.pprint(sorted_x)


save_to_file("Submissions/question1.txt", str(sorted_x[-1][0]))
# --- Example 3 ---
# Sanity check: print the training-set size.
# NOTE(review): `train_data` is not defined in this snippet — presumably it
# is loaded earlier in the full file; confirm.
print(len(train_data))


def my_acc_score(test, pred):
    """Return the fraction of positions where `test` and `pred` agree.

    Hand-rolled equivalent of sklearn's accuracy_score: walks `test`,
    compares each element with the prediction at the same index, and divides
    the match count by len(pred).
    """
    matches = sum(1 for i, expected in enumerate(test) if expected == pred[i])
    return matches / len(pred)


def get_accuracy_score(perc, trainx, trainy, testx, testy):
    """Fit `perc` on the training split and return its accuracy on the test split."""
    perc.fit(trainx, trainy)
    predicted = perc.predict(testx)
    return accuracy_score(testy, predicted)

# Perceptron with the fixed seed required by the assignment.
perc1 = Perceptron(random_state=241)

# Accuracy on the raw features, then on the standardized ones.  Refitting
# the same estimator discards the previously learned weights.
raw_score = get_accuracy_score(perc1, x_train, y_train, x_test, y_test)
scaled_score = get_accuracy_score(perc1, x_train_scaled, y_train, x_test_scaled, y_test)

print("Score non-normalized: {0}".format(raw_score))
print("Score normalized: {0}".format(scaled_score))

# The submission answer is the accuracy gain from feature scaling.
improvement = scaled_score - raw_score
print("Score advancement: {0}".format(improvement))
save_to_file("Submissions/question1.txt", str(improvement))


# --- Example 4 ---
    # NOTE(review): the enclosing `def` header is not visible in this
    # snippet; from the calls below it is presumably get_score(X, y, reg).
    # L2 regularization coefficient — only applied when `reg` is truthy.
    C = 10

    # Gradient ascent on the logistic-regression log-likelihood for the two
    # weights w1, w2, capped at 10001 iterations.  `k` (the learning rate)
    # and the initial w1/w2 are defined above this view — confirm.
    for _ in range(10001):

        # Per-weight gradient step, averaged over all samples.
        # NOTE(review): exp() can overflow for large |w.x| (math.exp raises
        # OverflowError) — confirm inputs stay small in practice.
        w1_temp = w1 + k * (1.0 / len(y)) * sum([y[i]*X[i][0] * (1 - (1.0 / (1 + exp(-y[i] * (w1 * X[i][0] + w2 * X[i][1]))))) for i in range(len(y))])
        w2_temp = w2 + k * (1.0 / len(y)) * sum([y[i]*X[i][1] * (1 - (1.0 / (1 + exp(-y[i] * (w1 * X[i][0] + w2 * X[i][1]))))) for i in range(len(y))])

        # L2 penalty: shrink both weights toward zero.
        if reg:
            w1_temp -= k*C*w1
            w2_temp -= k*C*w2

        # Converged: stop once the update is below 1e-5 in Euclidean norm.
        if sqrt((w1_temp-w1)**2 + (w2_temp-w2)**2) < 0.00001:
            break

        w1 = w1_temp
        w2 = w2_temp

    # Sigmoid of the learned linear score gives class-1 probabilities,
    # which are then ranked against y via ROC-AUC.
    a = [(1 / (1 + exp(-w1*X[i][0] - w2 *X[i][1]))) for i in range(len(y))]
    return roc_auc_score(y, a)


# data-logistic.csv has no header: column 0 is the label, columns 1-2 the
# two features.
train_data = genfromtxt('Data/data-logistic.csv', delimiter=',')
y = train_data[:, 0]
X = train_data[:, [1, 2]]

# ROC-AUC without and with L2 regularization.
auc_plain = get_score(X, y, reg=False)
auc_l2 = get_score(X, y, reg=True)

# Both AUCs rounded to 3 decimals, space-separated, as the grader expects.
answer = "{0} {1}".format(round(auc_plain, 3), round(auc_l2, 3))
save_to_file("Submissions/question1.txt", answer)
print(answer)