Example #1
0
import numpy as np
from data_analysis import data_warehouse
from sklearn.cross_validation import KFold

# Build a small three-author data set and walk through a 3-fold split of it.
#
# feature_list collects whatever get_stylometric_features_by_author_id returns
# for each author (presumably one feature vector per text -- confirm against
# data_warehouse); author_list holds the matching class label for every row.
author_list = []
feature_list = []

# Author ids 1, 2, 3 are mapped to the zero-based class labels 0, 1, 2.
for label, author_id in enumerate((1, 2, 3)):
    features = data_warehouse.get_stylometric_features_by_author_id(author_id)
    feature_list.extend(features)
    author_list.extend([label] * len(features))

X = np.array(feature_list)
y = np.array(author_list)

# NOTE(review): rows are appended author by author and KFold is called without
# any shuffling argument, so the folds presumably follow that author ordering.
# Confirm this is intended (shuffle=True or a stratified splitter may be wanted).
kf = KFold(len(feature_list), n_folds=3)
print(len(feature_list))
print(len(author_list))
print(len(kf))

for train_index, test_index in kf:
    # One pre-formatted string, so the line reads the same under the Python 2
    # print statement (which would otherwise print a tuple repr) and Python 3.
    print("TRAIN: %s TEST: %s" % (train_index, test_index))
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
import time
import numpy as np
from data_analysis import data_warehouse
from sklearn import cross_validation
from sklearn.metrics import classification_report, accuracy_score
from data_analysis import calculate_K_nearest_neighbors_classifier_for_sets as KNN


# Load stylometric features for a range of authors, hold out 10% of the rows
# as a test set, and look up the nearest training neighbours of each test row.

# Wall-clock start of the run; not read again in the visible part of the script.
start_time = time.time()

author_list = []
feature_list = []

# Author ids 1..19 (the upper bound of range is exclusive). Each row is
# labelled with the author id itself.
for author_id in range(1, 20):
    features = data_warehouse.get_stylometric_features_by_author_id(author_id)
    feature_list.extend(features)
    author_list.extend([author_id] * len(features))

X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    feature_list, author_list, test_size=0.1, random_state=1)

# Pair every feature row with its label. zip is materialised explicitly
# because on Python 3 it is a lazy iterator, which np.array would not expand.
train = np.array(list(zip(X_train, y_train)))
test = np.array(list(zip(X_test, y_test)))

predictions = []

print('Finished getting data from the database')
for idx in range(len(X_test)):
    # The double space reproduces the original output, where the print
    # statement inserted a separator after a string that already ended in one.
    print('Classifying test instance number  %d:' % idx)
    # test[idx][0] is the feature part of the (features, label) pair.
    # NOTE(review): neighbors is not consumed and predictions stays empty in
    # the visible code -- the script appears to be truncated here.
    neighbors = KNN.get_set_neighbor(training_set=train, test_instance=test[idx][0], k=5)
Example #3
0
import numpy as np
from data_analysis import data_warehouse
from sklearn.cross_validation import KFold

# Assemble features and labels for three authors, then iterate over a 3-fold
# split of the combined rows.
#
# feature_list accumulates the values returned by
# get_stylometric_features_by_author_id for each author (presumably one
# feature vector per text -- confirm against data_warehouse); author_list
# carries one class label per accumulated row.
author_list = []
feature_list = []

# enumerate turns author ids 1, 2, 3 into the zero-based labels 0, 1, 2.
for label, author_id in enumerate((1, 2, 3)):
    features = data_warehouse.get_stylometric_features_by_author_id(author_id)
    feature_list.extend(features)
    author_list.extend([label] * len(features))

X = np.array(feature_list)
y = np.array(author_list)

# NOTE(review): the rows are grouped by author and KFold receives no shuffling
# argument, so each fold presumably covers a contiguous, author-ordered slice.
# Confirm this is intended (shuffle=True or a stratified splitter may be wanted).
kf = KFold(len(feature_list), n_folds=3)
print(len(feature_list))
print(len(author_list))
print(len(kf))

for train_index, test_index in kf:
    # Formatting into a single string keeps the output identical whether print
    # is the Python 2 statement (tuple repr otherwise) or the Python 3 function.
    print("TRAIN: %s TEST: %s" % (train_index, test_index))
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]