# Script: load and pre-process the e-mail data set through the project's
# `process` module, report how long that took, then fit a k-nearest-neighbours
# classifier for each K in 1..10 while timing every fit.
import process as pr
import time

# --- data loading / feature preparation (timed) ---
start_time = time.time()
DP = pr.Data_Process()
DP.read_data(detail=True)
DP.data_frequency(idf=True)
data_time = time.time() - start_time

# time_process returns three values that are unpacked as hour/minute/second
# (presumably a split of the elapsed seconds -- confirm in process.py).
hour, minute, second = pr.time_process(data_time)
print('data process time: ' + str(hour) + "h " + str(minute) + "m "
      + str(second) + "s ")

# --- K sweep set-up ---
K_range = range(1, 11)  # K = 1, 2, ..., 10
# Per-K result accumulators; they start out empty here.
err_prob, false_pos, false_neg = [], [], []

# Imported at this point (after the data timing above) so that the import
# cost is not counted in data_time.
from sklearn import neighbors

# Number of test e-mails as a float, so later ratios are true divisions
# under Python 2.
total_email = float(len(DP.test_email_class))

for K in K_range:
    # Time classifier construction plus fitting for this K.
    start_time = time.time()
    clf = neighbors.KNeighborsClassifier(n_neighbors=K)
    clf.fit(DP.train_bag_words_transformed, DP.train_email_class)
    fit_time = time.time() - start_time
    hour, minute, second = pr.time_process(fit_time)
__author__ = 'liangshengzhang'

# Script: load data set 1 through the project's `process` module, report the
# loading time, then extract features and impute missing values in the
# training beta matrix while timing that processing step.
import process as pr
import numpy as np
import time

# --- loading (timed) ---
start_time = time.time()
chr1 = pr.Data(1)
chr1.read()
read_time = time.time() - start_time

# time_process returns three values that are unpacked as hour/minute/second
# (presumably a split of the elapsed seconds -- confirm in process.py).
hour, minute, second = pr.time_process(read_time)
print('\n')
print('Loading time: ' + str(hour) + "h " + str(minute) + "m "
      + str(second) + "s ")

# --- feature extraction + imputation (timed together) ---
start_time = time.time()
chr1.data_extract(strand_binary=True, pos_normalize=True)

# Imported here, inside the timed section, exactly as in the original flow.
from sklearn import preprocessing

# copy=False asks the imputer to work in place where it can.
imputer = preprocessing.Imputer(copy=False)
# NOTE(review): the value returned by fit_transform is discarded, so this
# relies on chr1.train_beta being modified in place -- confirm that holds
# for the actual type/dtype of train_beta.
imputer.fit_transform(chr1.train_beta)
process_time = time.time() - start_time

hour, minute, second = pr.time_process(process_time)
print('\n')
__author__ = 'liangshengzhang'

# Script: load data set 1 (with detail=True) through the project's `process`
# module, report the loading time, then extract features and time the
# imputation of missing values in the training beta matrix.
import process as pr
import numpy as np
import time
from math import log, exp

# --- loading (timed) ---
start_time = time.time()
chr1 = pr.Data(1)
chr1.read(detail=True)
read_time = time.time() - start_time

# time_process returns three values that are unpacked as hour/minute/second
# (presumably a split of the elapsed seconds -- confirm in process.py).
hour, minute, second = pr.time_process(read_time)
print('\n')
print('Loading time: ' + str(hour) + "h " + str(minute) + "m "
      + str(second) + "s ")

# --- feature extraction ---
start_time = time.time()
chr1.data_extract(strand_binary=True, pos_normalize=True)

# --- imputation (timed) ---
# NOTE(review): start_time is reset again here without the previous value
# having been read, so process_time below covers only the import and the
# imputation, not data_extract -- confirm that this is intended.
start_time = time.time()

from sklearn import preprocessing

# copy=False asks the imputer to work in place where it can.
imputer = preprocessing.Imputer(copy=False)
# NOTE(review): the value returned by fit_transform is discarded, so this
# relies on chr1.train_beta being modified in place -- confirm that holds
# for the actual type/dtype of train_beta.
imputer.fit_transform(chr1.train_beta)
process_time = time.time() - start_time