Ejemplo n.º 1
0
import process as pr
import time

# ---------------------------------------------------------------------------
# Stage 1: build the data-processing object and run its two preparation
# steps.  The timer spans object construction plus both method calls.
# ---------------------------------------------------------------------------
start_time = time.time()
DP = pr.Data_Process()
DP.read_data(detail=True)
DP.data_frequency(idf=True)
data_time = time.time() - start_time

# pr.time_process returns three values, unpacked here as hour/minute/second.
hour, minute, second = pr.time_process(data_time)
# The parenthesised single-argument print form behaves the same as a
# Python 2 print statement and is also valid Python 3.
print(''.join(['data process time: ', str(hour), "h ", str(minute), "m ",
               str(second), "s "]))

# ---------------------------------------------------------------------------
# Stage 2: fit one k-nearest-neighbours classifier per K in 1..10.
# ---------------------------------------------------------------------------
K_range = range(1, 11)

# Three independent, initially empty result lists; nothing in this section
# appends to them.
err_prob, false_pos, false_neg = [], [], []

# Imported only now, after the stage-1 measurement has been taken, so the
# import is not part of data_time.
from sklearn import neighbors

# Size of the test set, stored as a float.
# NOTE(review): presumably a float so later ratios avoid Python 2 integer
# division -- confirm against the code that consumes total_email.
total_email = float(len(DP.test_email_class))

for K in K_range:
    # The per-K timer covers both classifier construction and fitting.
    start_time = time.time()
    clf = neighbors.KNeighborsClassifier(n_neighbors=K)
    clf.fit(DP.train_bag_words_transformed, DP.train_email_class)
    fit_time = time.time() - start_time

    hour, minute, second = pr.time_process(fit_time)
Ejemplo n.º 2
0
__author__ = 'liangshengzhang'

import process as pr
import numpy as np
import time

# ---------------------------------------------------------------------------
# Stage 1: construct the data object with pr.Data(1) and read its contents.
# The timer spans both the construction and the read() call.
# ---------------------------------------------------------------------------
start_time = time.time()

chr1 = pr.Data(1)
chr1.read()

read_time = time.time() - start_time

# pr.time_process returns three values, unpacked here as hour/minute/second.
hour, minute, second = pr.time_process(read_time)

# The parenthesised single-argument print form behaves the same as a
# Python 2 print statement and is also valid Python 3.
print('\n')
print('Loading time: ' + str(hour) + "h " + str(minute) + "m " + str(
    second) + "s ")

# ---------------------------------------------------------------------------
# Stage 2: data extraction followed by missing-value imputation.  One timer
# covers extraction, the scikit-learn import and the imputation.
# ---------------------------------------------------------------------------
start_time = time.time()
chr1.data_extract(strand_binary=True, pos_normalize=True)

from sklearn import preprocessing

# copy=False only requests in-place imputation "whenever possible".
# scikit-learn still returns a *new* array when the input is not a floating
# point array, and when a column containing only missing values is dropped.
# Discarding the return value would then leave chr1.train_beta un-imputed,
# so the result is stored back.  When imputation really is done in place the
# returned object is the input itself and this assignment changes nothing.
# NOTE(review): assumes train_beta is a plain NumPy array and that nothing
# else holds a reference to the old object -- TODO confirm.
imputer = preprocessing.Imputer(copy=False)
chr1.train_beta = imputer.fit_transform(chr1.train_beta)

process_time = time.time() - start_time
hour, minute, second = pr.time_process(process_time)
print('\n')
Ejemplo n.º 3
0
__author__ = 'liangshengzhang'

import process as pr
import numpy as np
import time
from math import log, exp

# ---------------------------------------------------------------------------
# Stage 1: construct the data object with pr.Data(1) and read its contents
# (detail=True is forwarded to read()).  The timer spans both steps.
# ---------------------------------------------------------------------------
start_time = time.time()

chr1 = pr.Data(1)
chr1.read(detail=True)

read_time = time.time() - start_time

# pr.time_process returns three values, unpacked here as hour/minute/second.
hour, minute, second = pr.time_process(read_time)

# The parenthesised single-argument print form behaves the same as a
# Python 2 print statement and is also valid Python 3.
print('\n')
print('Loading time: ' + str(hour) + "h " + str(minute) + "m " + str(
    second) + "s ")

# ---------------------------------------------------------------------------
# Stage 2: data extraction, then missing-value imputation.
# ---------------------------------------------------------------------------
# NOTE(review): the timer used to be started here as well, but that value was
# overwritten before it was ever read, so the dead assignment was dropped.
# As before, process_time does NOT include data_extract().  If extraction is
# meant to be timed too, move the start_time assignment below to just above
# this call.
chr1.data_extract(strand_binary=True, pos_normalize=True)

# Timer starts before the import, so the import cost is part of process_time.
start_time = time.time()
from sklearn import preprocessing

# copy=False only requests in-place imputation "whenever possible".
# scikit-learn still returns a *new* array when the input is not a floating
# point array, and when a column containing only missing values is dropped.
# Discarding the return value would then leave chr1.train_beta un-imputed,
# so the result is stored back.  When imputation really is done in place the
# returned object is the input itself and this assignment changes nothing.
# NOTE(review): assumes train_beta is a plain NumPy array and that nothing
# else holds a reference to the old object -- TODO confirm.
imputer = preprocessing.Imputer(copy=False)
chr1.train_beta = imputer.fit_transform(chr1.train_beta)

process_time = time.time() - start_time