Ejemplo n.º 1
0
import gzip
import os
import sys
import time

import numpy
from sklearn import svm
from sklearn.externals import joblib


# Make the sibling DataProcess directory importable so the project-local
# CSV reader can be loaded.
# NOTE(review): the path is relative to the current working directory, so
# the script presumably has to be launched from its own folder -- confirm.
if "../DataProcess/" not in sys.path:
    sys.path.append("../DataProcess/")
import transform_data_to_format as tdtf

DataHome = "/home/hphp/Documents/data/Kaggle/DogVsCatData/"
train_data_set_route = DataHome + "train.csv"
test_data_set_route = DataHome + "test.csv"  # defined but not used below
model_dir = DataHome + 'svm_svc_pkl/'
model_route = model_dir + 'svm.svc.pkl'

# joblib.dump does not create missing parent directories. Make sure the
# target directory exists *before* training, so a finished fit is never
# lost to an IOError at save time.
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# The reader's result is unpacked into a (features, labels) pair.
train_set = tdtf.read_csv_data_to_int_list(train_data_set_route)
train_set_x, train_set_y = train_set
# An earlier debug print of these values (kept from the original notes)
# reported:
#   train_set_x: list of 20 rows, each a list of 6250 str items
#   train_set_y: list of 20 int labels

# Fit an SVC with scikit-learn's default parameters and persist it with
# joblib for the separate prediction script.
classifier = svm.SVC()
classifier.fit(train_set_x, train_set_y)
clf_pickle = joblib.dump(classifier, model_route)
Ejemplo n.º 2
0
import time

import numpy
from sklearn import svm
from sklearn.externals import joblib

# `sys` is used for the path tweak below but is not among this snippet's
# visible imports; importing it here avoids a NameError (a repeated import
# is harmless if it is already loaded).
import sys

# Make the sibling DataProcess directory importable so the project-local
# CSV reader can be loaded.
if "../DataProcess/" not in sys.path:
    sys.path.append("../DataProcess/")
import transform_data_to_format as tdtf

DataHome = "/home/hphp/Documents/data/Kaggle/DogVsCatData/"
test_data_set_route = DataHome + "test.csv"

# All messages use print(<single expression>), which prints the same text
# under the Python 2 print statement and the Python 3 print function.

# --- Stage 1: read the test set and report how long it took (minutes) ---
print("reading test data")
start_sec = time.time()
# NOTE(review): the meaning of the extra (None, 0) arguments is defined by
# the tdtf helper, which is not visible here -- confirm against its source.
test_set = tdtf.read_csv_data_to_int_list(test_data_set_route, None, 0)
test_set_x, test_set_y = test_set
print(len(test_set_x))
end_sec = time.time()
print('practical reading data time : %.2fm ' % ((end_sec - start_sec) / 60.))

# --- Stage 2: load the classifier saved by the training script ---
start_sec = time.time()
print("loading svm classifier from joblib")
# joblib.load is pickle-based: only load model files from a trusted source.
# mmap_mode='c' asks joblib to memory-map the stored arrays copy-on-write.
classifier = joblib.load(DataHome + 'svm_svc_pkl/svm.svc.pkl', mmap_mode='c')
end_sec = time.time()
print('practical loading svm time : %.2fm ' % ((end_sec - start_sec) / 60.))

# --- Stage 3: predict labels for the test set ---
start_sec = time.time()
print("predicting")
pred_test_y = classifier.predict(test_set_x)
end_sec = time.time()
Ejemplo n.º 3
0
import cPickle
import gzip
import os
import sys
import time

import numpy
from sklearn import svm
from sklearn.externals import joblib

# Extend the import path with the sibling DataProcess directory, which holds
# the project-local CSV reader.
# NOTE(review): "../DataProcess/" is resolved against the current working
# directory, not this file's location -- confirm how the script is launched.
if "../DataProcess/" not in sys.path:
    sys.path.append("../DataProcess/")
import transform_data_to_format as tdtf

DataHome = "/home/hphp/Documents/data/Kaggle/DogVsCatData/"
train_data_set_route = DataHome + "train.csv"
test_data_set_route = DataHome + "test.csv"  # defined but not used below
model_dir = DataHome + 'svm_svc_pkl/'
model_route = model_dir + 'svm.svc.pkl'

# Create the output directory up front: joblib.dump will not create it, and
# failing only after the (potentially long) fit would discard the model.
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# Load the training data; the result is unpacked as (features, labels).
train_set = tdtf.read_csv_data_to_int_list(train_data_set_route)
train_set_x, train_set_y = train_set
# Output of an earlier debug print, kept from the original notes:
#   train_set_x: list of 20 rows, each a list of 6250 str items
#   train_set_y: list of 20 int labels

# Train a default-parameter SVC and store it with joblib so the prediction
# script can reload it.
classifier = svm.SVC()
classifier.fit(train_set_x, train_set_y)
clf_pickle = joblib.dump(classifier, model_route)
Ejemplo n.º 4
0
import numpy
from sklearn import svm
from sklearn.externals import joblib


# `sys` and `time` are both used below but neither appears among this
# snippet's visible imports; importing them here avoids a NameError (a
# repeated import is harmless if they are already loaded).
import sys
import time

# Extend the import path with the sibling DataProcess directory, which holds
# the project-local CSV reader.
if "../DataProcess/" not in sys.path:
    sys.path.append("../DataProcess/")
import transform_data_to_format as tdtf

DataHome = "/home/hphp/Documents/data/Kaggle/DogVsCatData/"
test_data_set_route = DataHome + "test.csv"

# Messages are written as print(<single expression>) so the output is the
# same under the Python 2 print statement and the Python 3 print function.

# Read the test set, timing the step and reporting it in minutes.
print("reading test data")
start_sec = time.time()
# NOTE(review): the extra (None, 0) arguments are interpreted by the tdtf
# helper, which is not visible here -- confirm their meaning there.
test_set = tdtf.read_csv_data_to_int_list(test_data_set_route, None, 0)
test_set_x, test_set_y = test_set
print(len(test_set_x))
end_sec = time.time()
print('practical reading data time : %.2fm ' % ((end_sec - start_sec) / 60.))

# Reload the classifier written by the training script, timing the step.
start_sec = time.time()
print("loading svm classifier from joblib")
# joblib.load unpickles the file: only point it at trusted model files.
# mmap_mode='c' requests copy-on-write memory mapping of the stored arrays.
classifier = joblib.load(DataHome + 'svm_svc_pkl/svm.svc.pkl', mmap_mode='c')
end_sec = time.time()
print('practical loading svm time : %.2fm ' % ((end_sec - start_sec) / 60.))


# Predict labels for every test row.
start_sec = time.time()
print("predicting")
pred_test_y = classifier.predict(test_set_x)