# Train a default-parameter SVC on train.csv and persist the fitted model
# with joblib so a separate script can load it for prediction.
import gzip
import os
import sys
import time

import numpy
from sklearn import svm

try:
    from sklearn.externals import joblib
except ImportError:
    # sklearn.externals.joblib was removed in scikit-learn 0.23; the
    # standalone joblib package provides the same dump/load functions.
    import joblib

# The CSV helper is loaded from a sibling directory, so that directory has
# to be on sys.path before the import below.
if "../DataProcess/" not in sys.path:
    sys.path.append("../DataProcess/")
import transform_data_to_format as tdtf

DataHome = "/home/hphp/Documents/data/Kaggle/DogVsCatData/"
train_data_set_route = DataHome + "train.csv"
test_data_set_route = DataHome + "test.csv"
model_route = DataHome + 'svm_svc_pkl/svm.svc.pkl'

train_set = tdtf.read_csv_data_to_int_list(train_data_set_route)
train_set_x, train_set_y = train_set
# NOTE(review): an earlier debug print recorded train_set_x as a list of 20
# lists, each holding 6250 items of type str, and train_set_y as a list of
# 20 ints. The features being str despite the helper's name is worth
# confirming against the helper itself.

# Make sure the output directory exists *before* training, so a missing
# directory cannot throw away the fitted model at dump time.
model_dir = os.path.dirname(model_route)
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

classifier = svm.SVC()
classifier.fit(train_set_x, train_set_y)

# joblib replaced an earlier cPickle-based dump of the classifier.
clf_pickle = joblib.dump(classifier, model_route)
# Load the SVC persisted by the training script and predict labels for
# test.csv, printing how long each stage takes (in minutes).
import sys  # used below for sys.path; not imported in the visible script
import time

import numpy
from sklearn import svm

try:
    from sklearn.externals import joblib
except ImportError:
    # sklearn.externals.joblib was removed in scikit-learn 0.23; the
    # standalone joblib package provides the same dump/load functions.
    import joblib

# The CSV helper is loaded from a sibling directory, so that directory has
# to be on sys.path before the import below.
if "../DataProcess/" not in sys.path:
    sys.path.append("../DataProcess/")
import transform_data_to_format as tdtf

DataHome = "/home/hphp/Documents/data/Kaggle/DogVsCatData/"
test_data_set_route = DataHome + "test.csv"

# Single-argument print(...) produces identical output on Python 2 and
# also parses on Python 3.
print("reading test data")
start_sec = time.time()
# NOTE(review): the meaning of the extra (None, 0) arguments is defined by
# the helper module and is not visible here - confirm before changing them.
test_set = tdtf.read_csv_data_to_int_list(test_data_set_route, None, 0)
test_set_x, test_set_y = test_set
print(len(test_set_x))
end_sec = time.time()
print('practical reading data time : %.2fm ' % ((end_sec - start_sec) / 60.))

start_sec = time.time()
print("loading svm classifier from joblib")
# joblib.load unpickles the file, so only load model files you trust.
classifier = joblib.load(DataHome + 'svm_svc_pkl/svm.svc.pkl', mmap_mode='c')
end_sec = time.time()
print('practical loading svm time : %.2fm ' % ((end_sec - start_sec) / 60.))

start_sec = time.time()
print("predicting")
pred_test_y = classifier.predict(test_set_x)
end_sec = time.time()
# Fit an SVC with default parameters on the training CSV and store the
# fitted classifier on disk via joblib for later prediction runs.
try:
    import cPickle
except ImportError:
    # Python 3 has no cPickle module; pickle is the equivalent there.
    # The name is not used by the code below and is kept only so the
    # import list stays intact.
    import pickle as cPickle
import gzip
import os
import sys
import time

import numpy
from sklearn import svm

try:
    from sklearn.externals import joblib
except ImportError:
    # scikit-learn 0.23 dropped its vendored joblib; fall back to the
    # standalone package, which offers the same dump/load functions.
    import joblib

# transform_data_to_format is imported from a sibling directory, which must
# be on sys.path first.
if "../DataProcess/" not in sys.path:
    sys.path.append("../DataProcess/")
import transform_data_to_format as tdtf

DataHome = "/home/hphp/Documents/data/Kaggle/DogVsCatData/"
train_data_set_route = DataHome + "train.csv"
test_data_set_route = DataHome + "test.csv"
model_route = DataHome + 'svm_svc_pkl/svm.svc.pkl'

train_set = tdtf.read_csv_data_to_int_list(train_data_set_route)
train_set_x, train_set_y = train_set
# NOTE(review): a previous debug print showed train_set_x as a list of 20
# lists of 6250 str items and train_set_y as a list of 20 ints; verify the
# str feature type against the helper, whose name suggests ints.

# Create the model directory up front: if it were missing, joblib.dump
# would fail only after the whole fit had already been paid for.
model_dir = os.path.dirname(model_route)
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

classifier = svm.SVC()
classifier.fit(train_set_x, train_set_y)

# Persistence goes through joblib; an older cPickle dump was disabled.
clf_pickle = joblib.dump(classifier, model_route)
# Read the test CSV, load the previously saved SVC through joblib and run
# prediction, reporting the wall-clock time of the read and load stages.
import sys   # needed for the sys.path tweak; not imported in the visible script
import time  # needed for the stage timers; not imported in the visible script

import numpy
from sklearn import svm

try:
    from sklearn.externals import joblib
except ImportError:
    # scikit-learn 0.23 dropped its vendored joblib; fall back to the
    # standalone package, which offers the same dump/load functions.
    import joblib

# transform_data_to_format is imported from a sibling directory, which must
# be on sys.path first.
if "../DataProcess/" not in sys.path:
    sys.path.append("../DataProcess/")
import transform_data_to_format as tdtf

DataHome = "/home/hphp/Documents/data/Kaggle/DogVsCatData/"
test_data_set_route = DataHome + "test.csv"

# print(...) with one argument behaves the same on Python 2 and parses on
# Python 3 as well.
print("reading test data")
start_sec = time.time()
# NOTE(review): what the (None, 0) arguments select is decided inside the
# helper module, which is not visible here - confirm before editing.
test_set = tdtf.read_csv_data_to_int_list(test_data_set_route, None, 0)
test_set_x, test_set_y = test_set
print(len(test_set_x))
end_sec = time.time()
print('practical reading data time : %.2fm ' % ((end_sec - start_sec) / 60.))

start_sec = time.time()
print("loading svm classifier from joblib")
# joblib.load unpickles its input; the model file must come from a
# trusted source.
classifier = joblib.load(DataHome + 'svm_svc_pkl/svm.svc.pkl', mmap_mode='c')
end_sec = time.time()
print('practical loading svm time : %.2fm ' % ((end_sec - start_sec) / 60.))

start_sec = time.time()
print("predicting")
pred_test_y = classifier.predict(test_set_x)