def mulThreadpredict(test_data, model, start_index_lab=270):
    preP = PreProcess()
    test_X, test_loc_dic = preP.preProcessTestData(test_data)
    test_Xc = preP.getFeatureScaler(test_X)
    test_X = None
    length = len(test_Xc)
    # Initialize the thread pool, leaving one core free
    print "cpu number:", cpu_count()
    cpus = cpu_count()
    pool = threadpool.ThreadPool(cpus - 1)
    print "test data len:", length
    print "predict result ... ... "
    # Build one request per 1000-row batch, starting from batch start_index_lab
    list_args = []
    for i in xrange(start_index_lab, length):
        start_index = i * 1000
        end_index = (i + 1) * 1000
        if start_index >= length:
            break
        if end_index > length:
            end_index = length
        args = [model, test_data, test_Xc, start_index, end_index, test_loc_dic]
        list_args.append((args, None))
    # threadPredict runs one batch per request (a hedged sketch of it follows below)
    requests = threadpool.makeRequests(threadPredict, list_args)
    [pool.putRequest(req) for req in requests]
    pool.wait()
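# threadPredict, the worker handed to threadpool.makeRequests above, is not
# defined in this section. A minimal sketch is given below, assuming each
# request mirrors the per-batch logic in main(): predict the slice, map the
# result back with transformResult, and append the rows to a result CSV.
# The body, the _sketch suffix, the "result_mt.csv" filename and the write
# lock are assumptions, not the original implementation.
import threading

result_write_lock = threading.Lock()

def threadPredict_sketch(model, test_data, test_Xc, start_index, end_index, test_loc_dic):
    start_time = time.time()
    pre_result = predict(model, test_Xc[start_index:end_index])
    result = transformResult(pre_result, test_loc_dic)
    with result_write_lock:
        # Serialize file writes so concurrent threads do not interleave CSV rows.
        wr = WriteResult()
        wr.WriteResultAnswer(test_data[start_index:end_index], result,
                             Config.ResultDataPath + "result_mt.csv")
    logging.info("thread predict from %d to %d. cost time :%lf s"
                 % (start_index, end_index, time.time() - start_time))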
def LoadModel(model_name="predict_Model"):
    # Variant that falls back to training when no cached model is found.
    model = None
    rf = ReadCsvFile()
    model = rf.ReadValueFromFile(model_name)
    if model:
        # predict_result = model.predict_proba(test_data)
        print "Load Model success!"
    else:
        train_data = rf.ReadTrainFile()
        preProce = PreProcess()
        train_X, train_lab, train_loc_dic = preProce.preProcessTrainData(train_data)
        train_Xc = preProce.getFeatureScaler(train_X)  # scale the extracted features, not the raw rows
        model = TrainModel(train_Xc, train_lab)
        # predict_result = model.predict_proba(test_data)
    return model
def LoadModel(model_name="predict_Model_new"):
    model = None
    rf = ReadCsvFile()
    try:
        model = rf.ReadValueFromFile(model_name)
        print "Load Model success!"
    except:
        train_data = rf.ReadTrainFile()
        print len(train_data)
        preP = PreProcess()
        train_X, train_lab, loc_dic = preP.preProcessTrainData(train_data)
        train_Xc = preP.getFeatureScaler(train_X)
        print "Train model"
        model = TrainModel(train_Xc, train_lab)
        logging.info("save model")
        wr = WriteResult()
        wr.WriteValueToFile(model, model_name)
    return model
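# ReadValueFromFile / WriteValueToFile are not shown in this section. A minimal
# sketch of how such persistence could look is given below, assuming they simply
# pickle arbitrary Python objects (the trained model here) under a data path.
# The _sketch suffix and the base_path default are placeholders, not the
# original implementation.
import os
import pickle

def WriteValueToFile_sketch(value, name, base_path="./model_data"):
    # Serialize `value` to <base_path>/<name>.pkl
    if not os.path.exists(base_path):
        os.makedirs(base_path)
    with open(os.path.join(base_path, name + ".pkl"), "wb") as fp:
        pickle.dump(value, fp)

def ReadValueFromFile_sketch(name, base_path="./model_data"):
    # Raises IOError if the file is missing, which LoadModel catches to retrain.
    with open(os.path.join(base_path, name + ".pkl"), "rb") as fp:
        return pickle.load(fp)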
def main():
    print "Start......"
    rf = ReadCsvFile()
    # train_data = rf.ReadTrainFile()
    preP = PreProcess()
    # train_X,train_lab,loc_dic = preP.preProcessTrainData(train_data)
    # train_Xc = preP.getFeatureScaler(train_X)
    # print "Train model"
    # TrainModel(train_Xc,train_lab)
    # train_lab = None
    # loc_dic = None
    # train_data = None
    # train_X = None
    # train_Xc = None
    # with open(Config.ResultDataPath+"result.csv","w") as fp:
    #     print "clear file", "result.csv"
    model = LoadModel()
    # print "Read data predict ... ... "
    test_data = rf.ReadTestFile()
    test_X, test_loc_dic = preP.preProcessTestData(test_data)
    test_Xc = preP.getFeatureScaler(test_X)
    test_X = None
    length = len(test_Xc)
    print "test data len:", length
    print "predict result ... ... "
    # Predict in 100-row batches, resuming from batch 104 (predict() is sketched below)
    for i in xrange(104, length):
        start_index = i * 100
        end_index = (i + 1) * 100
        if start_index >= length:
            break
        if end_index > length:
            end_index = length
        start_time = time.time()
        pre_result = predict(model, test_Xc[start_index:end_index])
        end_time = time.time()
        print "predict from %d to %d. \n cost time :%lf s\n" % (start_index, end_index, end_time - start_time)
        logging.info("predict from %d to %d. \ncost time :%lf s" % (start_index, end_index, end_time - start_time))
        start_time = time.time()
        result = transformResult(pre_result, test_loc_dic)
        wr = WriteResult()
        wr.WriteResultAnswer(test_data[start_index:end_index], result, Config.ResultDataPath + "result_1.csv")
        end_time = time.time()
        logging.info("transform index from %d to %d. \ncost time :%lf s" % (start_index, end_index, end_time - start_time))
        print "transform index from %d to %d. \n cost time :%lf s\n" % (start_index, end_index, end_time - start_time)
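# predict() is referenced above but not defined in this section. The
# commented-out calls in LoadModel suggest the model exposes predict_proba,
# so one plausible sketch is below: keep the three most probable class
# indexes per row. The _sketch suffix and the top-3 cut-off are assumptions,
# not the original logic.
import numpy

def predict_sketch(model, X):
    proba = model.predict_proba(X)                         # shape: (n_rows, n_classes)
    top3 = numpy.argsort(proba, axis=1)[:, -3:][:, ::-1]   # most probable class first
    return top3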
def main():
    print "Start......"
    rf = ReadCsvFile()
    train_data = rf.ReadTrainFile()
    preP = PreProcess()
    train_X, train_lab, loc_dic = preP.preProcessTrainData(train_data)
    train_Xc = preP.getFeatureScaler(train_X)
    # print "Train model"
    # TrainModel(train_Xc,train_lab)
    # train_lab = None
    # loc_dic = None
    # train_data = None
    # train_X = None
    # train_Xc = None
    # with open(Config.ResultDataPath+"result.csv","w") as fp:
    #     print "clear file", "result.csv"
    model = LoadModel()
    print model
    # Scored on rows 2000:3000 of the training data itself (see the holdout sketch below)
    print "score (mean accuracy): {0}".format(model.score(train_Xc[2000:3000], train_lab[2000:3000]))
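# The score above is computed on a slice of the training set, so it measures
# fit rather than generalization. A minimal sketch of scoring on a held-out
# validation split is shown below, assuming a scikit-learn version that
# provides sklearn.model_selection; score_on_holdout, model_builder, the 80/20
# split and random_state are illustrative choices, not from the original code.
from sklearn.model_selection import train_test_split

def score_on_holdout(model_builder, train_Xc, train_lab):
    X_tr, X_val, y_tr, y_val = train_test_split(
        train_Xc, train_lab, test_size=0.2, random_state=42)
    model = model_builder(X_tr, y_tr)    # e.g. TrainModel
    return model.score(X_val, y_val)     # mean accuracy on unseen rows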
def TestReadFile(self):
    # rcf = ReadCsvFile()
    # start_r = time.time()
    # trainData = rcf.ReadTrainFile()
    # end_t = time.time()
    # print "read train data cost time :", (end_t - start_r)
    # print len(trainData)
    # for re in trainData:
    #     print re
    # start_r = time.time()
    # testData = rcf.ReadTestFile()
    # end_t = time.time()
    # print "read test data cost time :", (end_t - start_r)
    # for re in testData:
    #     print re
    wr = WriteResult()
    rf = ReadCsvFile()
    train_X = rf.ReadValueFromFile("trainData_X")
    train_Y = rf.ReadValueFromFile("trainData_Y")
    train_loc_dic = rf.ReadValueFromFile("train_loc_dic")
    # print len(testData)
    max_min_scaler = preprocessing.MinMaxScaler()
    train_XM = numpy.array(train_X)
    train_Xc = max_min_scaler.fit_transform(train_XM)
    # The target name was missing in the original call; "trainData_Xc" is a placeholder.
    wr.WriteValueToFile(train_Xc, "trainData_Xc")
    print train_Xc
    preProce = PreProcess()
    # test_X,test_loc_dic = preProce.preProcessTestData(testData)
    test_X = rf.ReadValueFromFile("testData_X")
    test_loc_dic = rf.ReadValueFromFile("test_loc_dic")
    # X,Y,loc_dic = preProce.preProcess(trainData)
    print "X size:", len(test_X)
    print test_X[0], len(test_loc_dic.keys())
    # for i in xrange(len(X)):
    test_XM = numpy.array(test_X)
    # Note: fit_transform here re-fits the scaler on the test set; transform()
    # would reuse the min/max learned from the training data (see sketch below).
    test_Xc = max_min_scaler.fit_transform(test_XM)
    print
    print test_Xc
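# A minimal sketch of the usual MinMaxScaler usage, assuming the goal is to
# scale the test features with the same min/max learned from the training
# features rather than re-fitting on the test set as TestReadFile does above.
# The helper name scale_train_and_test is chosen here for illustration.
import numpy
from sklearn import preprocessing

def scale_train_and_test(train_X, test_X):
    scaler = preprocessing.MinMaxScaler()
    train_Xc = scaler.fit_transform(numpy.array(train_X))  # learn min/max on train
    test_Xc = scaler.transform(numpy.array(test_X))        # reuse them on test
    return train_Xc, test_Xc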