Esempio n. 1
0
def mulThreadpredict(test_data, model, start_index_lab=270):
    preP = PreProcess()
    test_X, test_loc_dic = preP.preProcessTestData(test_data)
    test_Xc = preP.getFeatureScaler(test_X)
    test_X = None
    length = len(test_Xc)
    # 初始化线程池
    print "cpu number:", cpu_count()
    cpus = cpu_count()
    pool = threadpool.ThreadPool(cpus - 1)
    print "test data len:", length
    print "predict result ... ... "
    list_args = []
    for i in xrange(start_index_lab, length):
        pre_result = None
        start_index = i * 1000
        end_index = (i + 1) * 1000
        if end_index > length:
            end_index = length
        args = [
            model, test_data, test_Xc, start_index, end_index, test_loc_dic
        ]
        list_args.append((args, None))
    requests = threadpool.makeRequests(threadPredict, list_args)
    [pool.putRequest(req) for req in requests]
    pool.wait()
Esempio n. 2
0
def LoadModel(model_name="predict_Model"):
    model = None
    rf = ReadCsvFile()
    model = rf.ReadValueFromFile(model_name)
    predict_result = None
    if model:
        # predict_result = model.predict_proba(test_data)
        print "Load Model success!"
    else:
        train_data = rf.ReadTrainFile()
        preProce = PreProcess()
        train_X,train_lab,train_loc_dic = preProce.preProcessTrainData(train_data)
        train_Xc =preProce.getFeatureScaler(train_data)
        model = TrainModel(train_Xc,train_lab)
        # predict_result = model.predict_proba(test_data)
    return model
Esempio n. 3
0
def LoadModel(model_name="predict_Model_new"):
    model = None
    rf = ReadCsvFile()
    try:
        model = rf.ReadValueFromFile(model_name)
        print "Load Model success!"
    except:
        train_data = rf.ReadTrainFile()
        print len(train_data)
        preP = PreProcess()
        train_X,train_lab,loc_dic = preP.preProcessTrainData(train_data)
        train_Xc = preP.getFeatureScaler(train_X)
        print "Train model"
        model = TrainModel(train_Xc,train_lab)
        logging.info("save model")
        wr = WriteResult()
        wr.WriteValueToFile(model,model_name)
    return model
Esempio n. 4
0
def LoadModel(model_name="predict_Model_new"):
    model = None
    rf = ReadCsvFile()
    try:
        model = rf.ReadValueFromFile(model_name)
        print "Load Model success!"
    except:
        train_data = rf.ReadTrainFile()
        print len(train_data)
        preP = PreProcess()
        train_X, train_lab, loc_dic = preP.preProcessTrainData(train_data)
        train_Xc = preP.getFeatureScaler(train_X)
        print "Train model"
        model = TrainModel(train_Xc, train_lab)
        logging.info("save model")
        wr = WriteResult()
        wr.WriteValueToFile(model, model_name)
    return model
Esempio n. 5
0
def main():
    print "Start......"
    rf = ReadCsvFile()
    # train_data = rf.ReadTrainFile()
    preP = PreProcess()
    # train_X,train_lab,loc_dic = preP.preProcessTrainData(train_data)
    # train_Xc = preP.getFeatureScaler(train_X)
    # print "Train model"
    # TrainModel(train_Xc,train_lab)
    # train_lab = None
    # loc_dic = None
    # train_data = None
    # train_X = None
    # train_Xc = None
    # with open(Config.ResultDataPath+"result.csv","w") as fp:
    #     print "清空文件","result.csv"
    model = LoadModel()
    # print "Read data predict ... ... "
    test_data = rf.ReadTestFile()
    test_X,test_loc_dic = preP.preProcessTestData(test_data)
    test_Xc = preP.getFeatureScaler(test_X)
    test_X = None
    length = len(test_Xc)
    print "test data len:",length
    print "predict result ... ... "
    for i in xrange(104,length):
        pre_result = None
        start_index = i * 100
        end_index = (i+1) * 100
        if end_index > length:
            end_index = length
        start_time = time.time()
        pre_result = predict(model,test_Xc[start_index:end_index])
        end_time = time.time()
        print "predict from %d to %d. \n cost time :%lf s\n" % (start_index, end_index,end_time-start_time)
        start_time = time.time()
        result = transformResult(pre_result, test_loc_dic)
        logging.info("predict from %d to %d. \ncost time :%lf s" % (start_index, end_index,end_time-start_time))
        wr = WriteResult()
        wr.WriteResultAnswer(test_data[start_index:end_index], result,Config.ResultDataPath+"result_1.csv")
        end_time = time.time()
        logging.info("transform index from %d to %d. \ncost time :%lf s" % (start_index, end_index,end_time-start_time))
        print "transform index from %d to %d. \n cost time :%lf s\n" % (start_index, end_index,end_time-start_time)
Esempio n. 6
0
def main():
    print "Start......"
    rf = ReadCsvFile()
    train_data = rf.ReadTrainFile()
    preP = PreProcess()
    train_X,train_lab,loc_dic = preP.preProcessTrainData(train_data)
    train_Xc = preP.getFeatureScaler(train_X)
    # print "Train model"
    # TrainModel(train_Xc,train_lab)
    # train_lab = None
    # loc_dic = None
    # train_data = None
    # train_X = None
    # train_Xc = None
    # with open(Config.ResultDataPath+"result.csv","w") as fp:
    #     print "清空文件","result.csv"
    model = LoadModel()
    print model
    print "精确率:{0}".format(model.score(train_Xc[2000:3000],train_lab[2000:3000]))
Esempio n. 7
0
def main():
    print "Start......"
    rf = ReadCsvFile()
    train_data = rf.ReadTrainFile()
    preP = PreProcess()
    train_X, train_lab, loc_dic = preP.preProcessTrainData(train_data)
    train_Xc = preP.getFeatureScaler(train_X)
    # print "Train model"
    # TrainModel(train_Xc,train_lab)
    # train_lab = None
    # loc_dic = None
    # train_data = None
    # train_X = None
    # train_Xc = None
    # with open(Config.ResultDataPath+"result.csv","w") as fp:
    #     print "清空文件","result.csv"
    model = LoadModel()
    print model
    print "精确率:{0}".format(
        model.score(train_Xc[2000:3000], train_lab[2000:3000]))
Esempio n. 8
0
def mulThreadpredict(test_data,model,start_index_lab=270):
    preP = PreProcess()
    test_X, test_loc_dic = preP.preProcessTestData(test_data)
    test_Xc = preP.getFeatureScaler(test_X)
    test_X = None
    length = len(test_Xc)
    # 初始化线程池
    print "cpu number:", cpu_count()
    cpus = cpu_count()
    pool = threadpool.ThreadPool(cpus-1)
    print "test data len:", length
    print "predict result ... ... "
    list_args = []
    for i in xrange(start_index_lab, length):
        pre_result = None
        start_index = i * 1000
        end_index = (i + 1) * 1000
        if end_index > length:
            end_index = length
        args = [model,test_data,test_Xc,start_index,end_index,test_loc_dic]
        list_args.append((args,None))
    requests = threadpool.makeRequests(threadPredict, list_args)
    [pool.putRequest(req) for req in requests]
    pool.wait()
Esempio n. 9
0
    def TestReadFile(self):
        # rcf = ReadCsvFile()
        # start_r = time.time()
        # trainData = rcf.ReadTrainFile()
        # end_t = time.time()
        # print "read train data cost time :",(end_t-start_r)
        # print len(trainData)
        # for re in trainData:
        #     print re
        # start_r = time.time()
        # testData = rcf.ReadTestFile()
        # end_t = time.time()
        # print "read test data cost time :", (end_t - start_r)
        # for re in testData:
        #     print re
        wr = WriteResult()
        rf = ReadCsvFile()
        train_X = rf.ReadValueFromFile("trainData_X")
        train_Y = rf.ReadValueFromFile("trainData_Y")
        train_loc_dic = rf.ReadValueFromFile("train_loc_dic")
        # print len(testData)
        max_min_scaler = preprocessing.MinMaxScaler()
        train_XM = numpy.array(train_X)
        train_Xc = max_min_scaler.fit_transform(train_XM)

        wr.WriteValueToFile(train_Xc, )
        print train_Xc

        preProce = PreProcess()
        # test_X,test_loc_dic = preProce.preProcessTestData(testData)
        test_X = rf.ReadValueFromFile("testData_X")
        test_loc_dic = rf.ReadValueFromFile("test_loc_dic")
        # X,Y,loc_dic = preProce.preProcess(trainData)
        print "X size:", len(test_X)
        print test_X[0], len(test_loc_dic.keys())
        # for i in xrange(len(X)):
        test_XM = numpy.array(test_X)
        test_Xc = max_min_scaler.fit_transform(test_XM)
        print
        print test_Xc