Esempio n. 1
0
    def preProcessTestData(self, DataArrays):
        """Turn raw test rows into numeric feature rows.

        Each feature row is ``[int(row[3]), seconds_into_day, loc_index]``.
        ``seconds_into_day`` is the offset of the timestamp in ``row[4]``
        (split on ``" "``, ``"-"`` and ``":"``) from 00:00:00 of the same
        date.  ``loc_index`` is the index of the label in ``row[5]``; the
        mapping stored under ``"train_loc_dic"`` is loaded and extended with
        every label it does not contain yet.

        Args:
            DataArrays: iterable of indexable rows; fields 3, 4 and 5 are read.

        Returns:
            ``(X, loc_dic)`` - the list of feature rows and the extended
            label -> index mapping.  Both are also handed to
            ``WriteResult.WriteValueToFile`` under the names ``"testData_X"``
            and ``"test_loc_dic"``.
        """
        rf = ReadCsvFile()
        loc_dic = rf.ReadValueFromFile("train_loc_dic")
        # Continue numbering after the largest index already handed out.
        # Starting at len(loc_dic) would reuse the highest existing index
        # whenever the stored mapping is 1-based, merging two distinct labels.
        next_index = max(loc_dic.values()) + 1 if loc_dic else 1

        X = []
        for row in DataArrays:
            stamp = row[4].split(" ")
            date_fields = stamp[0].split("-")
            # time.mktime is documented to take a struct_time or a full
            # 9-tuple, so pad the fields and pass tuples rather than lists.
            midnight = tuple([int(x) for x in date_fields] + [0] * 6)
            # int(float(x)) tolerates fractional parts such as "45.0".
            clock_fields = date_fields + stamp[1].split(":")
            moment = tuple([int(float(x)) for x in clock_fields] + [0] * 3)
            seconds_into_day = time.mktime(moment) - time.mktime(midnight)

            loc_place = row[5]
            if loc_place not in loc_dic:
                loc_dic[loc_place] = next_index
                next_index += 1

            X.append([int(row[3]), seconds_into_day, loc_dic[loc_place]])

        wr = WriteResult()
        wr.WriteValueToFile(X, "testData_X")
        wr.WriteValueToFile(loc_dic, "test_loc_dic")
        return X, loc_dic
Esempio n. 2
0
def threadPredict(model, test_data, test_Xc, start_index, end_index, test_loc_dic):
    """Predict one slice of the test set and append the answers to the result file.

    The slice ``test_Xc[start_index:end_index]`` is passed to ``predict``, the
    raw predictions are converted with ``transformResult`` and the outcome is
    written, together with ``test_data[start_index:end_index]``, to
    ``Config.ResultDataPath + "submission_1.csv"``.  The write is guarded by
    the module-level lock ``mu``.  Timings for both phases are printed.

    Args:
        model: model object forwarded to ``predict``.
        test_data: raw test rows; only the slice is written out.
        test_Xc: feature rows aligned with ``test_data``.
        start_index: first row of the slice (inclusive).
        end_index: end of the slice (exclusive).
        test_loc_dic: mapping forwarded to ``transformResult``.
    """
    predict_started = time.time()
    pre_result = predict(model, test_Xc[start_index:end_index])
    predict_cost = time.time() - predict_started
    print("predict from %d to %d. \n cost time :%lf s\n" % (start_index, end_index, predict_cost))

    transform_started = time.time()
    result = transformResult(pre_result, test_loc_dic)
    if mu.acquire(True):
        # Release in ``finally`` so a failing write cannot leave the lock held
        # and block every other worker forever.
        try:
            wr = WriteResult()
            wr.WriteResultAnswer(test_data[start_index:end_index], result,
                                 Config.ResultDataPath + "submission_1.csv")
        finally:
            mu.release()
    transform_cost = time.time() - transform_started
    print("transform index from %d to %d. \n cost time :%lf s\n" % (start_index, end_index, transform_cost))
Esempio n. 3
0
def LoadModel(model_name="predict_Model_new"):
    """Return the stored model, training and storing a new one if loading fails.

    The model is first read with ``ReadCsvFile.ReadValueFromFile(model_name)``.
    If that raises, the training file is read, preprocessed and scaled with
    ``PreProcess``, a model is trained with ``TrainModel`` and the result is
    written back under ``model_name``.

    Args:
        model_name: name under which the model is read and, after a
            retraining, written.

    Returns:
        The loaded or freshly trained model.
    """
    rf = ReadCsvFile()
    try:
        model = rf.ReadValueFromFile(model_name)
    except Exception:
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit and
        # hide the reason for the retraining; record why loading failed.
        logging.warning("could not load model %r, training a new one",
                        model_name, exc_info=True)
    else:
        print("Load Model success!")
        return model

    train_data = rf.ReadTrainFile()
    print(len(train_data))
    preP = PreProcess()
    train_X, train_lab, _ = preP.preProcessTrainData(train_data)
    train_Xc = preP.getFeatureScaler(train_X)
    print("Train model")
    model = TrainModel(train_Xc, train_lab)
    logging.info("save model")
    wr = WriteResult()
    wr.WriteValueToFile(model, model_name)
    return model
Esempio n. 4
0
def main(start_batch=104, batch_size=100):
    """Predict the test set batch by batch and append the answers to a CSV file.

    The model comes from ``LoadModel``; the test rows are read, preprocessed
    and scaled, then predicted in consecutive batches.  Each batch is converted
    with ``transformResult`` and written to
    ``Config.ResultDataPath + "result_1.csv"``.  Timings are printed and logged.

    Args:
        start_batch: index of the first batch to process.  Rows before
            ``start_batch * batch_size`` are never predicted.
            NOTE(review): the default of 104 skips the first 10400 rows; it
            looks like a leftover from resuming an interrupted run - confirm,
            or call ``main(0)`` for a full run.
        batch_size: number of rows per batch.
    """
    print("Start......")
    rf = ReadCsvFile()
    preP = PreProcess()
    model = LoadModel()

    test_data = rf.ReadTestFile()
    test_X, test_loc_dic = preP.preProcessTestData(test_data)
    test_Xc = preP.getFeatureScaler(test_X)
    test_X = None  # the unscaled copy is no longer needed
    length = len(test_Xc)
    print("test data len: %s" % length)
    print("predict result ... ... ")

    # Iterate over batches, not rows: bounding the loop by the row count would
    # keep going long after the data is exhausted and call predict() on empty
    # slices.
    num_batches = (length + batch_size - 1) // batch_size
    for i in range(start_batch, num_batches):
        start_index = i * batch_size
        end_index = min((i + 1) * batch_size, length)

        predict_started = time.time()
        pre_result = predict(model, test_Xc[start_index:end_index])
        # Keep the duration in its own variable so the log line below cannot
        # pick up the timer of the following phase.
        predict_cost = time.time() - predict_started
        print("predict from %d to %d. \n cost time :%lf s\n" % (start_index, end_index, predict_cost))
        logging.info("predict from %d to %d. \ncost time :%lf s" % (start_index, end_index, predict_cost))

        transform_started = time.time()
        result = transformResult(pre_result, test_loc_dic)
        wr = WriteResult()
        wr.WriteResultAnswer(test_data[start_index:end_index], result,
                             Config.ResultDataPath + "result_1.csv")
        transform_cost = time.time() - transform_started
        logging.info("transform index from %d to %d. \ncost time :%lf s" % (start_index, end_index, transform_cost))
        print("transform index from %d to %d. \n cost time :%lf s\n" % (start_index, end_index, transform_cost))
Esempio n. 5
0
    def preProcessTrainData(self, DataArrays):
        """Turn raw training rows into feature rows, labels and a label mapping.

        Each feature row is ``[int(row[3]), seconds_into_day, loc_index]``.
        ``seconds_into_day`` is the offset of the timestamp in ``row[4]``
        (split on ``" "``, ``"-"`` and ``":"``) from 00:00:00 of the same
        date.  ``loc_index`` is the index assigned to the label in ``row[5]``.
        The target for the row is the index of the label in ``row[6]``.  Both
        columns share one mapping whose indices start at 1 and grow in order
        of first appearance.

        Args:
            DataArrays: iterable of indexable rows; fields 3 to 6 are read.

        Returns:
            ``(X, Y, loc_dic)`` - feature rows, target indices and the
            label -> index mapping.  All three are also handed to
            ``WriteResult.WriteValueToFile`` under the names
            ``"trainData_X"``, ``"trainData_Y"`` and ``"train_loc_dic"``.
        """
        X = []
        Y = []
        loc_dic = {}
        next_index = 1
        for row in DataArrays:
            stamp = row[4].split(" ")
            date_fields = stamp[0].split("-")
            # time.mktime is documented to take a struct_time or a full
            # 9-tuple, so pad the fields and pass tuples rather than lists.
            midnight = tuple([int(x) for x in date_fields] + [0] * 6)
            # int(float(x)) tolerates fractional parts such as "45.0", the
            # same way the test-data preprocessing parses its timestamps.
            clock_fields = date_fields + stamp[1].split(":")
            moment = tuple([int(float(x)) for x in clock_fields] + [0] * 3)
            seconds_into_day = time.mktime(moment) - time.mktime(midnight)

            loc_place = row[5]
            if loc_place not in loc_dic:
                loc_dic[loc_place] = next_index
                next_index += 1
            X.append([int(row[3]), seconds_into_day, loc_dic[loc_place]])

            # Build Y: the label in row[6] is indexed through the same mapping.
            des_loc_place = row[6]
            if des_loc_place not in loc_dic:
                loc_dic[des_loc_place] = next_index
                next_index += 1
            Y.append(loc_dic[des_loc_place])

        wr = WriteResult()
        wr.WriteValueToFile(X, "trainData_X")
        wr.WriteValueToFile(Y, "trainData_Y")
        wr.WriteValueToFile(loc_dic, "train_loc_dic")
        return X, Y, loc_dic
Esempio n. 6
0
    def TestReadFile(self):
        """Load the stored train/test artefacts, min-max scale them and print them.

        Reads ``"trainData_X"``, ``"trainData_Y"``, ``"train_loc_dic"``,
        ``"testData_X"`` and ``"test_loc_dic"`` through
        ``ReadCsvFile.ReadValueFromFile``, scales both feature matrices with a
        ``MinMaxScaler``, stores the scaled training matrix and prints the
        scaled matrices plus a few sizes.
        """
        wr = WriteResult()
        rf = ReadCsvFile()
        train_X = rf.ReadValueFromFile("trainData_X")
        # The next two values are not used further; presumably they are read
        # only to exercise loading of these files.
        rf.ReadValueFromFile("trainData_Y")
        rf.ReadValueFromFile("train_loc_dic")

        max_min_scaler = preprocessing.MinMaxScaler()
        train_Xc = max_min_scaler.fit_transform(numpy.array(train_X))

        # Every other WriteValueToFile call passes (value, name); the name was
        # missing here.  NOTE(review): "trainData_Xc" follows the
        # trainData_X / trainData_Y naming - confirm the intended file name.
        wr.WriteValueToFile(train_Xc, "trainData_Xc")
        print(train_Xc)

        test_X = rf.ReadValueFromFile("testData_X")
        test_loc_dic = rf.ReadValueFromFile("test_loc_dic")
        print("X size: %s" % len(test_X))
        print("%s %s" % (test_X[0], len(test_loc_dic)))

        # NOTE(review): fit_transform re-fits the scaler on the test matrix, so
        # train and test are scaled with different ranges - confirm whether
        # transform() with the training fit is what is wanted.
        test_Xc = max_min_scaler.fit_transform(numpy.array(test_X))
        print("")
        print(test_Xc)
Esempio n. 7
0
def TrainModel(train_data,train_lab):
    """Fit a ``GaussianNB`` classifier and store it as ``"predict_Model"``.

    Args:
        train_data: feature matrix passed to ``GaussianNB.fit``.
        train_lab: target values passed to ``GaussianNB.fit``.

    Returns:
        The fitted classifier, which has also been handed to
        ``WriteResult.WriteValueToFile``.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    classifier = GaussianNB().fit(train_data, train_lab)
    WriteResult().WriteValueToFile(classifier, "predict_Model")
    return classifier