Example #1
0
def order_count_contact(target='all'):
    """Return the order-count data set for ``target`` as a DataFrame.

    The cached file ``<target>set.csv`` is read when it exists. Otherwise the
    data set is assembled from the per-file CSVs found in the
    ``order_count_replaceTimeBand`` directory, taken in sorted file-name
    order.

    Args:
        target: Which subset to load. ``'train'`` uses the first 17 files,
            ``'test'`` uses every file after the first 17, and any other
            value (default ``'all'``) uses all files. The value is also
            spliced into the cached file name.

    Returns:
        A pandas DataFrame. When rebuilt from the per-file CSVs it is indexed
        by (start_district_id, dest_district_id, date, time), with the source
        column ``half_hour`` renamed to ``time``. It is an empty DataFrame
        when no files are selected.

    NOTE(review): the cached-file path returns the frame exactly as
    ``pd.read_csv`` yields it (no index is set), while the rebuild path
    returns an indexed frame, and nothing in this function writes the cache.
    Confirm which shape callers expect.
    """
    print('start order_count_contact() method')
    try:
        df_dataSet = pd.read_csv('E:\\data\\DiDiData\\data_csv\\dataset\\'+target+'set.csv')
    except FileNotFoundError:
        dir_path = 'E:\\data\\DiDiData\\data_csv\\order_count_30min\\order_count_replaceTimeBand\\'
        file_list = sorted(ft.listdir_nohidden(dir_path))

        # Number of leading files (in sorted order) that make up the train
        # split; the remainder is the test split.
        train_file_count = 17
        if target == 'train':
            selected = file_list[:train_file_count]
        elif target == 'test':
            selected = file_list[train_file_count:]
        else:
            selected = file_list

        index_keys = ['start_district_id', 'dest_district_id', 'date', 'time']
        frames = [
            pd.read_csv(os.path.join(dir_path, file_name))
            .rename(columns={'half_hour': 'time'})
            .set_index(keys=index_keys)
            for file_name in selected
        ]

        # Concatenate once instead of growing the frame inside the loop,
        # which would re-copy every accumulated row on each iteration.
        # pd.concat rejects an empty list, so fall back to an empty frame.
        df_dataSet = pd.concat(frames, axis=0) if frames else pd.DataFrame()

    print('end order_count_contact() method')
    return df_dataSet
Example #2
0
        # print(df.dtypes)

        if i == 0:
            df_out = df
        else:
            # df_out = df_out.append(df, ignore_index=True)
            df_out = df_out.append(df)
        print(fileDate)

        # After the last day's data has been appended
        if i == len(fileList) - 1:
            # if i == 1:
            # df_out = df_out.unstack(0)
            destPath = 'E:\\data\\DiDiData\\data_csv\\order_count_totalCity\\totalFlow_60min.csv'
            df_out.to_csv(destPath)


if __name__ == "__main__":
    # Script entry point: list the input files under dirPath, sort them, and
    # run flow_statistics_totalCity over them with a 60-minute interval and a
    # threshold of 0.

    # Print the time at which the run starts.
    time_Now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('current time:', time_Now)
    dirPath = 'E:\\data\\DiDiData\\data_csv\\order_lite\\'
    fileList = ft.listdir_nohidden(dirPath)
    # Sort in place so the files are handled in file-name order
    # (presumably one file per day, making this date order -- TODO confirm).
    fileList.sort()

    time_interval = timedelta(minutes=60)
    threshold = 0
    # Call the method to obtain the order_count_totalCity that satisfies the
    # time interval and threshold, and write it to a csv file.
    flow_statistics_totalCity(fileList, time_interval, threshold)