def order_count_contact(target='all'):
    """Load the order-count data set selected by ``target`` as one DataFrame.

    The pre-built file ``<target>set.csv`` in the dataset directory is tried
    first. If it does not exist, the per-file CSVs in the
    ``order_count_replaceTimeBand`` directory are read in sorted file-name
    order and stacked row-wise.

    Args:
        target: Which part of the data to load. ``'train'`` selects the first
            17 files, ``'test'`` selects the files from position 17 onwards,
            and any other value (default ``'all'``) selects every file. The
            value is also used verbatim to build the cached file name.

    Returns:
        pandas.DataFrame. On the fallback path the column ``half_hour`` is
        renamed to ``time`` and the frame is indexed by
        ``(start_district_id, dest_district_id, date, time)``. An empty
        DataFrame is returned when no file is selected.

    NOTE(review): on the cached path the frame is returned exactly as
    ``pd.read_csv`` yields it; this function does not set the index there,
    so the two paths may differ in index layout -- confirm against how
    ``<target>set.csv`` is produced.
    """
    print('start order_count_contact() method')
    try:
        df_dataSet = pd.read_csv('E:\\data\\DiDiData\\data_csv\\dataset\\' + target + 'set.csv')
    except FileNotFoundError:
        dir_path = 'E:\\data\\DiDiData\\data_csv\\order_count_30min\\order_count_replaceTimeBand\\'
        file_list = ft.listdir_nohidden(dir_path)
        file_list.sort()

        # Position in the sorted listing where the train/test split happens.
        train_file_count = 17
        # Slicing (rather than indexing up to a fixed bound) keeps this from
        # raising IndexError when the directory holds fewer files than the
        # split position; the available files are loaded instead.
        if target == 'train':
            selected_files = file_list[:train_file_count]
        elif target == 'test':
            selected_files = file_list[train_file_count:]
        else:
            selected_files = file_list

        frames = []
        for file_name in selected_files:
            df = pd.read_csv(os.path.join(dir_path, file_name))
            df.rename(columns={'half_hour': 'time'}, inplace=True)
            df.set_index(keys=['start_district_id', 'dest_district_id', 'date', 'time'], inplace=True)
            frames.append(df)

        # Concatenate once at the end: calling pd.concat inside the loop
        # copies the accumulated frame again for every file.
        # pd.concat raises ValueError on an empty list, hence the guard.
        df_dataSet = pd.concat(frames, axis=0) if frames else pd.DataFrame()
    print('end order_count_contact() method')
    return df_dataSet
# print(df.dtypes) if i == 0: df_out = df else: # df_out = df_out.append(df, ignore_index=True) df_out = df_out.append(df) print(fileDate) # 最后一天的数据加入后 if i == len(fileList) - 1: # if i == 1: # df_out = df_out.unstack(0) destPath = 'E:\\data\\DiDiData\\data_csv\\order_count_totalCity\\totalFlow_60min.csv' df_out.to_csv(destPath) if __name__ == "__main__": time_Now = datetime.now().strftime('%Y-%m-%d %H:%M:%S') print('current time:', time_Now) dirPath = 'E:\\data\\DiDiData\\data_csv\\order_lite\\' fileList = ft.listdir_nohidden(dirPath) fileList.sort() time_interval = timedelta(minutes=60) threshold = 0 # 调用方法获取满足时间间隔和阈值的 order_count_totalCity , 并写入 csv文件 flow_statistics_totalCity(fileList, time_interval, threshold)