def Exchange(path):
    """Rewrite every file under *path* with the ``long`` field placed before ``lat``.

    Each line of each input file is parsed with ``w_StayPoint`` and written
    back out as ``long,lat,arv,lev`` followed by CRLF. The output file keeps
    the input file name and is placed in a directory whose path is the input
    directory path with the first occurrence of ``'StayPoint'`` replaced by
    ``'E_StayPoint'``; that directory is created through ``mkdir``.

    NOTE(review): if a directory path does not contain ``'StayPoint'`` the
    output path equals the input path, so the file is rewritten in place.

    :param path: root directory; files in it and in all of its
        sub-directories are processed
    :return: None
    """
    for root, _dirs, files in os.walk(path):
        for file in files:
            print(file)

            # Read the whole input first; the handle is released before the
            # output file is opened (input and output may be the same path).
            with codecs.open(root + '\\' + file, 'r', encoding='utf-8') as doc_handle:
                lines = doc_handle.readlines()

            new_root = root.replace('StayPoint', 'E_StayPoint', 1) + '\\'
            mkdir(new_root)
            new_path = new_root + file
            print(new_path)

            # The context manager guarantees the output handle is closed even
            # when parsing or formatting a record raises.
            with codecs.open(new_path, 'w', encoding='utf-8') as destination_handle:
                # Parse every line before writing anything, as before.
                points = [w_StayPoint(line) for line in lines]
                for p in points:
                    # Fields are concatenated directly, so they are expected
                    # to already be strings.
                    destination_handle.write(
                        p.long + ',' + p.lat + ',' + p.arv + ',' + p.lev + '\r\n')
    print(path, 'Exchange over!')
def Sorter(path):
    """Write a copy of every file under *path* with its records sorted by ``date_time``.

    Each line of each input file is parsed with ``Point``; the parsed records
    are sorted by their ``date_time`` attribute and written out as
    ``id,longitude,latitude,date_time`` followed by CRLF. The output file
    keeps the input file name and is placed in a directory whose path is the
    input directory path with the first occurrence of ``'Hours'`` replaced by
    ``'S_Hours'``; that directory is created through ``mkdir``.

    NOTE(review): if a directory path does not contain ``'Hours'`` the output
    path equals the input path, so the file is rewritten in place.

    :param path: root directory; files in it and in all of its
        sub-directories are processed
    :return: None
    """
    for root, _dirs, files in os.walk(path):
        for file in files:
            print(file)

            # Read the whole input first; the handle is released before the
            # output file is opened (input and output may be the same path).
            with codecs.open(root + '\\' + file, 'r', encoding='utf-8') as doc_handle:
                lines = doc_handle.readlines()

            new_root = root.replace('Hours', 'S_Hours', 1) + '\\'
            mkdir(new_root)
            new_path = new_root + file
            print(new_path)

            # The context manager guarantees the output handle is closed even
            # when parsing, sorting or formatting a record raises.
            with codecs.open(new_path, 'w', encoding='utf-8') as destination_handle:
                # sorted() is stable: records with equal date_time keep their
                # original file order.
                points = sorted(
                    (Point(line) for line in lines),
                    key=lambda point: point.date_time)
                for p in points:
                    # p.id is concatenated directly, so it is expected to be
                    # a string; the other fields are converted with str().
                    destination_handle.write(
                        p.id + ',' + str(p.longitude) + ',' + str(p.latitude)
                        + ',' + str(p.date_time) + '\r\n')
    print(path, 'Sort over!')
from Format.T_Drive import mkdir ''' 按照小时切割文件 ''' root_path = "E:\DataSets\Preprocessed\Days\T-Drive\\" destination_folder = "E:\DataSets\Preprocessed\Hours\T-Drive\\" if __name__ == '__main__': print(os.path.exists(root_path)) walk_results = os.walk(root_path) # 遍历路径下所有文件夹 for root, dirs, files in walk_results: if len(dirs) > 0: for dir in dirs: for i in range(0, 24): mkdir(destination_folder + dir + '\\' + str(i)) if len(files) > 0: # 针对每个独立文件 for file in files: hour_list = list(list() for i in range(0, 24)) # 打开文件 path = root + '\\' + file file_handle = codecs.open(path, 'r', encoding='utf-8') lines = file_handle.readlines() # 针对每行数据 for line in lines: # 获取该行数据小时数 hour = re.search("([0-9]+)\:", line)[1] I_hour = int(hour) # 根据小时数存入24维数组
if __name__ == '__main__': # 定义输入输出路径 root_path = "E:\DataSets\Preprocessed\Days\Feb\\" destination_folder = "E:\DataSets\Preprocessed\StayPoint\Feb\\" # 检验输入路径 print(os.path.exists(root_path)) # 遍历输入路径 walk_results = os.walk(root_path) # 遍历路径下所有文件夹 for root, dirs, files in walk_results: if len(dirs) > 0: for dir in dirs: new_folder = (destination_folder + dir).replace( 'Days', 'StayPoint', 1) mkdir(new_folder) # 当遍历到叶节点层时,返回所有文件 if len(files) > 0: # 遍历当前文件夹所有文件 for file in files: # 初始化用于存储point的列表 list_temp = list() # 打开并读入某车某日的Trajectory轨迹 path = root + '\\' + file file_handle = codecs.open(path, 'r', encoding='utf-8') lines = file_handle.readlines() file_handle.close() new_path = root.replace('Days', 'StayPoint', 1) + '\\' + file print(new_path) destination_handle = codecs.open(new_path,
''' 根据驻留点切割文件 ''' root_path = "E:\DataSets\Preprocessed\Days\Feb\\" walk_results = os.walk(root_path) # 对于每一个day 都要创建对应的文件(即使不切割也要排序) for root, dirs, files in walk_results: # 当遍历到叶节点层时,返回所有文件 for file in files: # 如果是文件,读入该文件 doc_handle = codecs.open(root + '\\' + file, 'r', encoding='utf-8') # 打开文件 lines = doc_handle.readlines() doc_handle.close() # 生成对应的 new_root = root.replace('Days', 'Path_Days', 1) + '\\' mkdir(new_root) list_temp = list() # 存入数组 for line in lines: temp_p = Point(line) list_temp.append(temp_p) # 按照时间对数组排序 list_temp = sorted(list_temp, key=lambda point: point.date_time) # 判断是否需要通过staypoint切割; stay_point_root = root.replace('Days', 'StayPoint', 1) + '\\' stay_point_path = stay_point_root + file if os.path.exists(stay_point_path): stay_point_handle = codecs.open(stay_point_path, 'r', encoding='utf-8') sp_lines = stay_point_handle.readlines() sp_index = 0 for sp_line in sp_lines:
match_objects = re.split(';', line_object) Feb_id = match_objects[0] Feb_date_time = re.split(' ', match_objects[1]) Feb_date = Feb_date_time[0] Feb_time = Feb_date_time[1] points = re.split(' ', re.search("\((.+?)\)", match_objects[2])[1]) Feb_longitude = points[0] Feb_latitude = points[1] # print(Feb_id, Feb_longitude, Feb_latitude, match_objects[1]) if last_date == Feb_date: list_temp.append([Feb_id, Feb_longitude, Feb_latitude, match_objects[1]]) else: # 输出上一个 # 生成date文件夹 _destination_folder = mkdir(destination_folder + last_date) # id标识 last_id = 'start' # 用于存放相同id条目的数组 list_sameid_temp = [] # 数组排序 list_temp.sort() # 遍历同date的数组 for item in list_temp: if item[0] == last_id: list_sameid_temp.append(item) else: if last_id != 'start': print(item[0], len(list_sameid_temp)) # 生成同id,date文件 destination_address = destination_folder + last_date + '\\' + last_id + '_' + last_date + '.txt'
# Split every input file into per-date files and record an "index<TAB>file
# name" mapping in the file at dict_cab.
# NOTE(review): id_name_handle is not closed anywhere in this section;
# confirm that it is closed further down.
id_name_handle = codecs.open(dict_cab, 'w', encoding='utf-8')
for root, dirs, files in walk_results:
    # The numeric index restarts at 0 for every directory yielded by the walk.
    index = 0
    # One input file per id.
    for file in files:
        index += 1
        id_name_handle.write(str(index) + '\t' + file + '\r\n')

        # NOTE(review): root and file are joined without a separator, so this
        # presumably relies on root ending with a path separator; verify.
        # The context manager releases the input handle once it has been read.
        with codecs.open(root + file, "r", encoding='utf-8') as file_handle:
            line_objects = file_handle.readlines()

        # One input line per timestamp.
        for line in line_objects:
            # Fields are separated by single spaces: field 0 is used as the
            # latitude, field 1 as the longitude, and field 3 is converted to
            # int and passed to timestamp_datetime.
            match_objects = re.split(' ', line.strip('\r\n'))
            latitude = match_objects[0]
            longitude = match_objects[1]
            datetime = timestamp_datetime(int(match_objects[3]))
            # The text before the first space of the formatted timestamp is
            # used as the date.
            date = re.split(' ', datetime)[0]

            # Create the per-date output folder.
            _destination_folder = destination_folder + date
            mkdir(_destination_folder)
            destination_file_address = _destination_folder + '\\' + str(
                index) + '_' + date + '.txt'

            # Append mode: all lines of one id that fall on the same date
            # accumulate in the same "<index>_<date>.txt" file.
            with codecs.open(destination_file_address, 'a+',
                             encoding='utf-8') as destination_file_handle:
                destination_file_handle.write(
                    str(index) + ',' + longitude + ',' + latitude + ','
                    + datetime + '\r\n')
        print(file)