def Exchange(path):
    """
    Rewrite every file under ``path`` with the longitude column before latitude.

    The directory tree is walked recursively. Each line of each file is
    wrapped in ``w_StayPoint`` and written back out as
    ``long,lat,arv,lev`` (CRLF terminated). Output goes to a mirrored
    directory obtained by replacing the first ``'StayPoint'`` in the source
    directory with ``'E_StayPoint'``; that directory is created with ``mkdir``.

    Paths are assembled with a literal backslash, i.e. Windows-style paths
    are assumed.

    :param path: root directory; files in it and in its sub-directories are processed
    :return: None
    """
    # Walk every folder below the given path.
    for root, dirs, files in os.walk(path):
        for file in files:
            print(file)
            # Read the source file; ``with`` releases the handle even when
            # reading or decoding fails.
            with codecs.open(root + '\\' + file, 'r',
                             encoding='utf-8') as doc_handle:
                lines = doc_handle.readlines()

            new_root = root.replace('StayPoint', 'E_StayPoint', 1) + '\\'
            mkdir(new_root)
            new_path = new_root + file
            print(new_path)

            # Parse everything before the destination is opened for writing,
            # so a malformed line cannot leave a truncated output file behind.
            stay_points = [w_StayPoint(line) for line in lines]

            with codecs.open(new_path, 'w',
                             encoding='utf-8') as destination_handle:
                for p in stay_points:
                    # Longitude is emitted first, then latitude.
                    destination_handle.write(
                        p.long + ',' + p.lat + ',' + p.arv + ',' + p.lev +
                        '\r\n')

    print(path, 'Exchange over!')
def Sorter(path):
    """
    Sort the records of every file under ``path`` by their ``date_time``.

    The directory tree is walked recursively. Each line of each file is
    wrapped in ``Point``, the points are sorted by ``date_time`` and written
    out as ``id,longitude,latitude,date_time`` (CRLF terminated). Output goes
    to a mirrored directory obtained by replacing the first ``'Hours'`` in
    the source directory with ``'S_Hours'``; that directory is created with
    ``mkdir``.

    Paths are assembled with a literal backslash, i.e. Windows-style paths
    are assumed.

    :param path: root directory; files in it and in its sub-directories are processed
    :return: None
    """
    # Walk every folder below the given path.
    for root, dirs, files in os.walk(path):
        for file in files:
            print(file)
            # Read the source file; ``with`` releases the handle even when
            # reading or decoding fails.
            with codecs.open(root + '\\' + file, 'r',
                             encoding='utf-8') as doc_handle:
                lines = doc_handle.readlines()

            new_root = root.replace('Hours', 'S_Hours', 1) + '\\'
            mkdir(new_root)
            new_path = new_root + file
            print(new_path)

            # Parse and sort before the destination is opened for writing,
            # so a malformed line cannot leave a truncated output file behind.
            points = sorted((Point(line) for line in lines),
                            key=lambda point: point.date_time)

            with codecs.open(new_path, 'w',
                             encoding='utf-8') as destination_handle:
                for p in points:
                    # ``p.id`` is concatenated as-is (it must already be a
                    # string); the other fields are converted with ``str``.
                    destination_handle.write(
                        p.id + ',' + str(p.longitude) + ',' +
                        str(p.latitude) + ',' + str(p.date_time) + '\r\n')

    print(path, 'Sort over!')
from Format.T_Drive import mkdir
# The string literal below reads "split files by hour".
# NOTE(review): this script fragment is cut off inside the per-line loop;
# the code that fills ``hour_list`` and writes output is not visible here.
'''
按照小时切割文件
'''
root_path = "E:\DataSets\Preprocessed\Days\T-Drive\\"
destination_folder = "E:\DataSets\Preprocessed\Hours\T-Drive\\"

if __name__ == '__main__':
    print(os.path.exists(root_path))
    walk_results = os.walk(root_path)
    # Walk every folder under the path
    for root, dirs, files in walk_results:
        if len(dirs) > 0:
            for dir in dirs:
                # One sub-folder per hour (0..23) for each directory found
                for i in range(0, 24):
                    mkdir(destination_folder + dir + '\\' + str(i))

        if len(files) > 0:
            # For each individual file
            for file in files:
                # 24 independent empty lists, one per hour
                hour_list = list(list() for i in range(0, 24))
                # Open the file
                path = root + '\\' + file
                file_handle = codecs.open(path, 'r', encoding='utf-8')
                lines = file_handle.readlines()
                # For each line of data
                for line in lines:
                    # Extract the hour of this line: the first run of digits
                    # that is immediately followed by ':'
                    hour = re.search("([0-9]+)\:", line)[1]
                    I_hour = int(hour)
                    # Store into the 24-slot list according to the hour
Example #4
0
# NOTE(review): this script fragment is truncated; it ends in the middle of
# the ``codecs.open(new_path, ...`` call, so the stay-point computation and
# the output format are not visible here.
if __name__ == '__main__':
    # Define the input and output paths
    root_path = "E:\DataSets\Preprocessed\Days\Feb\\"
    destination_folder = "E:\DataSets\Preprocessed\StayPoint\Feb\\"
    # Check the input path (prints whether it exists)
    print(os.path.exists(root_path))
    # Walk the input path
    walk_results = os.walk(root_path)
    # Walk every folder under the path
    for root, dirs, files in walk_results:
        if len(dirs) > 0:
            for dir in dirs:
                # Mirror each sub-directory below the destination folder
                new_folder = (destination_folder + dir).replace(
                    'Days', 'StayPoint', 1)
                mkdir(new_folder)
        # When the walk reaches the leaf level, all files are returned
        if len(files) > 0:
            # Iterate over all files in the current folder
            for file in files:
                # Initialise the list used to store points
                list_temp = list()
                # Open and read the trajectory of one vehicle for one day
                path = root + '\\' + file
                file_handle = codecs.open(path, 'r', encoding='utf-8')
                lines = file_handle.readlines()
                file_handle.close()

                # Output path: same location with the first 'Days' replaced
                # by 'StayPoint'
                new_path = root.replace('Days', 'StayPoint', 1) + '\\' + file
                print(new_path)
                destination_handle = codecs.open(new_path,
'''
根据驻留点切割文件
'''
# The string literal above reads "split files according to stay points".
# NOTE(review): this script fragment is truncated; it ends on the header of
# the loop over stay-point lines, so the actual splitting logic is not
# visible here.
root_path = "E:\DataSets\Preprocessed\Days\Feb\\"
walk_results = os.walk(root_path)
# A corresponding file has to be created for every day (the data is sorted
# even when no splitting takes place)
for root, dirs, files in walk_results:
    # When the walk reaches the leaf level, all files are returned
    for file in files:
        # If it is a file, read it in
        doc_handle = codecs.open(root + '\\' + file, 'r', encoding='utf-8')  # open the file
        lines = doc_handle.readlines()
        doc_handle.close()
        # Create the corresponding output directory ('Days' -> 'Path_Days')
        new_root = root.replace('Days', 'Path_Days', 1) + '\\'
        mkdir(new_root)
        list_temp = list()
        # Store the parsed lines in a list
        for line in lines:
            temp_p = Point(line)
            list_temp.append(temp_p)
        # Sort the list by time
        list_temp = sorted(list_temp, key=lambda point: point.date_time)
        # Decide whether the file has to be split by stay points: this is the
        # case when a file of the same name exists in the 'StayPoint' tree
        stay_point_root = root.replace('Days', 'StayPoint', 1) + '\\'
        stay_point_path = stay_point_root + file
        if os.path.exists(stay_point_path):
            stay_point_handle = codecs.open(stay_point_path, 'r', encoding='utf-8')
            sp_lines = stay_point_handle.readlines()
            sp_index = 0
            for sp_line in sp_lines:
    # NOTE(review): fragment of a loop body; the enclosing function/loop and
    # the definitions of ``line_object``, ``last_date``, ``list_temp`` and
    # ``destination_folder`` are not visible here, and the fragment is cut off
    # after ``destination_address`` is built.
    # Split the record on ';': field 0 is the id, field 1 is "date time",
    # field 2 contains the coordinates inside parentheses.
    match_objects = re.split(';', line_object)
    Feb_id = match_objects[0]
    Feb_date_time = re.split(' ', match_objects[1])
    Feb_date = Feb_date_time[0]
    Feb_time = Feb_date_time[1]
    # Text inside the first "(...)" split on a space: longitude, then latitude
    points = re.split(' ', re.search("\((.+?)\)", match_objects[2])[1])
    Feb_longitude = points[0]
    Feb_latitude = points[1]
    # print(Feb_id, Feb_longitude, Feb_latitude, match_objects[1])

    if last_date == Feb_date:
        # Same date as the previous record: keep collecting
        list_temp.append([Feb_id, Feb_longitude, Feb_latitude, match_objects[1]])
    else:
        # Output the previous date's entries
        # Create the date folder
        _destination_folder = mkdir(destination_folder + last_date)
        # id marker ('start' means no id has been seen yet)
        last_id = 'start'
        # List holding the entries that share the same id
        list_sameid_temp = []
        # Sort the list (entries are lists, so they compare element by
        # element, starting with the id)
        list_temp.sort()
        # Iterate over the entries of the same date
        for item in list_temp:
            if item[0] == last_id:
                list_sameid_temp.append(item)
            else:
                if last_id != 'start':
                    print(item[0], len(list_sameid_temp))
                    # Build the file path for this id and date
                    destination_address = destination_folder + last_date + '\\' + last_id + '_' + last_date + '.txt'
    # NOTE(review): fragment of a function body; the enclosing ``def`` and the
    # definitions of ``dict_cab``, ``walk_results``, ``destination_folder`` and
    # ``timestamp_datetime`` are not visible here.
    # Index file mapping a running number to each source file name
    id_name_handle = codecs.open(dict_cab, 'w', encoding='utf-8')
    for root, dirs, files in walk_results:
        index = 0
        # Each id corresponds to one file
        for file in files:
            index += 1
            id_name_handle.writelines(str(index) + '\t' + file + '\r\n')
            # NOTE(review): ``root + file`` has no separator in between, so
            # ``root`` presumably ends with one - confirm against the caller.
            # NOTE(review): ``file_handle`` is not closed in the visible code.
            file_handle = codecs.open(root + file, "r", encoding='utf-8')
            line_objects = file_handle.readlines()
            # Each timestamp corresponds to one line
            for line in line_objects:
                # Parse the line: space-separated, field 0 latitude,
                # field 1 longitude, field 3 an integer timestamp
                match_objects = re.split(' ', line.strip('\r\n'))
                latitude = match_objects[0]
                longitude = match_objects[1]
                datetime = timestamp_datetime(int(match_objects[3]))
                # The date is the part of the formatted value before the
                # first space
                date = re.split(' ', datetime)[0]
                # Create the destination folder
                _destination_folder = destination_folder + date
                mkdir(_destination_folder)
                destination_file_address = _destination_folder + '\\' + str(
                    index) + '_' + date + '.txt'
                # The per-id/per-date file is reopened in append mode for
                # every single line and closed again right after the write
                destination_file_handle = codecs.open(destination_file_address,
                                                      'a+',
                                                      encoding='utf-8')
                destination_file_handle.writelines(
                    str(index) + ',' + longitude + ',' + latitude + ',' +
                    datetime + '\r\n')
                destination_file_handle.close()
            print(file)