Beispiel #1
0
def read_own_data_file(file_name):
    """Load track points from a comma-separated text file.

    Every line is split on ',' and interpreted as follows: field 0 is the
    latitude, field 1 the longitude and field 2 a time string.  The time
    string has everything after the first '.' dropped and 'T' replaced by a
    space before it is handed to ``convert_to_milsecond``.  The untouched
    field 2 is additionally stored on the point as ``time_str``.

    :param file_name: path of the file to read.
    :return: list of ``TrackPoint`` objects.  The loader is best-effort: if
        opening or parsing fails, the error is printed and the points read
        so far (possibly none) are returned.
    """
    point_list = []
    try:
        # ``with`` closes the handle even when a malformed line raises.
        with open(file_name, 'r') as f:
            for line in f:
                fields = line.split(',')
                # Careful not to swap them: the file stores latitude first,
                # while TrackPoint takes the longitude first.
                lon = fields[1]
                lat = fields[0]
                t = convert_to_milsecond(
                    fields[2].split('.')[0].replace('T', ' '))
                p = TrackPoint(float(lon), float(lat), long(t))
                p.time_str = fields[2]
                point_list.append(p)
    except Exception as e:
        # Deliberately broad: report the problem and keep what was parsed.
        print(e)
    return point_list
def get_stop_position(compressed_data, centre_index_list):
    """Return new ``TrackPoint`` copies of the points at the given indices.

    :param compressed_data: indexable collection of points exposing
        ``lon``, ``lat`` and ``time`` attributes.
    :param centre_index_list: iterable of indices into ``compressed_data``.
    :return: list with one freshly built ``TrackPoint`` per index, in the
        order the indices were given.
    """
    chosen = (compressed_data[i] for i in centre_index_list)
    return [TrackPoint(src.lon, src.lat, src.time) for src in chosen]
Beispiel #3
0
def calculate_cluster_centre(cluster):
    """Return the mean position of a cluster as a ``TrackPoint`` with time 0.

    :param cluster: sized iterable of points exposing ``lon`` and ``lat``.
    :return: ``TrackPoint`` whose coordinates are the averages of the member
        coordinates; for an empty cluster both coordinates are 0.0.
    """
    size = len(cluster)
    centre_lon = 0.0
    centre_lat = 0.0
    for member in cluster:
        # Each member contributes its share of the mean directly, so the
        # accumulators hold the average once the loop ends.
        centre_lon += member.lon / size
        centre_lat += member.lat / size
    return TrackPoint(centre_lon, centre_lat, 0)
Beispiel #4
0
def get_code(code_dic, point):
    """Return the code of the region closest to ``point``.

    :param code_dic: mapping of code -> pair whose first two items are
        passed to ``TrackPoint`` as longitude and latitude.
    :param point: position to encode.
    :return: key of the entry with the smallest
        ``Utility.distance_calculate`` result, or ``'00'`` when no entry
        lies within a distance of 2000.
    """
    best_code, best_dist = '00', 2000
    for code, coords in code_dic.items():
        dist = Utility.distance_calculate(
            point, TrackPoint(coords[0], coords[1], 0))
        # '<=' on purpose: on equal distance the later entry wins.
        if dist <= best_dist:
            best_code, best_dist = code, dist
    return best_code
Beispiel #5
0
def read_geolife_data_file(file_name):
    """Load track points from a comma-separated file.

    Every line is split on ',': fields 0 and 1 are converted to ``float``
    and passed as the first and second ``TrackPoint`` arguments, field 2 is
    passed through ``convert_to_milsecond`` and used as the time.

    :param file_name: path of the file to read.
    :return: list of ``TrackPoint`` objects, or ``None`` if opening or
        parsing the file failed (the error is printed).
    """
    try:
        point_list = []
        # ``with`` closes the handle even when a malformed line raises.
        with open(file_name, 'r') as f:
            for line in f:
                fields = line.split(',')  # split once instead of per field
                point_list.append(
                    TrackPoint(float(fields[0]), float(fields[1]),
                               long(convert_to_milsecond(fields[2]))))
        return point_list
    except Exception as e:
        # Deliberately broad best-effort loader: report and signal failure.
        print(e)
        return None
Beispiel #6
0
def read_data_file(file_name):
    """Load track points from a comma-separated file.

    Every line is split on ',': fields 3 and 4 are converted to ``float``
    and passed as the first and second ``TrackPoint`` arguments, field 2 is
    converted with ``long`` and used as the time.

    :param file_name: path of the file to read.
    :return: list of ``TrackPoint`` objects, or ``None`` if opening or
        parsing the file failed (the error is printed).
    """
    try:
        point_list = []
        # ``with`` closes the handle even when a malformed line raises.
        with open(file_name, 'r') as f:
            for line in f:
                fields = line.split(',')  # split once instead of per field
                point_list.append(
                    TrackPoint(float(fields[3]), float(fields[4]),
                               long(fields[2])))
        return point_list
    except Exception as e:
        # Deliberately broad best-effort loader: report and signal failure.
        print(e)
        return None
Beispiel #7
0
def extract_all_position(f_name):
    """Collect every position stored in a stop-position file.

    The file is read as a sequence of records.  Each record consists of:
    one header line (its content is not used here), two lines that are split
    on ':' or ',' with the coordinates in fields 1 and 2 (start and end
    position), one line holding the number of stop lines, and then that many
    lines split on ',' with the coordinates in fields 0 and 1.

    :param f_name: path of the file to read.
    :return: list of ``TrackPoint`` objects (time 0) in file order: start,
        end, then the stops of each record.  An empty file yields ``[]``.
    :raises ValueError, IndexError: if a record is malformed or truncated.
    """
    position_list = []
    # ``with`` closes the handle even when a malformed record raises.
    with open(f_name, 'r') as f_r:
        line = f_r.readline()  # record header line
        while line != '':
            # First two lines of a record: departure and destination.
            for _ in range(2):
                content = re.split(':|,', f_r.readline())
                position_list.append(
                    TrackPoint(float(content[1]), float(content[2]), 0))
            stop_num = int(f_r.readline().split('\n')[0])
            for _ in range(stop_num):
                content = re.split(',', f_r.readline())
                position_list.append(
                    TrackPoint(float(content[0]), float(content[1]), 0))
            line = f_r.readline()  # next record header, '' at end of file
    return position_list
Beispiel #8
0
def calculate_sse_coeff(clusters):
    """Return the average, over clusters, of the mean distance to the centre.

    For every cluster the centre is the arithmetic mean of the member
    ``lon``/``lat`` values; the cluster's score is the mean of
    ``distance_calculate(point, centre)`` over its members.  The result is
    the mean of these per-cluster scores.

    :param clusters: iterable of clusters, each a sized iterable of points
        exposing ``lon`` and ``lat``.
    :return: the averaged score as a float.  Empty clusters are ignored, and
        0.0 is returned when there is no non-empty cluster (previously both
        situations raised ``ZeroDivisionError``).
    """
    sse_sum = 0
    counted = 0
    for cluster in clusters:
        points_len = len(cluster)
        if points_len == 0:
            # An empty cluster has no centre; skip it instead of dividing
            # by zero.
            continue
        lon_sum = 0.0
        lat_sum = 0.0
        for point in cluster:
            lon_sum += point.lon
            lat_sum += point.lat
        center_point = TrackPoint(1.0 * lon_sum / points_len,
                                  1.0 * lat_sum / points_len, 0)
        dist_sum = 0
        for point in cluster:
            dist_sum += distance_calculate(point, center_point)
        sse_sum += 1.0 * dist_sum / points_len
        counted += 1
    if counted == 0:
        return 0.0
    return 1.0 * sse_sum / counted
Beispiel #9
0
def encode_stop_positions(data_file, code_save_file):
    """Assign region codes to all positions and write encoded trajectories.

    Steps:

    1. Hot spots obtained via ``extract_stop_position`` and
       ``extract_hot_spot`` receive consecutive codes starting at '1000'.
    2. Every position returned by ``extract_all_position`` that is farther
       than 200 (per ``Utility.distance_calculate``) from all coded
       positions receives the next free code.
    3. The code table is written to ``code_save_file``: first the number of
       entries, then one ``code:lon,lat,`` line per entry in sorted key
       order.
    4. ``data_file`` is re-read record by record (header line, start line,
       end line, stop count, stop lines) and each trajectory is written as
       a comma-separated code sequence start, stops..., end, using
       ``get_code`` for every position.  If the last two codes are equal
       the last one is dropped; afterwards, if the first two codes are
       equal the first one is dropped.

    :param data_file: path of the stop-position file to encode.
    :param code_save_file: path of the output file (overwritten).
    :raises ValueError, IndexError: if ``data_file`` is malformed.
    """
    data = extract_stop_position(data_file)
    hot_spot_list = extract_hot_spot(data)

    next_id = 1000
    position_code_dic = {}
    for point in hot_spot_list:
        position_code_dic[str(next_id)] = (point.lon, point.lat)
        next_id += 1

    for position in extract_all_position(data_file):
        # A position is already covered when any coded position lies
        # within a distance of 200.
        is_encoded = any(
            Utility.distance_calculate(
                position, TrackPoint(coords[0], coords[1], 0)) <= 200
            for coords in position_code_dic.values())
        if not is_encoded:
            position_code_dic[str(next_id)] = (position.lon, position.lat)
            next_id += 1

    # ``with`` closes both files even when a malformed record raises.
    with open(code_save_file, 'w') as f_w:
        f_w.write(str(len(position_code_dic)) + '\n')
        for k in sorted(position_code_dic.keys()):
            p = position_code_dic[k]
            f_w.write(k + ':' + str(p[0]) + ',' + str(p[1]) + ',\n')

        # Encode the trajectories one after another.
        with open(data_file, 'r') as f_r:
            line = f_r.readline()  # record header line
            while line != '':
                # The first two lines hold the departure and destination.
                content = re.split(':|,', f_r.readline())
                s_position = TrackPoint(
                    float(content[1]), float(content[2]), 0)
                traj_code = [get_code(position_code_dic, s_position)]

                # The destination is parsed now but encoded last.
                content = re.split(':|,', f_r.readline())
                e_position = TrackPoint(
                    float(content[1]), float(content[2]), 0)

                stop_num = int(f_r.readline().split('\n')[0])
                for _ in range(stop_num):
                    content = re.split(',', f_r.readline())
                    position = TrackPoint(
                        float(content[0]), float(content[1]), 0)
                    traj_code.append(get_code(position_code_dic, position))

                traj_code.append(get_code(position_code_dic, e_position))

                # Collapse a repeated code at the end, then at the start.
                if len(traj_code) >= 2 and traj_code[-1] == traj_code[-2]:
                    del traj_code[-1]
                if len(traj_code) >= 2 and traj_code[0] == traj_code[1]:
                    del traj_code[0]

                f_w.write(','.join(traj_code) + '\n')
                line = f_r.readline()  # next record header, '' at EOF
Beispiel #10
0
def plot_hot_spot(file_name,
                  save_path=u"C:\\Users\\Administrator\\Desktop\\result.png"):
    """Plot routes, stops, start/end positions and hot spots of a file.

    The file is read record by record (header line, start line, end line,
    stop count, stop lines).  Each route (start, stops..., end) is drawn as
    a dashed black line.  On top of that the function draws all stop
    positions returned by ``extract_stop_position`` (blue crosses), all
    start positions (green '<'), all end positions (red '>') and the hot
    spots returned by ``extract_hot_spot`` (yellow stars).  The figure is
    saved at 400 dpi and then shown.

    :param file_name: path of the stop-position file.
    :param save_path: where the figure is saved; defaults to the location
        that used to be hard-coded.
    :raises ValueError, IndexError: if ``file_name`` is malformed.
    """
    data = extract_stop_position(file_name)
    hot_spot_list = extract_hot_spot(data)
    start_positions = []
    end_positions = []

    # ``with`` closes the file even when a malformed record raises.
    with open(file_name, 'r') as f_r:
        line = f_r.readline()  # record header line
        while line != '':
            # The first two lines hold the departure and destination.
            content = re.split(':|,', f_r.readline())
            s_position = TrackPoint(float(content[1]), float(content[2]), 0)
            start_positions.append(s_position)
            content = re.split(':|,', f_r.readline())
            e_position = TrackPoint(float(content[1]), float(content[2]), 0)
            end_positions.append(e_position)

            route = [s_position]
            stop_num = int(f_r.readline().split('\n')[0])
            for _ in range(stop_num):
                content = re.split(',', f_r.readline())
                route.append(
                    TrackPoint(float(content[0]), float(content[1]), 0))
            route.append(e_position)
            plt.plot([p.lon for p in route], [p.lat for p in route],
                     '.k--', markersize=4)
            line = f_r.readline()  # next record header, '' at EOF

    # All stop positions.
    stop_lon = [p.lon for p in data]
    stop_lat = [p.lat for p in data]
    l1, = plt.plot(stop_lon, stop_lat, 'xb', markersize=10)

    # Start and end positions.
    s_p_lon = [p.lon for p in start_positions]
    s_p_lat = [p.lat for p in start_positions]
    e_p_lon = [p.lon for p in end_positions]
    e_p_lat = [p.lat for p in end_positions]
    l2, = plt.plot(s_p_lon, s_p_lat, '<g')
    l3, = plt.plot(e_p_lon, e_p_lat, '>r')

    # Hot-spot locations.
    hot_spot_lon = [p.lon for p in hot_spot_list]
    hot_spot_lat = [p.lat for p in hot_spot_list]
    l4, = plt.plot(hot_spot_lon, hot_spot_lat, '*y', markersize=16)

    # Axis ranges: data extent plus a margin of 1/15 on each side.  Skipped
    # when nothing was plotted, because min()/max() reject empty input.
    all_lon = stop_lon + s_p_lon + e_p_lon
    all_lat = stop_lat + s_p_lat + e_p_lat
    if all_lon:
        lon_low_limit, lon_high_limit = min(all_lon), max(all_lon)
        lat_low_limit, lat_high_limit = min(all_lat), max(all_lat)
        lon_interval = (lon_high_limit - lon_low_limit) / 15
        lat_interval = (lat_high_limit - lat_low_limit) / 15
        plt.xlim(lon_low_limit - lon_interval, lon_high_limit + lon_interval)
        plt.ylim(lat_low_limit - lat_interval, lat_high_limit + lat_interval)

    plt.xlabel('longitude')
    plt.ylabel('latitude')
    plt.legend((l2, l1, l3, l4),
               ('start position', 'stop position', 'end position',
                'hot-spot position'),
               loc='best', numpoints=1, ncol=1, frameon=False)
    plt.savefig(save_path, dpi=400)
    plt.show()
Beispiel #11
0
def _fetch_track_points(cur, traid):
    """Load the points of one trajectory, ordered by time, as TrackPoints."""
    cur.execute(
        "SELECT longitude,latitude,time_date FROM owndata_point "
        "WHERE traid=%s ORDER BY time_date;", (traid,))
    return [
        TrackPoint(float(record[0]), float(record[1]),
                   long(Utility.convert_to_milsecond(str(record[2]))))
        for record in cur.fetchall()
    ]


def user_stops_observation(user_id):
    """Detect, export and plot the stop positions of one user's trajectories.

    For every trajectory of ``user_id`` found in ``owndata_trajectory`` the
    points are loaded from ``owndata_point``.  Trajectories with fewer than
    500 points are skipped.  For the others, stop clusters are obtained with
    ``dbscan_process`` followed by ``Utility.merge_clusters`` and
    ``Utility.clusters_refinement``; the mean position of each cluster is a
    stop position.

    Side effects:

    * writes ``E:\\Science\\<user_id>_stop_positions.txt`` with, per
      trajectory: the trajectory id, ``start position:lon,lat,``,
      ``end position:lon,lat,``, the number of stops and one ``lon,lat,``
      line per stop;
    * draws each route (start, stops..., end) as a dashed line plus markers
      for stops, starts and ends, saves the figure at 1000 dpi to the
      Administrator desktop and shows it;
    * prints each trajectory id and the elapsed time in minutes.

    :param user_id: value matched against ``owndata_trajectory.objid``.
    """
    # Local import so this block does not depend on the file's import list.
    from contextlib import closing

    # Parameters handed to dbscan_process.
    nap = 31
    sigma1 = 0.3
    velocity_sigma = 0.5
    begin_time = time.time()

    start_points = []
    end_points = []
    stop_positions = []
    # Coordinates of everything drawn, used for the axis ranges.
    all_lon = []
    all_lat = []

    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    connection = MySQLdb.connect(host='localhost',
                                 user='******',
                                 passwd='root',
                                 db='stdatamining',
                                 charset='utf8')
    stop_file = "E:\\Science\\" + str(user_id) + "_stop_positions.txt"

    # closing()/with release the file, cursor and connection (in that
    # order) even if a query or the clustering raises.
    with closing(connection) as conn, \
            closing(conn.cursor()) as cur, \
            open(stop_file, 'w') as f_w:
        # Values are passed as query parameters instead of being
        # concatenated into the SQL text.
        cur.execute("SELECT traid FROM owndata_trajectory WHERE objid=%s;",
                    (user_id,))
        traid_list = [record[0] for record in cur.fetchall()]

        for traid in traid_list:
            print(traid)
            data = _fetch_track_points(cur, traid)
            if len(data) < 500:
                continue
            first_point = data[0]
            last_point = data[-1]

            f_w.write(str(traid) + '\n')
            f_w.write('start position:' + str(first_point.lon) + ',' +
                      str(first_point.lat) + ',\n')
            f_w.write('end position:' + str(last_point.lon) + ',' +
                      str(last_point.lat) + ',\n')
            start_points.append(first_point)
            end_points.append(last_point)

            # Points joined by the dashed route line: start position, the
            # stops along the way, end position.
            poi_curve_lon = [first_point.lon]
            poi_curve_lat = [first_point.lat]

            # Run the project's DBSCAN variant to obtain the stop clusters.
            clusters = dbscan_process(data, velocity_sigma, nap, sigma1)
            clusters = Utility.merge_clusters(clusters)
            clusters = Utility.clusters_refinement(clusters)
            f_w.write(str(len(clusters)) + '\n')
            for cluster in clusters:
                point_len = len(cluster)
                lon_sum = 0
                lat_sum = 0
                for point in cluster:
                    lon_sum += point.lon
                    lat_sum += point.lat
                lon_avg = lon_sum / point_len
                lat_avg = lat_sum / point_len
                stop_positions.append(TrackPoint(lon_avg, lat_avg, 0))
                poi_curve_lon.append(lon_avg)
                poi_curve_lat.append(lat_avg)
                f_w.write(str(lon_avg) + ',' + str(lat_avg) + ',\n')

            poi_curve_lon.append(last_point.lon)
            poi_curve_lat.append(last_point.lat)
            all_lon.extend(poi_curve_lon)
            all_lat.extend(poi_curve_lat)
            pl.plot(poi_curve_lon, poi_curve_lat, '.k--', markersize=4)

    l1, = pl.plot([ele.lon for ele in stop_positions],
                  [ele.lat for ele in stop_positions], 'xb', markersize=12)
    l2, = pl.plot([ele.lon for ele in start_points],
                  [ele.lat for ele in start_points], 'dg')
    l3, = pl.plot([ele.lon for ele in end_points],
                  [ele.lat for ele in end_points], '>r')

    # Axis ranges: data extent plus a margin of 1/15 on each side.  Taken
    # from the data itself so negative coordinates are handled too; skipped
    # when no trajectory was plotted.
    if all_lon:
        lon_low_limit, lon_high_limit = min(all_lon), max(all_lon)
        lat_low_limit, lat_high_limit = min(all_lat), max(all_lat)
        lon_interval = (lon_high_limit - lon_low_limit) / 15
        lat_interval = (lat_high_limit - lat_low_limit) / 15
        pl.xlim(lon_low_limit - lon_interval, lon_high_limit + lon_interval)
        pl.ylim(lat_low_limit - lat_interval, lat_high_limit + lat_interval)

    pl.xlabel('longitude')
    pl.ylabel('latitude')
    pl.legend((l2, l1, l3),
              ('start position', 'stop position', 'end position'),
              loc='best',
              numpoints=1,
              ncol=1,
              frameon=False)
    pl.savefig(u"C:\\Users\\Administrator\\Desktop\\" + str(user_id) +
               "_positions.png",
               dpi=1000)

    end_time = time.time()
    print('consume time: =' + str((end_time - begin_time) / 60) + 'minutes')
    pl.show()
    pl.close()
Beispiel #12
0
# y = [1/(1 + math.pow(math.e, -(10*ele-5))) for ele in x]
# plt.plot(x, y)
# plt.xlabel('t')
# plt.ylabel('temporal distance')

# plt.show()

# x = [ele.split(' | ')[3] for ele in open(file_name1, 'r')]
# y = [ele.split(' | ')[4] for ele in open(file_name1, 'r')]
# plt.plot(x, y, 'g')
# x2 = [ele.split(' ')[0] for ele in open(file_name2, 'r')]
# y2 = [ele.split(' ')[1] for ele in open(file_name2, 'r')]
# plt.plot(x, y, 'og')
# plt.plot(x2, y2, 'or')
# plt.show()
# compressed_data = Utility.read_geolife_data_file(file_name1)
# clustered_data = Utility.read_geolife_data_file(file_name2)
# x = []
# y = []
# for point in compressed_data:
#     if point not in clustered_data:
#         x.append(point.lon)
#         y.append(point.lat)
# plt.plot(x, y, 'or')
# plt.show()

# Ad-hoc check: build two sample points and print the value that
# Utility.distance_calculate returns for them.
p1 = TrackPoint(116.319857, 39.987436, 20000)
p2 = TrackPoint(116.319872, 39.987406, 22000)
dist = Utility.distance_calculate(p1, p2)
print dist