예제 #1
0
def encode_dataset_polyline(data):
    """Encode the path of every direction as a polyline and save the result.

    For each direction in ``data`` the ``lat``/``lon`` values of its ``path``
    entries are converted to floats and passed to ``polyline.encode`` with
    precision 5. The encoded string is stored on the direction under
    ``['overview_polyline']['points']`` (the layout mentioned for
    ``main.decodePolylines(directions)``).

    The ``filename`` of the first path point names the temp file; when that
    key is missing it is set to ``'unknown_filename'`` on the point itself.

    Args:
        data: list of dicts, each holding a ``path`` list of dicts with
            ``lat`` and ``lon`` entries.

    Returns:
        The same ``data`` list, updated in place.

    Raises:
        SystemExit: with code 1 when ``data`` is empty or the first
            direction has an empty path.
    """
    for direction in data:
        points = [(float(point['lat']), float(point['lon']))
                  for point in direction['path']]
        direction['overview_polyline'] = {'points': polyline.encode(points, 5)}

    # Only "no first point" is treated as fatal; other errors now propagate
    # instead of being swallowed by a bare except.
    try:
        first_point = data[0]['path'][0]
    except (IndexError, KeyError):
        print("No paths were found. Exiting...")
        raise SystemExit(1) from None

    filename = first_point.setdefault('filename', 'unknown_filename')
    iowork.save_temp_data(data, 'ds_polyline_' + filename)
    print("Polyline of dataset directions is encoded and saved")

    return data
예제 #2
0
def cut_directions(directions, direct_num, minutes_interval, tracking_interval):
    """Select at most ``direct_num`` directions that are long enough (testing aid).

    ``tracking_interval`` is divided by 60 and then by ``minutes_interval``
    to get the number of path points one tracking interval covers. Only
    directions whose ``path`` has more points than that are kept.

    The selection is saved through ``iowork.save_temp_data`` in the
    ``real_data`` directory. The file name uses the ``filename`` of the first
    point of ``directions[0]``; when missing it is set to
    ``'unknown_filename'`` on that point.

    Args:
        directions: list of dicts, each with a ``path`` list.
        direct_num: maximum number of directions to keep.
        minutes_interval: divisor applied after the division by 60.
        tracking_interval: tracking interval (divided by 60 first).

    Returns:
        The list of selected directions, or ``False`` when none qualifies.
    """
    min_points = tracking_interval / 60 / minutes_interval
    direct_num = min(direct_num, len(directions))

    cutted_directions = []
    for direction in directions:
        # keep only directions that last longer than the tracking interval
        if len(direction['path']) > min_points:
            cutted_directions.append(direction)
            if len(cutted_directions) == direct_num:
                break

    if not cutted_directions:
        return False

    # An empty first path used to raise IndexError from inside the bare
    # except handler; fall back to the default name without touching it.
    first_path = directions[0]['path']
    if first_path:
        filename = first_path[0].setdefault('filename', 'unknown_filename')
    else:
        filename = 'unknown_filename'

    iowork.save_temp_data(
        cutted_directions,
        '{}_cutted_{:0.0f}_interval'.format(filename, min_points),
        directory='real_data')

    return cutted_directions
예제 #3
0
def get_busy_directions(data):
    """Group consecutive busy lines into directions (ACSPrivacy format).

    Lines whose ``busy`` value is ``'1'`` are collected into a path. The path
    is closed and stored as ``{'path': [...]}`` when the next line with
    ``busy == '0'`` arrives. The result is passed through
    ``encode_dataset_polyline`` and saved with ``iowork.save_temp_data``.

    Args:
        data: list of dicts with a ``busy`` key; ``data[0]`` must also carry
            ``filename`` when any direction is produced.

    Returns:
        The list of encoded directions, or ``None`` when ``data`` is empty,
        no direction was closed, or the only direction has at most one point.
    """
    if not data:
        return

    directions = []
    path = []
    for line in data:
        if line['busy'] == '1':
            path.append(line)
        elif line['busy'] == '0' and path:
            # busy -> free transition closes the current trip
            directions.append({'path': path})
            path = []
    # NOTE(review): a busy run still open at the end of ``data`` is not
    # emitted (unchanged behavior) - confirm that dropping it is intended.

    # ``directions`` can be empty (all-free or all-busy input); the original
    # indexed directions[0] unconditionally and raised IndexError here.
    if not directions:
        return
    if len(directions) == 1 and len(directions[0]['path']) <= 1:
        return

    directions[0]['path'][0]['filename'] = data[0]['filename']
    directions = encode_dataset_polyline(directions)  # get polyline for original path
    iowork.save_temp_data(directions, 'coor_busy_' + data[0]['filename'])
    print("Coordinates of directions for busy time received")

    return directions
예제 #4
0
def get_waypoints_for_poi(directions, poi_type=None, filename=''):
    """!!!Potential function!!!
    Select potential waypoints from the POIs already attached to each direction.

    A POI from ``direction['polyline_coor_POI'][j][1]`` becomes a waypoint when
    ``get_poi_by_type(poi['types'], poi_type)`` is truthy, its ``time_spent``
    is not ``-1``, and ``time_spent[0]`` is below the direction's
    ``overview_free_time``. For each direction with at least one waypoint the
    keys ``dest_wayp_list`` (``[origin, destination, waypoints]``) and
    ``all_destinations`` (result of ``get_destination_via_poi``) are added.

    Args:
        directions: list of direction dicts with ``legs``,
            ``polyline_coor_POI`` and ``overview_free_time``.
        poi_type: value forwarded to ``get_poi_by_type``; when truthy it is
            also stored in ``directions[0]['filtered_poi']``.
        filename: temp-file suffix. When empty, ``directions[0]['filename']``
            is used, defaulting to ``'unknown_filename'``.

    Returns:
        The same ``directions`` list, updated in place.

    Raises:
        SystemExit: with code 1 when ``directions[0]['overview_free_time']``
            is not available.
    """
    try:
        directions[0]['overview_free_time']
    except (IndexError, KeyError, TypeError):
        print("WARNING! No key \'overview_free_time\' is presented! Run the \'in_time_directions\' function before!\nExiting...")
        raise SystemExit(1) from None

    for i, direction in enumerate(directions):
        legs = direction['legs']
        origin_addr = legs[0]['start_address']
        if len(legs) == 1:
            destination_addr = legs[0]['end_address']  # direction without waypoints
        else:
            # NOTE(review): index 2 is hard-coded ("NEED TEST" in the original);
            # it fails for directions with exactly two legs - confirm whether
            # the last leg (legs[-1]) is what is meant.
            destination_addr = legs[2]['end_address']

        waypoint_list = []
        for poi_entry in direction['polyline_coor_POI']:
            for poi in poi_entry[1]:
                if not get_poi_by_type(poi['types'], poi_type):
                    continue
                try:
                    time_spent = poi['time_spent']
                    # only POIs with "time_spent" info that fit into the free time
                    if time_spent != -1 and time_spent[0] < direction['overview_free_time']:
                        waypoint_list.append([
                            "place_id:" + poi['place_id'],
                            poi['name'],
                            poi['types'],
                            time_spent,
                            poi['populartimes'],
                            poi['rating_n'],
                        ])
                except KeyError:
                    print("\'time_spent\' parameter is not available because no popular times were added")

        if waypoint_list:
            destination_wayp_list = [origin_addr, destination_addr, waypoint_list]
            direction['dest_wayp_list'] = destination_wayp_list
            direction['all_destinations'] = get_destination_via_poi(destination_wayp_list)
            print("Potential in time waypoints were obtained for direction[{}]".format(i))

    if poi_type:
        directions[0]['filtered_poi'] = poi_type  # add filtered POI types for report
    if not filename:
        # The original stored the fallback on directions[0] but left the local
        # empty, so the file was saved as 'dest_wayp_list_'.
        filename = directions[0].setdefault('filename', 'unknown_filename')
    else:
        directions[0]['filename'] = filename
        print("WARNING! Filename extension was changed to:", filename)
    iowork.save_temp_data(directions, 'dest_wayp_list_' + filename)

    return directions
예제 #5
0
def get_near_poi_polylines(directions, max_radius, filename='', place_type=[], add_popular=True, distance_between=0.020):
    """!!!Potential function!!!
    Collect nearby POIs for points along each direction's polyline.

    POIs are requested with ``get_near_poi`` for the first polyline point, and
    then for every point whose latitude or longitude differs (absolute value,
    rounded to 6 decimals) by more than ``distance_between`` from the last
    point that was queried. Each query appends ``[tuple(coordinates), places]``
    to ``direction['polyline_coor_POI']``.

    Args:
        directions: list of dicts with a ``polyline_coordinates`` sequence of
            ``(lat, lon)`` pairs.
        max_radius: forwarded to ``get_near_poi``.
        filename: temp-file suffix. When empty, ``directions[0]['filename']``
            is used, defaulting to ``'unknown_filename'``.
        place_type: place types to query one by one; when empty a single
            untyped query is made. The default list is never mutated.
        add_popular: forwarded to ``get_near_poi``.
        distance_between: coordinate difference that triggers a new query.

    Returns:
        The same ``directions`` list, updated in place.
    """

    def fetch_places(location):
        """Return the merged 'results' of the POI queries for ``location``."""
        places = []
        if place_type:
            for place in place_type:
                result = get_near_poi(location, max_radius, place, add_popular=add_popular)
                places.extend(result['results'])
        else:
            result = get_near_poi(location, max_radius, add_popular=add_popular)
            places.extend(result['results'])
        return places

    print("Get POI for polyline coordinates")
    for direction in directions:
        polyline_points = direction['polyline_coordinates']
        poi_by_point = []
        direction['polyline_coor_POI'] = poi_by_point
        anchor = 0  # index of the last point POIs were requested for
        for j, coordinates in enumerate(polyline_points):
            location = str(coordinates[0]) + ',' + str(coordinates[1])
            if j == 0:
                poi_by_point.append([tuple(coordinates), fetch_places(location)])

            distance_lat = round(abs(polyline_points[anchor][0] - coordinates[0]), 6)
            distance_lon = round(abs(polyline_points[anchor][1] - coordinates[1]), 6)

            # distance between POI queries, 0.020 by default
            if distance_lat > distance_between or distance_lon > distance_between:
                anchor = j
                poi_by_point.append([tuple(coordinates), fetch_places(location)])

    if place_type:
        directions[0]['place_type'] = place_type  # add downloaded place types for report
    if not filename:
        # The original stored the fallback on directions[0] but left the local
        # empty, so the file was saved as 'nearbyPOIs_'.
        filename = directions[0].setdefault('filename', 'unknown_filename')
    else:
        directions[0]['filename'] = filename
        print("WARNING! Filename extension was changed to:", filename)
    iowork.save_temp_data(directions, 'nearbyPOIs_' + filename)
    print("Nearby POIs are downloaded")

    return directions
예제 #6
0
def get_coor_between(data, time_interval):
    """Resample direction paths to points roughly ``time_interval`` apart.

    For every direction the first line starts a segment. A later line is
    appended when its ``time`` (converted with ``int``) lies strictly between
    ``time_interval * time_gap`` and ``time_interval * (time_gap + 1)`` after
    the segment start, where ``time_gap`` counts the points accepted so far.
    A line that lies beyond that window, or the 61st accepted step, closes
    the segment and starts a new one at that line. Each closed segment becomes
    ``{'path': [...], 'original_polyline': {'points': ...}}`` using the
    ``overview_polyline`` of the direction it came from.

    The segments are passed through ``encode_dataset_polyline`` and saved
    with ``iowork.save_temp_data``.

    Args:
        data: list of dicts with ``path`` (list of dicts carrying ``time``)
            and ``overview_polyline`` (dict with ``points``).
        time_interval: spacing between kept points, in the unit of ``time``
            (the original docstring says seconds).

    Returns:
        The list of encoded segments, or ``None`` when ``data`` is empty.
    """
    if not data:
        return

    def make_direction(points, polyline_points):
        """Build one output segment."""
        return {'path': points, 'original_polyline': {'points': polyline_points}}

    directions = []
    path = []
    time_gap = 1
    orig_poly = {}
    for direct in data:
        # flush what the previous direction left open (single points are dropped)
        if len(path) > 1:
            directions.append(make_direction(path, orig_poly['points']))
        path = []
        last_time = '0'  # sentinel: no segment started yet for this direction
        for line in direct['path']:
            if last_time == '0':
                last_time = line['time']
                path.append(line)
                orig_poly = {'points': direct['overview_polyline']['points']}
                time_gap = 1

            elapsed = int(line['time']) - int(last_time)
            if elapsed > time_interval * time_gap:
                if elapsed < time_interval * (time_gap + 1):
                    # inside the expected window for the next sample
                    if time_gap >= 61:
                        directions.append(make_direction(path, orig_poly['points']))
                        path = [line]
                        time_gap = 1
                        last_time = line['time']
                        continue
                    path.append(line)
                    time_gap += 1
                elif elapsed > time_interval * (time_gap + 1):
                    # a sample was skipped: close the segment and restart here
                    directions.append(make_direction(path, orig_poly['points']))
                    path = [line]
                    time_gap = 1
                    last_time = line['time']

    # The segment still open after the last direction was never emitted, so
    # the final trajectory was silently lost; flush it with the same rule as
    # between directions.
    if len(path) > 1:
        directions.append(make_direction(path, orig_poly['points']))

    directions = encode_dataset_polyline(directions)  # encode overview polyline for each direction
    try:
        filename = data[0]['path'][0]['filename']
    except (IndexError, KeyError):
        filename = "unknown_filename"
    directions[0]['path'][0]['filename'] = filename
    iowork.save_temp_data(directions, 'coor_between_' + filename)
    print("Coordinates of time intervals obtained")

    return directions
예제 #7
0
def get_destination_via_poi(destination_list):
    """!!!Potential function!!!
    Get all routes via POI for a ``dest_wayp_list`` built by get_waypoints_for_poi.

    ``get_directions`` is called once per waypoint with the origin, the
    destination and the waypoint's place id. Every route it returns is tagged
    with the waypoint's ``name``, ``time_spent`` and ``rating_n``.

    Args:
        destination_list: ``[origin, destination, waypoints]`` where each
            waypoint is a sequence laid out as
            ``[place_id, name, types, time_spent, populartimes, rating_n]``.

    Returns:
        Flat list of all tagged routes, also saved as temp data
        ``'potential_dest'``.
    """
    destination = []
    for waypoint in destination_list[2]:
        routes = get_directions(destination_list[0], destination_list[1], waypoint[0])
        # Tag the routes of this request. The original indexed the flat result
        # list by the waypoint index, which only lines up when every request
        # returns exactly one route.
        for route in routes:
            route['name'] = waypoint[1]
            route['time_spent'] = waypoint[3]
            route['rating_n'] = waypoint[5]
        destination.extend(routes)

    iowork.save_temp_data(destination, 'potential_dest')
    print("Potential directions via POI received")

    return destination
예제 #8
0
def get_all_directions(data, split_by=0):
    """Split the lines into directions for busy and free times (CSPrivacy format).

    Args:
        data: list of dicts with a ``busy`` key (``'0'`` or ``'1'``);
            ``data[0]`` must carry ``filename`` when a direction is produced.
        split_by: ``0`` - every run of consecutive lines with the same
            ``busy`` value becomes one path (lines with any other ``busy``
            value are ignored); ``1...n`` - every path contains ``split_by``
            lines, with the remainder in a last shorter path.

    Returns:
        The directions after ``encode_dataset_polyline``, or ``None`` when
        ``data`` is empty or no path could be built.

    Raises:
        SystemExit: with code 1 when ``split_by`` is negative.
    """
    if not data:
        return
    if split_by < 0:
        print("split_by parameter is negative. Exiting...")
        raise SystemExit(1)

    directions = []
    path = []
    if split_by == 0:
        current_busy = None
        for line in data:
            busy = line['busy']
            if busy not in ('0', '1'):
                continue
            # Close the path on a busy/free switch. An empty path is never
            # stored (the original appended one when the data started busy).
            if path and busy != current_busy:
                directions.append({'path': path})
                path = []
            current_busy = busy
            path.append(line)
    else:
        # Fixed-size chunks. The original compared the line index with a
        # boundary that doubled after each chunk (split_by += split_by).
        for line in data:
            path.append(line)
            if len(path) == split_by:
                directions.append({'path': path})
                path = []

    # the last run / remainder was never stored by the original
    if path:
        directions.append({'path': path})
    if not directions:
        return

    directions[0]['path'][0]['filename'] = data[0]['filename']
    directions = encode_dataset_polyline(directions)  # get polyline for original path
    iowork.save_temp_data(directions, 'all_direct_' + data[0]['filename'])
    print("Coordinates of directions for busy and free times received")

    return directions
예제 #9
0
def sort_file(filename):
    """Read a whitespace-separated data file and sort its rows with ``sort_rule``.

    Every non-blank line is split into fields; the first four become the
    ``lat``, ``lon``, ``busy`` and ``time`` values (kept as strings) of a row.
    After sorting, the base name of ``filename`` without its extension is
    stored in ``data[0]['filename']`` and the rows are saved through
    ``iowork.save_temp_data``.

    Args:
        filename: path of the file to read.

    Returns:
        The sorted list of row dicts; an empty list when the file holds no
        data rows (nothing is saved in that case).

    Raises:
        IndexError: when a non-blank line has fewer than four fields.
    """
    data = []
    with open(filename, 'r') as file_object:
        for raw_line in file_object:
            fields = raw_line.split()
            if not fields:
                continue  # blank lines used to raise IndexError
            data.append({
                'lat': fields[0],
                'lon': fields[1],
                'busy': fields[2],
                'time': fields[3],
            })

    if not data:
        # nothing to sort or name; the original failed on data[0] here
        print("No data rows were found in file:", filename)
        return data

    data.sort(key=sort_rule)
    data[0]['filename'] = os.path.basename(filename).rsplit('.', 1)[0]  # get only filename
    iowork.save_temp_data(data, 'sorted_data_' + data[0]['filename'])
    print("Data sorted by the time and saved")

    return data
예제 #10
0
def dsparse_run(tracking_interval):
    """Run the epfl/mobility dataset processing pipeline for every data file.

    For each file returned by ``iowork.read_all_files()`` the data is sorted,
    reduced to busy directions, resampled and cut through ``dsparse``. Then
    directions are decoded from stored temp data, compared via
    ``get_directions_for_ds`` and exported as JSON. Files without busy
    directions are skipped.

    Args:
        tracking_interval: forwarded to ``dsparse.cut_directions`` and
            ``get_directions_for_ds``; also used in the temp/JSON file names.

    Returns:
        The value of ``directions`` after the last processed file; ``None``
        when there are no files or the last file was skipped.
    """
    # Bound up front so the final return cannot hit an unbound local when
    # read_all_files() yields nothing.
    directions = None

    files = iowork.read_all_files()         # get all files in data sets' directory
    for f in files:                         # for each file
        directions = dsparse.sort_file(f)   # sort by the time attribute

        directions = dsparse.get_busy_directions(directions)        # get coordinates in when taxi is busy
        if not directions:
            continue
        directions = dsparse.get_coor_between(directions, 600)      # get coordinates in X seconds time interval

        # NOTE(review): called with three arguments - confirm this matches the
        # signature of dsparse.cut_directions. Its result is overwritten by
        # the next statement.
        directions = dsparse.cut_directions(directions,10000,tracking_interval)

        # NOTE(review): the temp-data name below is hard-coded, so the same
        # stored file is decoded for every input file - confirm it should not
        # be derived from the current file.
        directions = decode_polylines(iowork.get_temp_data('coor_between_new_abboip'),path_num=3)
        directions = get_directions_for_ds(directions, tracking_interval)
        iowork.save_temp_data(directions, 'direct_temp_{}'.format(tracking_interval))

        directions = dsparse.get_for_json(iowork.get_temp_data('direct_temp_{}'.format(tracking_interval)))
        iowork.save_as_json(directions, 'direct_{}'.format(tracking_interval))
        dsparse.add_to_json(iowork.get_text_data('direct_{}'.format(tracking_interval), extension='.json'),
                                                                    'direct{}'.format(tracking_interval))
    return directions
예제 #11
0
def get_directions(origin_addr, destination_addr, waypoints_list=None, filename='', first_run=False):
    """!!!Potential function!!!
    Get directions between two addresses from the navigation provider.

    Both addresses are converted to ``str`` and passed to ``direction``;
    ``waypoints_list`` is forwarded as ``waypoints`` only when it is not
    ``None``.

    Args:
        origin_addr: start address (any value accepted by ``str``).
        destination_addr: end address (any value accepted by ``str``).
        waypoints_list: optional waypoints forwarded to ``direction``.
        filename: temp-file suffix used when ``first_run`` is true; when empty
            a rounded time stamp from ``get_time`` is used.
        first_run: when true, the suffix is stored in
            ``directions[0]['filename']`` and the result is saved through
            ``iowork.save_temp_data``.

    Returns:
        Whatever ``direction`` returned.
    """
    origin_addr = str(origin_addr)
    destination_addr = str(destination_addr)

    if waypoints_list is not None:
        directions = direction(origin_addr, destination_addr, waypoints=waypoints_list)
    else:
        directions = direction(origin_addr, destination_addr)

    print("Direction received")
    if first_run:
        if not filename:
            # The original passed ``get_time`` to round() without calling it,
            # which raises TypeError if it is a function. Accept both a
            # callable and a plain number, since its definition is not
            # visible here.
            timestamp = get_time() if callable(get_time) else get_time
            filename = str(round(timestamp))  # creating filename extension of temporary data
        directions[0]['filename'] = filename
        print("Filename extension of temporary data is:", filename)
        iowork.save_temp_data(directions, 'directions_' + filename)

    return directions
예제 #12
0
def potential_visit_poi (directions, tracking_interval, filename='', add_no_stop=False):
    """Calculate the probability of visiting each POI and the overall entropy.

    For every direction, each entry of ``all_destinations`` (after filtering
    through ``in_time_directions``) gets a ``dist_data`` dict holding the mean,
    standard deviation, z-score of the free time and a bucketed probability.
    Probabilities of destinations with ``rating_n > 0`` are multiplied by that
    rating and collected. Together with the no-stop probability from
    ``no_stop_lognormal`` they are normalised and passed to
    ``sp.stats.entropy`` (base 2). The results are stored on the direction
    under ``probabilities``, ``normal_prob``, ``direction_entropy``,
    ``tracking_interval`` and ``no_stop_prob``.

    Args:
        directions: list of dicts with ``duration``, ``overview_free_time``
            and, optionally, ``all_destinations``.
        tracking_interval: divided by 60 before use (presumably seconds in,
            minutes out - confirm against callers).
        filename: temp-file suffix; when empty ``directions[0]['filename']``
            is read (KeyError if it is missing).
        add_no_stop: when true, the no-stop probability takes part in the
            normalising sum; otherwise POI probabilities are scaled by
            ``1 - no_stop``.

    Returns:
        The same ``directions`` list, updated in place.
    """

    tracking_interval = tracking_interval/60
    for i in range(len(directions)): 
        all_probab = []
        duration = directions[i]['duration']/60
        free_time = directions[i]['overview_free_time']/60
        # NOTE(review): the bare ``except`` further down turns ANY failure in
        # this block (missing 'all_destinations', division by zero when std or
        # sum_all is 0, errors in helpers) into the "no potential
        # destinations" fallback; all_probab may already be partly filled then.
        try:
            # in_time_directions receives the interval multiplied back by 60
            directions[i]['all_destinations'] = in_time_directions(directions[i]['all_destinations'], tracking_interval*60)
            for j in range(len(directions[i]['all_destinations'])):
                minim = abs(directions[i]['all_destinations'][j]['time_spent'][0]/60) # we guess that Goolge min time spent equal to -2*sigma
                maxim = abs(directions[i]['all_destinations'][j]['time_spent'][1]/60) # max time spent = 2*sigma
                if minim == maxim: # in case if there is no time interval, artificially create it
                    minim = minim/2
                    maxim = 3*maxim/2
                mean = np.mean([minim, maxim])
                std = (maxim-minim)/4  # [minim, maxim] is treated as mean +/- 2*sigma

                # Z-score calc and finding of the potential probabl interval
                # three sigma rule: m+s=68%, m+2s=95%, m+3s=99,7%
                z = (free_time-mean)/std
                if -1<=z<=1:
                    probab = 0.341
                elif -2<=z<-1 or 2>=z>1:
                    probab = 0.136
                elif -3<=z<-2 or 3>=z>2:
                    probab =  0.021
                else:
                    probab = 0.0013
                directions[i]['all_destinations'][j]['dist_data'] = ({'mean': mean, 'std': std, 'zscore': z, 'probab': probab})
                
                # rating addition: unweighted probabilities
                # (destinations with rating_n <= 0 are not added to all_probab)
                if directions[i]['all_destinations'][j]['rating_n'] > 0:
                    probab = probab * directions[i]['all_destinations'][j]['rating_n'] #CHECK IF ZERO
                    all_probab.append(probab)

            # add nostop_prob     
            nostop_probab = no_stop_lognormal(duration, tracking_interval)
            if add_no_stop:
                # no-stop is part of the sum, but only the POI entries
                # (all but the last) go into n_prob
                all_probab.append(nostop_probab)
                sum_all = sum(all_probab)
                n_prob = []
                for prob in all_probab[:-1]:
                    n_prob.append(prob/sum_all)
                    print("Weighed probability of visit a POI is: {}%".format(round(prob/sum_all * 100, 2)))
                print("Weighed no stop probability is: {}%".format(round(nostop_probab/sum_all * 100, 2)))

            elif not add_no_stop:
                """find probability when no_stop doesn't participate in propotion""" 
                stop_prob_weight = 1-nostop_probab
                # find proportion to calc entropy * stop_prob_weight
                sum_all = sum(all_probab)
                n_prob = []
                for prob in all_probab:
                    n_prob.append(prob/sum_all * stop_prob_weight)
                    print("Weighed probability of visit a POI is: {}%".format(round(prob/sum_all * 100, 2)))
            
            # check of input data correctness for entropy
            test = sum(n_prob)
            print("\nCheck. The sum of probabilities is: {}\n".format(test))

            entropy = sp.stats.entropy(n_prob, base=2)
            entropy_data = {'probabilities': all_probab, 'normal_prob': n_prob, 'direction_entropy': entropy, 
                            'tracking_interval': tracking_interval, 'no_stop_prob': nostop_probab} # create dict to save entropy data
            directions[i].update(entropy_data)
            print("Direction[{}] entropy is: {}".format(i, entropy))
        except:
            # Fallback: same computation as the add_no_stop branch, using
            # whatever all_probab held when the error occurred.
            nostop_probab = no_stop_lognormal(duration, tracking_interval)
            all_probab.append(nostop_probab)
            sum_all = sum(all_probab)
            n_prob = []
            for prob in all_probab[:-1]:
                n_prob.append(prob/sum_all)
                print("Weighed probability of visit a POI is: {}%".format(round(prob/sum_all * 100, 2)))
            print("Weighed no stop probability is: {}%".format(round(nostop_probab/sum_all * 100, 2)))
            entropy = sp.stats.entropy(n_prob, base=2)
            entropy_data = {'probabilities': all_probab, 'normal_prob': n_prob, 'direction_entropy': entropy, 
                            'tracking_interval': tracking_interval, 'no_stop_prob': nostop_probab} # create dict to save entropy data
            directions[i].update(entropy_data)
            print("Direction[{}] entropy is: {}".format(i, entropy))
            print("No potential destinations for direction[{}]".format(i))
    
    if not filename:
        filename = directions[0]['filename']
    else:
        directions[0]['filename'] = filename
        print("WARNING! Filename extension was changed to: ", filename)
    
    # tracking_interval here is the value already divided by 60
    iowork.save_temp_data(directions, 'entropy_data_{}_{:.0f}'.format(filename, tracking_interval))
    
    return directions
예제 #13
0
def get_directions_for_ds(exist_directions, tracking_interval):
    """Get provider directions to compare with the data-set directions.

    Each data-set direction is walked in steps of
    ``round(tracking_interval / 600)`` path points, always visiting the last
    point. Whenever a start and an end address are known, directions are
    requested with ``get_directions``, filtered with ``in_time_directions``,
    tagged with the real path covered so far (``real_path``) and the current
    point index (``direction_time``), decoded and collected.

    NOTE: in this block ``start_addr`` and ``end_addr`` are only ever set to
    ``''`` because the reverse-geocoding step is disabled. The request branch
    therefore never runs, no directions are collected, and the function ends
    in the "No directions were found" exit until that step is restored.

    Args:
        exist_directions: list of dicts with ``path`` (and, for the request
            branch, ``polyline_coordinates`` and ``original_polyline``).
        tracking_interval: tracking interval; also forwarded to
            ``in_time_directions``.

    Returns:
        List with one list of directions per data-set direction that
        produced any; also saved through ``iowork.save_temp_data``.

    Raises:
        SystemExit: with code 1 when the computed step is not positive or no
            directions were collected.
    """
    time_interval = round(tracking_interval / 600)  # get time interval to get 'path' coordinates related to the time
    if time_interval <= 0:
        print("WARNING! Time interval at get_directions_for_ds() <= 0! No tracking interval was defined.")
        raise SystemExit(1)

    directions = []
    for line in exist_directions:
        start_addr = ''
        end_addr = ''
        coor_num = 0
        whole_path = []
        last_index = len(line['path']) - 1
        while coor_num <= len(line['path']):
            # TODO: reverse geocoding is disabled. The original kept it in a
            # string literal (a no-op) that was not valid code:
            #   coor_num == 0 -> start_addr = reverse_geocode(lat, lon of line['path'][coor_num])
            #   otherwise     -> end_addr   = reverse_geocode(lat, lon of line['path'][coor_num])

            # if we have start and end addresses get directions
            if start_addr and end_addr:
                temp_directions = get_directions(start_addr, end_addr)
                temp_directions = in_time_directions(temp_directions, tracking_interval)
                # save real path coordinates to each direction
                new_real_path = {}
                for direct in temp_directions:
                    if not new_real_path:
                        # encode a new polyline for the real path walked so far
                        if len(line['polyline_coordinates']) > coor_num:
                            new_real_path['real_coordinates'] = line['polyline_coordinates'][:coor_num+1]
                            new_real_path.update({'overview_polyline': polyline.encode(new_real_path['real_coordinates'],5)})
                            new_real_path.update({'original_polyline': line['original_polyline']['points']})
                    direct['real_path'] = new_real_path      # add the real path to direction for comparison
                    direct['direction_time'] = coor_num      # save the tracking interval for direction
                start_addr = end_addr  # the next direction starts where this one ended
                temp_directions = decode_polylines(temp_directions)
                whole_path.extend(temp_directions)

            remaining = last_index - coor_num
            if remaining >= time_interval:      # the next step stays inside the path
                coor_num += time_interval
            elif remaining >= 1:                # step would overshoot: take the last point
                coor_num += remaining
            else:
                break                           # the last point was processed
        if whole_path:
            directions.append(whole_path)

    # Explicit checks replace the nested bare excepts that used to pick the
    # file name.
    if not directions:
        print("No directions were found. Exiting...")
        raise SystemExit(1)

    try:
        first_point = exist_directions[0]['path'][0]
    except (IndexError, KeyError):
        first_point = {}
    if 'filename' in first_point:
        filename = first_point['filename']
    elif 'time' in first_point:
        filename = first_point['time']
    else:
        filename = 'unknown_filename'
    directions[0][0]['filename'] = filename

    iowork.save_temp_data(directions, "direct_" + directions[0][0]['filename'])

    return directions