def encode_dataset_polyline(data):
    """Encode every direction's path into a Google-style polyline string.

    For each direction dict in *data*, collects (lat, lon) float tuples
    from direction['path'] and stores the precision-5 encoded polyline
    under direction['overview_polyline']['points'] — the shape consumed
    by 'main.decodePolylines(directions)'.  Ensures a 'filename' tag
    exists on the first path point, saves the result via iowork, and
    returns *data* (mutated in place).

    :param data: list of direction dicts, each holding a 'path' list of
                 {'lat': ..., 'lon': ...} entries
    :return: the same *data* list with polylines attached
    """
    for direction in data:
        # Build the (lat, lon) tuple list for this direction's raw path.
        direct_poly = [(float(point['lat']), float(point['lon']))
                       for point in direction['path']]
        # add polyline in format of 'main.decodePolylines(directions)'
        direction.update(
            {'overview_polyline': {'points': polyline.encode(direct_poly, 5)}})
    # Guarantee a filename tag on the very first path point; when there is
    # no path at all there is nothing to tag, so abort.
    try:
        data[0]['path'][0]['filename']
    except KeyError:
        # First path point exists but carries no filename yet.
        data[0]['path'][0]['filename'] = 'unknown_filename'
    except IndexError:
        print("No paths were found. Exiting...")
        exit(1)
    iowork.save_temp_data(data, 'ds_polyline_' + data[0]['path'][0]['filename'])
    print("Polyline of dataset directions is encoded and saved")
    return data
def cut_directions(directions, direct_num, minutes_interval, tracking_interval):
    """Keep at most *direct_num* directions that are long enough to track.

    A direction qualifies when its 'path' holds more points than one
    tracking interval covers.  The tracking interval (seconds) is
    converted to "number of input lines" using *minutes_interval*, the
    minute spacing of the input data.

    :param directions: list of direction dicts with a 'path' list each
    :param direct_num: maximum number of directions to keep
    :param minutes_interval: minute spacing between consecutive data lines
    :param tracking_interval: tracking interval in seconds
    :return: the selected directions, or False when none qualify
    """
    tracking_interval /= 60  # convert to minutes
    # Number of input lines that one tracking interval spans, given the
    # minute spacing of the input data.
    tracking_interval /= minutes_interval
    direct_num = min(direct_num, len(directions))
    selected = []
    for direction in directions:
        # Keep only directions that continue longer than one tracking interval.
        if len(direction['path']) > tracking_interval:
            selected.append(direction)
            if len(selected) == direct_num:
                break
    if not selected:
        return False
    try:
        directions[0]['path'][0]['filename']
    except KeyError:
        directions[0]['path'][0]['filename'] = "unknown_filename"
    iowork.save_temp_data(
        selected,
        '{}_cutted_{:0.0f}_interval'.format(
            directions[0]['path'][0]['filename'], tracking_interval),
        directory='real_data')
    return selected
def get_busy_directions(data):
    """Extract contiguous 'busy' runs from *data* as ACSPrivacy directions.

    Walks the time-sorted taxi rows and groups each maximal run of
    busy == '1' rows into {'path': [...]}; a run is closed when a
    busy == '0' row follows it.  NOTE: a busy run lasting until the end of
    *data* is left open and therefore dropped (original behaviour kept).

    :param data: time-sorted list of row dicts with 'busy' and 'filename'
    :return: direction dicts with encoded polylines, or None when there is
             nothing useful to return
    """
    if not data:
        return
    path = []
    directions = []
    last_busy = '0'
    for line in data:
        if line['busy'] == '1':
            if last_busy == '0':
                last_busy = '1'
            path.append(line)
            continue
        if line['busy'] == '0':
            if last_busy == '1':
                # Busy run just ended: close it as one direction.
                directions.append({'path': path})
                path = []
                last_busy = '0'
    # Bail out when nothing usable was found.  The explicit emptiness check
    # fixes an IndexError the original raised on directions[0] when no busy
    # run was ever closed.
    if not directions:
        return
    if len(directions) == 1 and len(directions[0]['path']) <= 1:
        return
    directions[0]['path'][0]['filename'] = data[0]['filename']
    directions = encode_dataset_polyline(directions)  # get polyline for original path
    iowork.save_temp_data(directions, 'coor_busy_' + data[0]['filename'])
    print("Coordinates of directions for busy time received")
    return directions
def get_waypoints_for_poi(directions, poi_type=None, filename=''):
    """!!!Potential function!!! Select potential waypoints from the POIs
    attached to each direction.

    Keeps only POIs that pass get_poi_by_type() (all types when *poi_type*
    is None), carry 'time_spent' info, and whose minimum visit time fits
    in the direction's 'overview_free_time'.  For every direction with
    matches, stores 'dest_wayp_list' and the routes through those POIs
    under 'all_destinations'.

    :param directions: list of direction dicts; 'overview_free_time' must
                       be present (run 'in_time_directions' first)
    :param poi_type: optional list of POI types to filter by
    :param filename: optional override for the temp-data filename extension
    :return: the mutated *directions* list
    """
    try:
        directions[0]['overview_free_time']
    except (KeyError, IndexError):
        print("WARNING! No key \'overview_free_time\' is presented! Run the \'in_time_directions\' function before!\nExiting...")
        exit(1)
    for i in range(len(directions)):
        waypoint_list = []
        origin_addr = directions[i]['legs'][0]['start_address']
        if len(directions[i]['legs']) == 1:
            destination_addr = directions[i]['legs'][0]['end_address']  # directions without waypoints
        else:
            # Use the last leg instead of the original hard-coded index 2
            # (marked "NEED TEST"), which raised IndexError for routes with
            # a single waypoint (two legs).
            destination_addr = directions[i]['legs'][-1]['end_address']
        for j in range(len(directions[i]['polyline_coor_POI'])):
            for k in range(len(directions[i]['polyline_coor_POI'][j][1])):
                poi = directions[i]['polyline_coor_POI'][j][1][k]
                # filter the existing POIs by elements in poi_type; if type
                # is None the filter returns True
                if not get_poi_by_type(poi['types'], poi_type):
                    continue
                try:
                    # add only POIs that have "time_spent" information and
                    # whose minimum stay fits in the free-time budget
                    if poi['time_spent'] != -1 and poi['time_spent'][0] < directions[i]['overview_free_time']:
                        waypoint_list.append(["place_id:" + poi['place_id'],
                                              poi['name'],
                                              poi['types'],
                                              poi['time_spent'],
                                              poi['populartimes'],
                                              poi['rating_n']])
                except KeyError:
                    print("\'time_spent\' parameter is not available because no popular times were added")
        if waypoint_list:
            destination_wayp_list = [origin_addr, destination_addr, waypoint_list]
            directions[i].update({'dest_wayp_list': destination_wayp_list})
            destinations = get_destination_via_poi(destination_wayp_list)
            directions[i].update({'all_destinations': destinations})
            print("Potential in time waypoints were obtained for direction[{}]".format(i))
    if poi_type:
        directions[0]['filtered_poi'] = poi_type  # add filtered POI types for report
    if not filename:
        try:
            filename = directions[0]['filename']
        except KeyError:
            # Fix: also update the local name — the original left it '' and
            # saved the data under the bare 'dest_wayp_list_' prefix.
            filename = directions[0]['filename'] = 'unknown_filename'
    else:
        directions[0]['filename'] = filename
        print("WARNING! Filename extension was changed to:", filename)
    iowork.save_temp_data(directions, 'dest_wayp_list_' + filename)
    return directions
def get_near_poi_polylines(directions, max_radius, filename='', place_type=None, add_popular=True, distance_between=0.020):
    """!!!Potential function!!! Download POIs near polyline points.

    Walks each direction's 'polyline_coordinates' and queries nearby POIs
    for the first point and for every subsequent point farther than
    *distance_between* degrees (lat or lon) from the previously sampled
    one.  Results are appended to direction['polyline_coor_POI'] as
    [(lat, lon), [places...]].

    :param directions: list of direction dicts with 'polyline_coordinates'
    :param max_radius: search radius forwarded to get_near_poi()
    :param filename: optional override for the temp-data filename extension
    :param place_type: list of POI types to query; None/empty queries all.
                       (None replaces the original mutable [] default.)
    :param add_popular: forwarded to get_near_poi()
    :param distance_between: minimum degree distance between sampled points
    :return: the mutated *directions* list
    """

    def _fetch_places(location):
        # Query nearby POIs at one coordinate, across all requested types.
        places = []
        if place_type:
            for place in place_type:
                result = get_near_poi(location, max_radius, place, add_popular=add_popular)
                places.extend(result['results'])
        else:
            result = get_near_poi(location, max_radius, add_popular=add_popular)
            places.extend(result['results'])
        return places

    if place_type is None:
        place_type = []
    print("Get POI for polyline coordinates")
    for i in range(len(directions)):
        directions[i]['polyline_coor_POI'] = []
        next_j = 0  # index of the last polyline point we sampled POIs for
        for j in range(len(directions[i]['polyline_coordinates'])):
            coordinates = directions[i]['polyline_coordinates'][j]
            location = str(coordinates[0]) + ',' + str(coordinates[1])
            if j == 0:
                # Always sample the first polyline point.
                directions[i]['polyline_coor_POI'].append([tuple(coordinates)] + [_fetch_places(location)])
            distance_lat = round(abs(directions[i]['polyline_coordinates'][next_j][0] - coordinates[0]), 6)
            distance_lon = round(abs(directions[i]['polyline_coordinates'][next_j][1] - coordinates[1]), 6)
            if distance_lat > distance_between or distance_lon > distance_between:
                # distance between POIs by default 0.020
                next_j = j
                directions[i]['polyline_coor_POI'].append([tuple(coordinates)] + [_fetch_places(location)])
    if place_type:
        directions[0]['place_type'] = place_type  # add downloaded place types for report
    if not filename:
        try:
            filename = directions[0]['filename']
        except KeyError:
            # Fix: also update the local name — the original left it '' and
            # saved the data under the bare 'nearbyPOIs_' prefix.
            filename = directions[0]['filename'] = 'unknown_filename'
    else:
        directions[0]['filename'] = filename
        print("WARNING! Filename extension was changed to:", filename)
    iowork.save_temp_data(directions, 'nearbyPOIs_' + filename)
    print("Nearby POIs are downloaded")
    return directions
def get_coor_between(data, time_interval):
    """get coordinates from file return only coordinates in X seconds time interval

    Thins every source direction's path to roughly one point per
    *time_interval* seconds (a growing ``time_gap`` multiplier tracks how
    many intervals have elapsed since the segment start) and splits the
    result into sub-directions when the gap grows too large.  Each produced
    segment keeps the source direction's overview polyline as
    'original_polyline'.  Returns the new directions with freshly encoded
    polylines, or None when *data* is empty.
    """
    if not data:
        return
    path = []
    directions = []
    time_gap = 1
    orig_poly = {}
    for direct in data:
        # Flush the previous direction's leftover segment (if it has at
        # least two points) before starting a new source direction.
        # NOTE(review): the final direction's leftover segment is never
        # flushed after the loop — presumably intentional, but verify.
        if len(path) > 1:
            list_arg = {'path':path, 'original_polyline':{'points': orig_poly['points']}}
            directions.append(list_arg)
            path = []
        last_time = '0'
        for line in direct['path']:
            if last_time == '0':
                # First row of this direction: open a new segment and
                # remember the source overview polyline.
                last_time = line['time']
                path.append(line)
                orig_poly = {'points': direct['overview_polyline']['points']}
                time_gap = 1
            if (int(line['time']) - int(last_time) > time_interval*time_gap):  # if time of the line is more than time_interval
                if (int(line['time']) - int(last_time) < time_interval*(time_gap+1)):  # but still not exceeds time_interval+gap
                    if time_gap >= 61:
                        # Multiplier capped (~61 intervals since segment
                        # start): close the segment and restart at this row.
                        time_gap = 1
                        last_time = line['time']
                        list_arg = {'path':path, 'original_polyline':{'points': orig_poly['points']}}
                        directions.append(list_arg)
                        path = []
                        path.append(line)
                        continue
                    # Row falls in the next interval window: keep it and
                    # advance the multiplier (rows inside the current window
                    # are deliberately skipped — that is the thinning).
                    path.append(line)
                    time_gap += 1
                elif (int(line['time']) - int(last_time) > time_interval*(time_gap+1)):
                    # Gap jumped past the next window entirely: close the
                    # current segment and restart a new one at this row.
                    time_gap = 1
                    last_time = line['time']
                    list_arg = {'path':path, 'original_polyline':{'points': orig_poly['points']}}
                    directions.append(list_arg)
                    path = []
                    path.append(line)
    directions = encode_dataset_polyline(directions)  # encode overview polyline for each direction
    # Propagate the source filename tag; fall back when it is missing.
    try:
        directions[0]['path'][0]['filename'] = data[0]['path'][0]['filename']
    except:
        directions[0]['path'][0]['filename'] = "unknown_filename"
    iowork.save_temp_data(directions, 'coor_between_' + directions[0]['path'][0]['filename'])
    print("Coordinates of time intervals obtained")
    return directions
def get_destination_via_poi(destination_list):
    """!!!Potential function!!! Build routes through every candidate POI.

    *destination_list* is the 'dest_wayp_list' triple produced by
    get_waypoints_for_poi(): [origin_addr, destination_addr, waypoints]
    where each waypoint entry is
    ["place_id:...", name, types, time_spent, populartimes, rating_n].

    Requests one route per waypoint and tags every returned route with the
    waypoint's name, time_spent and rating_n.

    :param destination_list: [origin, destination, waypoint_entries]
    :return: flat list of tagged route dicts (also saved as temp data)
    """
    origin_addr = destination_list[0]
    destination_addr = destination_list[1]
    destination = []
    for waypoint in destination_list[2]:
        routes = get_directions(origin_addr, destination_addr, waypoint[0])
        # Tag every route returned for this waypoint.  The original indexed
        # destination[i] after extend(), which mislabelled routes whenever
        # a request returned more (or fewer) than one route.
        for route in routes:
            route['name'] = waypoint[1]
            route['time_spent'] = waypoint[3]
            route['rating_n'] = waypoint[5]
        destination.extend(routes)
    iowork.save_temp_data(destination, 'potential_dest')
    print("Potential directions via POI received")
    return destination
def get_all_directions(data, split_by=0):
    """Split *data* into directions (CSPrivacy format) for busy AND free times.

    split_by: 0  - split on busy/free transitions: each maximal run of
                   identical 'busy' values becomes one direction
              1+ - split into consecutive chunks of *split_by* lines.
                   (The original doubled the chunk size after each split
                   via 'split_by += split_by', contradicting its own
                   docstring; fixed to constant-size chunks.)

    :param data: time-sorted list of row dicts with 'busy' and 'filename'
    :return: list of direction dicts with encoded polylines, or None when
             *data* is empty
    """
    if not data:
        return
    path = []
    directions = []
    if split_by == 0:
        last_busy = '0'
        for line in data:
            if line['busy'] == '1':
                if last_busy == '0':
                    # Transition free -> busy: close the free-time run.
                    # Guard against appending an empty first run — the
                    # original did, and later crashed on
                    # directions[0]['path'][0].
                    if path:
                        directions.append({'path': path})
                    path = []
                    last_busy = '1'
                path.append(line)
                continue
            if line['busy'] == '0':
                if last_busy == '1':
                    # Transition busy -> free: close the busy run.
                    if path:
                        directions.append({'path': path})
                    path = []
                    last_busy = '0'
                path.append(line)
    elif split_by > 0:
        for line in data:
            path.append(line)
            if len(path) == split_by:
                directions.append({'path': path})
                path = []
    else:
        print("split_by parameter is negative. Exiting...")
        exit(1)
    # Keep the trailing run/chunk instead of silently dropping it.
    if path:
        directions.append({'path': path})
    directions[0]['path'][0]['filename'] = data[0]['filename']
    directions = encode_dataset_polyline(directions)  # get polyline for original path
    iowork.save_temp_data(directions, 'all_direct_' + data[0]['filename'])
    print("Coordinates of directions for busy and free times received")
    return directions
def sort_file(filename):
    """Read a space-separated taxi trace file and sort its rows by time.

    Each input line is 'lat lon busy time'.  After sorting, the source
    file's base name (without extension) is attached to the first row as
    'filename' and the result is saved as temp data.

    :param filename: path of the trace file to read
    :return: time-sorted list of row dicts
    """
    data = []
    with open(filename, 'r') as file_object:
        # Iterate the file directly instead of a manual readline() loop.
        for raw_line in file_object:
            if not raw_line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            fields = [name.strip() for name in raw_line.split(' ')]
            data.append({
                'lat': fields[0],
                'lon': fields[1],
                'busy': fields[2],
                'time': fields[3]
            })
    # Fail with a clear message instead of the IndexError the original
    # raised on data[0] for an empty file.
    if not data:
        print("No data found. Exiting...")
        exit(1)
    data.sort(key=sort_rule)
    data[0]['filename'] = os.path.basename(filename).rsplit('.', 1)[0]  # get only filename
    iowork.save_temp_data(data, 'sorted_data_' + data[0]['filename'])
    print("Data sorted by the time and saved")
    return data
def dsparse_run(tracking_interval):
    """epfl/mobility dataset processing pipeline.

    For every file in the data-set directory: sort by time, extract busy
    directions, thin them to 600-second intervals, cut to at most 10000
    directions, rebuild navigation-provider directions against the data
    set, and export the result as JSON.

    :param tracking_interval: tracking interval in seconds
    :return: the directions produced for the last processed file, or None
             when no file yields usable directions
    """
    files = iowork.read_all_files()  # get all files in data sets' directory
    directions = None
    for f in files:  # for each file
        directions = dsparse.sort_file(f)  # sort by the time attribute
        directions = dsparse.get_busy_directions(directions)  # get coordinates when taxi is busy
        if not directions:
            continue
        directions = dsparse.get_coor_between(directions, 600)  # get coordinates in X seconds time interval
        directions = dsparse.cut_directions(directions, 10000, tracking_interval)
        if not directions:
            continue  # cut_directions returns False when nothing qualifies
        # Derive the temp-data name from the current file instead of the
        # hard-coded sample name 'coor_between_new_abboip' the original
        # used, which reprocessed one fixed file regardless of f.
        base_name = directions[0]['path'][0]['filename']
        directions = decode_polylines(iowork.get_temp_data('coor_between_' + base_name), path_num=3)
        directions = get_directions_for_ds(directions, tracking_interval)
        iowork.save_temp_data(directions, 'direct_temp_{}'.format(tracking_interval))
        directions = dsparse.get_for_json(iowork.get_temp_data('direct_temp_{}'.format(tracking_interval)))
        iowork.save_as_json(directions, 'direct_{}'.format(tracking_interval))
        dsparse.add_to_json(iowork.get_text_data('direct_{}'.format(tracking_interval), extension='.json'),
                            'direct{}'.format(tracking_interval))
    return directions
def get_directions(origin_addr, destination_addr, waypoints_list=None, filename='', first_run=False):
    """!!!Potential function!!! Get a direction from any navigation provider.

    :param origin_addr: start address (stringified before the request)
    :param destination_addr: end address (stringified before the request)
    :param waypoints_list: optional waypoints forwarded to direction()
    :param filename: temp-data filename extension; when empty on the first
                     run, a timestamp-based extension is generated
    :param first_run: when True, tag directions[0] with the filename and
                      save the result as temp data
    :return: the provider's directions list
    """
    origin_addr = str(origin_addr)
    destination_addr = str(destination_addr)
    if waypoints_list is not None:
        directions = direction(origin_addr, destination_addr, waypoints=waypoints_list)
    else:
        directions = direction(origin_addr, destination_addr)
    print("Direction received")
    if first_run:
        if not filename:
            # creating filename extension of temporary data.  Fix: the
            # original rounded the function object ('round(get_time)'),
            # which raises TypeError — call it to obtain the timestamp.
            directions[0]['filename'] = str(round(get_time()))
            filename = directions[0]['filename']
        else:
            directions[0]['filename'] = filename
        print("Filename extension of temporary data is:", filename)
        iowork.save_temp_data(directions, 'directions_' + filename)
    return directions
def potential_visit_poi (directions, tracking_interval, filename='', add_no_stop=False):
    """calculate probability to visit POIs and overall entropy

    For each direction, the candidate destinations ('all_destinations',
    re-filtered by in_time_directions) get a visit probability derived
    from a Z-score of the free time against the POI's assumed-normal
    visit-time distribution (three-sigma rule), optionally weighted by
    'rating_n'.  A lognormal "no stop" probability is folded in,
    probabilities are normalised, and their Shannon entropy (base 2) is
    stored on the direction.  Results are saved as temp data.

    NOTE(review): the bare 'except' below treats ANY failure in the main
    branch as "no potential destinations" and silently falls back to the
    no-stop-only path — real bugs in that branch are swallowed.
    """
    tracking_interval = tracking_interval/60  # seconds -> minutes
    for i in range(len(directions)):
        all_probab = []
        duration = directions[i]['duration']/60  # seconds -> minutes
        free_time = directions[i]['overview_free_time']/60  # seconds -> minutes
        try:
            # Keep only destinations reachable within the tracking interval.
            directions[i]['all_destinations'] = in_time_directions(directions[i]['all_destinations'], tracking_interval*60)
            for j in range(len(directions[i]['all_destinations'])):
                minim = abs(directions[i]['all_destinations'][j]['time_spent'][0]/60)  # we guess that Goolge min time spent equal to -2*sigma
                maxim = abs(directions[i]['all_destinations'][j]['time_spent'][1]/60)  # max time spent = 2*sigma
                if minim == maxim:  # in case if there is no time interval, artificially create it
                    minim = minim/2
                    maxim = 3*maxim/2
                mean = np.mean([minim, maxim])
                std = (maxim-minim)/4
                # Z-score calc and finding of the potential probabl interval
                # three sigma rule: m+s=68%, m+2s=95%, m+3s=99,7%
                z = (free_time-mean)/std
                if -1<=z<=1:
                    probab = 0.341
                elif -2<=z<-1 or 2>=z>1:
                    probab = 0.136
                elif -3<=z<-2 or 3>=z>2:
                    probab = 0.021
                else:
                    probab = 0.0013
                directions[i]['all_destinations'][j]['dist_data'] = ({'mean': mean, 'std': std, 'zscore': z, 'probab': probab})
                # rating addition: unweighted probabilities
                if directions[i]['all_destinations'][j]['rating_n'] > 0:
                    probab = probab * directions[i]['all_destinations'][j]['rating_n'] #CHECK IF ZERO
                all_probab.append(probab)
            # add nostop_prob
            nostop_probab = no_stop_lognormal(duration, tracking_interval)
            if add_no_stop:
                # No-stop competes with the POIs inside the proportion: it
                # is the last element, excluded from the per-POI print loop.
                all_probab.append(nostop_probab)
                sum_all = sum(all_probab)
                n_prob = []
                for prob in all_probab[:-1]:
                    n_prob.append(prob/sum_all)
                    print("Weighed probability of visit a POI is: {}%".format(round(prob/sum_all * 100, 2)))
                print("Weighed no stop probability is: {}%".format(round(nostop_probab/sum_all * 100, 2)))
            elif not add_no_stop:
                """find probability when no_stop doesn't participate in propotion"""
                # POI probabilities are instead scaled so their total equals
                # the "did stop" probability mass.
                stop_prob_weight = 1-nostop_probab
                # find proportion to calc entropy * stop_prob_weight
                sum_all = sum(all_probab)
                n_prob = []
                for prob in all_probab:
                    n_prob.append(prob/sum_all * stop_prob_weight)
                    print("Weighed probability of visit a POI is: {}%".format(round(prob/sum_all * 100, 2)))
            # check of input data correctness for entropy
            test = sum(n_prob)
            print("\nCheck. The sum of probabilities is: {}\n".format(test))
            # sp.stats.entropy re-normalises n_prob internally.
            entropy = sp.stats.entropy(n_prob, base=2)
            entropy_data = {'probabilities': all_probab, 'normal_prob': n_prob, 'direction_entropy': entropy, 'tracking_interval': tracking_interval, 'no_stop_prob': nostop_probab}  # create dict to save entropy data
            directions[i].update(entropy_data)
            print("Direction[{}] entropy is: {}".format(i, entropy))
        except:
            # Fallback path: treated as "no usable destinations" — entropy
            # is computed from the no-stop probability alone (n_prob stays
            # empty because all_probab[:-1] skips the only element).
            nostop_probab = no_stop_lognormal(duration, tracking_interval)
            all_probab.append(nostop_probab)
            sum_all = sum(all_probab)
            n_prob = []
            for prob in all_probab[:-1]:
                n_prob.append(prob/sum_all)
                print("Weighed probability of visit a POI is: {}%".format(round(prob/sum_all * 100, 2)))
            print("Weighed no stop probability is: {}%".format(round(nostop_probab/sum_all * 100, 2)))
            entropy = sp.stats.entropy(n_prob, base=2)
            entropy_data = {'probabilities': all_probab, 'normal_prob': n_prob, 'direction_entropy': entropy, 'tracking_interval': tracking_interval, 'no_stop_prob': nostop_probab}  # create dict to save entropy data
            directions[i].update(entropy_data)
            print("Direction[{}] entropy is: {}".format(i, entropy))
            print("No potential destinations for direction[{}]".format(i))
    if not filename:
        filename = directions[0]['filename']
    else:
        directions[0]['filename'] = filename
        print("WARNING! Filename extension was changed to: ", filename)
    iowork.save_temp_data(directions, 'entropy_data_{}_{:.0f}'.format(filename, tracking_interval))
    return directions
def get_directions_for_ds(exist_directions, tracking_interval):
    """get from any navigation provider to compare the directions with data set

    Walks each existing direction's path in steps of one tracking interval
    and (when both a start and an end address are known) requests a fresh
    provider direction between them, attaching the real tracked
    coordinates for later comparison.

    NOTE(review): the reverse_geocode block below is disabled (it is a
    bare string literal, not code), so start_addr/end_addr are never set
    and the 'if start_addr and end_addr' branch never runs — confirm
    whether this is intentionally switched off.
    """
    directions = []
    # Number of path rows per tracking interval (rows are ~600 s apart).
    time_interval = round(tracking_interval / 600)
    if time_interval <= 0:
        print("WARNING! Time interval at get_directions_for_ds() <= 0! No tracking interval was defined.")
        exit(1)
    for line in exist_directions:
        start_addr = ''
        end_addr = ''
        coor_num = 0
        whole_path = []
        # get only coordinates when the tracking point (tracking interval)
        # for coor_num in range(0, len(line['path']), time_interval):
        # NOTE(review): '<=' allows coor_num == len(line['path']), one past
        # the last valid index — verify against the commented-out indexing.
        while coor_num <= len(line['path']):
            # after run, get start address
            """if coor_num == 0:
                start_addr = reverse_geocode(latitude=[line['path'][coor_num]['lat'], longitude=line['path'][coor_num]['lon']])
            else:
                end_addr = reverse_geocode(latitude=[line['path'][coor_num]['lat'], longitude=line['path'][coor_num]['lon']])"""
            # if we have start and end addresses get directions
            if start_addr and end_addr:
                temp_directions = get_directions(start_addr, end_addr)
                temp_directions = in_time_directions(temp_directions, tracking_interval)
                # save real path coordinates to each direction
                new_real_path = {}
                for direct in temp_directions:
                    if not new_real_path:  # build the shared real path once
                        if len(line['polyline_coordinates']) > coor_num:
                            # delete exist real path and encode new polyline related to direction by any navigation provider
                            new_real_path['real_coordinates'] = line['polyline_coordinates'][:coor_num+1]
                            new_real_path.update({'overview_polyline': polyline.encode(new_real_path['real_coordinates'],5)})
                            new_real_path.update({'original_polyline': line['original_polyline']['points']})
                    direct['real_path'] = new_real_path  # add the real path to direction for comparison
                    direct['direction_time'] = coor_num  # save the tracking interval for direction
                start_addr = end_addr  # for the next direction, the start address is equal to the end address of the previous direction
                temp_directions = decode_polylines(temp_directions)
                whole_path.extend(temp_directions)
            if len(line['path'])-1-coor_num >= time_interval:  # check if interval is not exceed the path
                coor_num += time_interval
            elif len(line['path'])-1-coor_num >= 1:  # if time interval in excess of the path get the last path's point
                coor_num += len(line['path'])-1-coor_num
            else:
                break  # exit if zero
        if whole_path:
            directions.append(whole_path)
    # Tag the first produced direction with a filename, falling back to the
    # first row's timestamp and then to a constant; abort when nothing at
    # all was produced.
    try:
        directions[0][0]['filename'] = exist_directions[0]['path'][0]['filename']
    except:
        try:
            directions[0][0]['filename'] = exist_directions[0]['path'][0]['time']
        except:
            try:
                directions[0][0]['filename'] = 'unknown_filename'
            except:
                print("No directions were found. Exiting...")
                exit(1)
    iowork.save_temp_data(directions, "direct_" + directions[0][0]['filename'])
    return directions