def compute_latencies(self):
    self.latencies = {}
    self.wlatencies = {}
    self.max_pole_dist2 = {}
    for c in self.cluster:
        latency = 0.0
        weighted_latency = 0.0
        max_pole_dist2 = 0.0
        current_node = north_pole
        for j in self.cluster[c]:
            latency += haversine(current_node, self.X[j])
            weighted_latency += latency * self.gifts.Weight[j]
            current_node = self.X[j]
            if self.X[j][1] > max_pole_dist2:
                max_pole_dist2 = self.X[j][1]
        latency += haversine(current_node, north_pole)
        weighted_latency += latency * sleigh_weight
        max_pole_dist2 *= 2
        self.wlatencies[c] = weighted_latency
        self.latencies[c] = latency
        self.max_pole_dist2[c] = max_pole_dist2
def find_closest(point, segments, haversine_distance=False):
    """
    Find the linestring in segments that has a start or end point closest to point.
    Return (segment, "start" or "end", distance).
    """
    closest_segment = None
    closest_distance = 0
    closest_location = None
    for seg in segments:
        start = Point(seg.coords[0])
        end = Point(seg.coords[-1])
        if haversine_distance:
            start_distance = haversine((start.y, start.x), (point.y, point.x)) * 1000
            end_distance = haversine((end.y, end.x), (point.y, point.x)) * 1000
        else:
            start_distance = point.distance(start)
            end_distance = point.distance(end)
        if closest_segment is None or start_distance < closest_distance:
            closest_distance = start_distance
            closest_segment = seg
            closest_location = "start"
        if end_distance < closest_distance:
            closest_distance = end_distance
            closest_segment = seg
            closest_location = "end"
    return closest_segment, closest_location, closest_distance
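# A minimal usage sketch for find_closest, assuming shapely for Point/LineString
# and the haversine package; the segment coordinates below are made up for
# illustration. Shapely points are (x=lon, y=lat), which matches the (y, x)
# swap inside the function.
from shapely.geometry import LineString, Point
from haversine import haversine

segments = [
    LineString([(4.84, 45.76), (4.85, 45.77)]),
    LineString([(2.35, 48.86), (2.36, 48.87)]),
]
seg, location, dist = find_closest(Point(4.84, 45.75), segments, haversine_distance=True)
print(location, dist)  # which endpoint of the winning segment, and its distance in metres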
def freq_counts(entries, frame):
    district = 0
    min_dis = 25.0
    time = int(entries['Hour of the day'].get())  # .get() fetches the value from the GUI text box
    myzip = entries['Zipcode'].get()
    zipcd = geocoder.google(myzip)  # converts the entered zip code to latitude and longitude
    for key in DistLatLon:
        if min_dis == 0.0:
            break
        i = DistLatLon[key]
        pzip = geocoder.google(i)
        if pzip.latlng:
            # haversine calculates the distance between two locations
            # given their latitudes and longitudes
            dis = haversine(zipcd.latlng, pzip.latlng, miles=True)
            if dis < min_dis:
                min_dis = dis
                district = key  # identify which district the given zip code falls under
        else:
            # The else block does the same thing as the if block. This is due to a
            # bug in the geocoder library: sometimes it doesn't fetch the latitude
            # and longitude as expected, so we retry the lookup.
            pzip = geocoder.google(i)
            dis = haversine(zipcd.latlng, pzip.latlng, miles=True)
            if dis < min_dis:
                min_dis = dis
                district = key
    # Filter the data frame by the district and time found above.
    frame = frame[(frame["DC_DIST"] == district) & (frame["DISPATCH_TIME"] == time)]
    frame = frame[["DC_DIST", "DISPATCH_TIME", "TEXT_GENERAL_CODE"]]
    # Call the plotting function.
    plot_top_crimes(frame, 'TEXT_GENERAL_CODE', 'Top Crime Categories', 'category.png')
def location_chunk(bbox):
    # x is lat space, y is lon space
    max_x_dist = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["max_lat"], bbox["min_lon"]))
    max_y_dist = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["min_lat"], bbox["max_lon"]))
    x_nums, y_nums = 1000, 1000  # number of cells in each axis
    x_cell_size = max_x_dist / x_nums  # in km
    y_cell_size = max_y_dist / y_nums  # in km
    session = db_connection.SESSIONMAKER()
    pickups = session.query(db_model_2.Pickup.id,
                            func.ST_X(cast(db_model_2.Pickup.location, Geometry)),
                            func.ST_Y(cast(db_model_2.Pickup.location, Geometry)))
    print(pickups.count())
    count = 0
    for pickup in pickups:
        x_dist = haversine((bbox["min_lat"], bbox["min_lon"]), (pickup[2], bbox["min_lon"]))  # dist along lat
        y_dist = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["min_lat"], pickup[1]))  # dist along lon
        session.query(db_model_2.Pickup).\
            filter(db_model_2.Pickup.id == pickup[0]).\
            update({
                "x_coordinate": x_dist // x_cell_size,
                "y_coordinate": y_dist // y_cell_size
            })
        if count % 10000 == 0:
            print(count)
        count += 1
    session.commit()
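# A self-contained sketch of the grid indexing used above, without the database:
# a point's cell is its haversine distance from the bounding box's south-west
# corner along each axis, floor-divided by the cell size. The bbox and sample
# point are illustrative only.
from haversine import haversine

bbox = {"min_lat": 40.5, "max_lat": 41.0, "min_lon": -74.3, "max_lon": -73.7}
x_cell = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["max_lat"], bbox["min_lon"])) / 1000
y_cell = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["min_lat"], bbox["max_lon"])) / 1000

lat, lon = 40.75, -73.99  # a sample pickup
x = haversine((bbox["min_lat"], bbox["min_lon"]), (lat, bbox["min_lon"])) // x_cell
y = haversine((bbox["min_lat"], bbox["min_lon"]), (bbox["min_lat"], lon)) // y_cell
print(x, y)  # integer cell coordinates in a 1000 x 1000 grid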
def get_pos(self):
    walked_distance = 0.0
    if not self.is_paused:
        time_passed = time.time()
    else:
        time_passed = self._last_paused_timestamp
    time_passed_distance = self.speed * abs(time_passed - self._timestamp - self._paused_total)
    # check if there are any steps to take https://github.com/th3w4y/PokemonGo-Bot/issues/27
    if self.walk_steps(self.points):
        steps_dict = {}
        for step in self.walk_steps(self.points):
            walked_distance += haversine.haversine(*step) * 1000
            steps_dict[walked_distance] = step
        for walked_end_step in sorted(steps_dict.keys()):
            if walked_end_step >= time_passed_distance:
                break
        step_distance = haversine.haversine(*steps_dict[walked_end_step]) * 1000
        if walked_end_step >= time_passed_distance:
            percentage_walked = (time_passed_distance - (walked_end_step - step_distance)) / step_distance
        else:
            percentage_walked = 1.0
        result = self.calculate_coord(percentage_walked, *steps_dict[walked_end_step])
        self._last_pos = tuple(result[0])
        return self._last_pos
    else:
        # otherwise return the destination https://github.com/th3w4y/PokemonGo-Bot/issues/27
        self._last_pos = tuple(self.points[-1])
        return self._last_pos
def cost(job):
    pickup_tuple = (job['pickup']['lat'], job['pickup']['lng'])
    dropoff_tuple = (job['dropoff']['lat'], job['dropoff']['lng'])
    pickup_dist = haversine(pos, pickup_tuple)
    delivery_dist = haversine(pickup_tuple, dropoff_tuple)
    total_comp = travel_cost * (pickup_dist + delivery_dist) + delivery_cost * delivery_dist
    return total_comp / (pickup_dist + delivery_dist)
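# A hedged usage sketch for cost(): `pos`, `travel_cost`, and `delivery_cost`
# are free variables in the function above, so sample values are supplied here
# purely for illustration.
from haversine import haversine

pos = (40.7128, -74.0060)   # courier's current position (lat, lng)
travel_cost = 0.5           # assumed cost per km travelled
delivery_cost = 2.0         # assumed extra cost per km of the delivery leg

job = {
    'pickup': {'lat': 40.7306, 'lng': -73.9866},
    'dropoff': {'lat': 40.7580, 'lng': -73.9855},
}
print(cost(job))  # average compensation per km for this job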
def get_pos(self):
    if self.speed > self.get_total_distance():
        self._last_pos = self.destination
        self._last_step = len(self._step_keys) - 1
    if self.get_last_pos() == self.destination:
        return self.get_last_pos()
    distance = self.speed
    origin = Point(*self._last_pos)
    ((so_lat, so_lng), (sd_lat, sd_lng)) = self._step_dict[self._step_keys[self._last_step]]
    bearing = self._calc_bearing(so_lat, so_lng, sd_lat, sd_lng)
    while haversine.haversine(self._last_pos, (sd_lat, sd_lng)) * 1000 < distance:
        distance -= haversine.haversine(self._last_pos, (sd_lat, sd_lng)) * 1000
        self._last_pos = (sd_lat, sd_lng)
        if self._last_step < len(self._step_keys) - 1:
            self._last_step += 1
            ((so_lat, so_lng), (sd_lat, sd_lng)) = self._step_dict[self._step_keys[self._last_step]]
            bearing = self._calc_bearing(so_lat, so_lng, sd_lat, sd_lng)
            origin = Point(so_lat, so_lng)
            lat, lng = self._calc_next_pos(origin, distance, bearing)
            if haversine.haversine(self._last_pos, (lat, lng)) * 1000 < distance:
                distance -= haversine.haversine(self._last_pos, (lat, lng)) * 1000
                self._last_pos = (lat, lng)
        else:
            return self.get_last_pos()
    else:
        lat, lng = self._calc_next_pos(origin, distance, bearing)
        self._last_pos = (lat, lng)
    return self.get_last_pos()
def proximity_search(self, latitude, longitude, radius):
    """
    Given a centerpoint, find everything within a radius around that
    latitude and longitude, returned in order.

    :param latitude: floating point latitude
    :param longitude: floating point longitude
    :param radius: radius in meters.
    :return:
    """
    hashcode = geohash.encode(latitude=latitude, longitude=longitude)
    centerpoint = (latitude, longitude)
    tmp_hashcode = ''
    for x in hashcode:
        # Go through the hashcode character by character
        tmp_hashcode += x
        lat, lng, delta_lat, delta_lng = geohash.decode(tmp_hashcode, delta=True)
        overall_lat = 2 * 1000 * haversine(
            point1=(latitude - delta_lat, longitude),
            point2=(latitude + delta_lat, longitude)
        )
        overall_lng = 2 * 1000 * haversine(
            point1=(latitude, longitude - delta_lng),
            point2=(latitude, longitude + delta_lng)
        )
        dist = min(overall_lng, overall_lat)
        if dist < radius:
            tmp_hashcode = tmp_hashcode[:-1]
            break
    if tmp_hashcode == '':
        raise ValueError('Radius larger than earth')
    precision = len(tmp_hashcode)
    search_hashes = self._get_adjoining_hashes(hashcode=hashcode, precision=precision)
    search_hashes.append(tmp_hashcode)
    possible_points = []
    result_values = []
    for search_hash in search_hashes:
        possible_points.extend(self.storage.values(prefix=search_hash))
    for point_id in possible_points:
        point = self.points_by_id[point_id]
        dist = 1000 * haversine(centerpoint, point)
        if dist <= radius:
            result_values.append((point_id, dist))
    sorted_results = sorted(result_values, key=lambda x: x[1])
    final_results = [x[0] for x in sorted_results]
    return final_results
def reduce(self, events):
    latitude = self.latfield
    longitude = self.longfield
    relative_distance = self.output_field
    use_haversine = bool(self.use_haversine)
    self.logger.info("[%s] - Starting geodistance instance" % str(self.metadata.searchinfo.sid))
    self.logger.debug("[%s] - Using parameters - %s" % (str(self.metadata.searchinfo.sid), str(self.metadata)))
    if self.group_by:
        position_tracker = {}
        for event in events:
            current = event
            if not (current[latitude] or current[longitude]):
                current[relative_distance] = 0.0
                self.logger.debug("[%s] - Using distance=0 for private IPs or unknown coordinates. "
                                  "Exclude if undesired." % str(self.metadata.searchinfo.sid))
            else:
                current_pos = (float(current[latitude]), float(current[longitude]))
                if current[self.group_by] not in position_tracker.keys():
                    last_pos = None
                else:
                    last_pos = position_tracker[current[self.group_by]]
                if last_pos is None:
                    current[relative_distance] = 0.0
                    self.logger.debug(
                        "[%s] - Initializing the first location with distance=0"
                        % str(self.metadata.searchinfo.sid)
                    )
                else:
                    if use_haversine:
                        current[relative_distance] = haversine(last_pos, current_pos, miles=bool(self.miles))
                    else:
                        current[relative_distance] = vincenty(last_pos, current_pos, miles=bool(self.miles))
                position_tracker[current[self.group_by]] = current_pos
            yield current
    else:
        last_pos = None
        for event in events:
            current = event
            if not (current[latitude] or current[longitude]):
                current[relative_distance] = 0.0
                self.logger.debug(
                    "[%s] - Using distance=0 for private IPs or unknown coordinates. Exclude if undesired."
                    % str(self.metadata.searchinfo.sid))
            else:
                current_pos = (float(current[latitude]), float(current[longitude]))
                if last_pos is None:
                    current[relative_distance] = 0.0
                    self.logger.debug("[%s] - Initializing the first location with distance=0"
                                      % str(self.metadata.searchinfo.sid))
                else:
                    if use_haversine:
                        current[relative_distance] = haversine(last_pos, current_pos, miles=bool(self.miles))
                    else:
                        current[relative_distance] = vincenty(last_pos, current_pos, miles=bool(self.miles))
                last_pos = current_pos
            self.logger.debug(current)
            yield current
    self.logger.info("[%s] - Completed successfully." % str(self.metadata.searchinfo.sid))
def location_error(true_loc, text_loc, LocRes):
    # we create the location resolver in method.py because we don't want it to
    # load every time we import this file
    if not true_loc:
        return 0.0
    # check if the location field contains coordinates
    coord = isCoord(text_loc)
    if coord:
        return haversine(true_loc, coord)
    # otherwise resolve the text to lat/lon
    res = LocRes.reverse_geocode(text_loc.split()[0], text_loc.split()[1])
    if not res:
        return 0.0
    res_val = tuple(map(float, res))
    return haversine(true_loc, res_val)
def path_opt_test(llo):
    f_ = 0.0
    d_ = 0.0
    l_ = north_pole
    for i in range(len(llo)):
        d_ += haversine(l_, llo[i][1])
        f_ += d_ * llo[i][2]
        l_ = llo[i][1]
    d_ += haversine(l_, north_pole)
    f_ += d_ * 10  # sleigh weight for the whole trip
    return f_
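# A worked call for path_opt_test, assuming each entry of `llo` is
# (gift_id, (lat, lon), weight) and north_pole = (90, 0), as the loop's
# llo[i][1] / llo[i][2] indexing suggests. Values are illustrative.
from haversine import haversine

north_pole = (90, 0)
llo = [
    (1, (68.0, 15.0), 20.0),
    (2, (52.5, 13.4), 5.0),
]
print(path_opt_test(llo))  # weighted latency of visiting both gifts and returning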
def calculate_distance(self):
    latlng1 = self.location1.latlng
    latlng2 = self.location2.latlng
    if bool(latlng1 and latlng2):
        self.ok = True
        self.km = haversine(latlng1, latlng2)
        self.miles = haversine(latlng1, latlng2, miles=True)
        self.meters = int(self.km * 1000)
        self.feet = int(self.miles * 5280)
    else:
        print '<ERROR - Input is incorrect>'
def path_opt_test(llo):
    f_ = 0.0
    d_ = 0.0
    we_ = 0.0
    l_ = north_pole
    for i in range(len(llo)):
        d_ += haversine(l_, llo[i][1])
        we_ += llo[i][2]
        f_ += d_ * llo[i][2]
        l_ = llo[i][1]
    d_ += haversine(l_, north_pole)
    f_ += d_ * 10  # sleigh weight
    return [f_, d_, we_]
def bipartite(day, month, year, files):
    """
    Function bipartite
    ---------------------
    Creates a bipartite graph with edges across all possible combinations of
    waypoints through the zones. Each edge has an attribute of speed (average
    speed across the edge based on wind) and time (distance/speed).

    day: day the data was collected
    month: month the data was collected
    year: year the data was collected
    files: list with tags for paths of xlsx files formatted in the way shown in getFlightData

    returns: a bipartite graph with edges between zones that have the attributes
    of speed and time (networkx graph)
    """
    import geopy
    from geopy.distance import VincentyDistance

    zone = zones()  # create zones
    waypoint = waypointDict(files)  # get the waypoint dict of all waypoints
    zdir = GP(day, month, year)[0]    # predicted wind directions across all prediction points
    zspeed = GP(day, month, year)[1]  # predicted wind speeds (assumed to be the second element returned by GP)
    network = nx.DiGraph()
    for i in range(len(zone) - 1):
        # Create the edges from layer to layer in the bipartite graph
        for j in range(len(zone[i])):
            for k in range(len(zone[i + 1])):
                # Add edges from one zone to another with distance (in miles) as attribute
                network.add_edge(zone[i][j], zone[i + 1][k],
                                 distance=haversine((waypoint[zone[i][j]]), (waypoint[zone[i + 1][k]])) / 1.60934)
    for i in range(len(zone[0])):
        network.add_edge('source', zone[0][i],
                         distance=haversine(waypoint['source'], waypoint[zone[0][i]]) / 1.60934)
    for i in range(len(zone[5])):
        network.add_edge(zone[5][i], 'sink',
                         distance=haversine(waypoint[zone[5][i]], waypoint['sink']) / 1.60934)
    p = 0  # placeholder for iterating through the zdir and zspeed lists
    for i in range(network.number_of_edges()):
        # Go through each edge to find intervals at which to calculate weather data
        b = bearing((waypoint[network.edges()[i][0]]), (waypoint[network.edges()[i][1]]))  # bearing of the edge
        origin = geopy.Point(waypoint[network.edges()[i][0]][0],
                             waypoint[network.edges()[i][0]][1])  # lat, lon of point 1
        network[network.edges()[i][0]][network.edges()[i][1]]['speed'] = 0
        k = 0  # counts the total number of iteration points along this edge
        for j in range(0, int(roundDown(network[network.edges()[i][0]][network.edges()[i][1]]['distance'], 20)), 20):
            # geopy calculates the lat/lon after each 20-mile interval
            destination = VincentyDistance(kilometers=j).destination(origin, b)
            b_final = (bearing((destination.latitude, destination.longitude),
                               (waypoint[network.edges()[i][0]][0],
                                waypoint[network.edges()[i][0]][1])) + 180) % 360
            network[network.edges()[i][0]][network.edges()[i][1]]['speed'] += speed_calc(
                destination.latitude, destination.longitude, b_final, zdir[p], zspeed[p])
            k += 1
            p += 1
        # average speed across each edge
        network[network.edges()[i][0]][network.edges()[i][1]]['speed'] /= k
        # time across each edge
        network[network.edges()[i][0]][network.edges()[i][1]]['time'] = \
            network[network.edges()[i][0]][network.edges()[i][1]]['distance'] / \
            network[network.edges()[i][0]][network.edges()[i][1]]['speed']
def add_in_tour(self, giftID, centroidID):
    i, k = giftID, centroidID
    n = len(self.clusters[k])
    if n == 0:
        raise Exception('cluster was not initialized')
    lati, longi = self.X[i]
    dpole_i = self.distances_to_pole[i]
    j = n - np.searchsorted(self.latitudes_in_cluster[k][::-1], lati)
    if j == 0:
        latency_i = dpole_i
    else:
        previous_gift = self.clusters[k][j - 1]
        latency_i = self.latencies_in_cluster[k][j - 1] + haversine(self.X[previous_gift], self.X[i])
    if j == n:
        # add gift in last position
        delta_latency = latency_i - self.latencies_in_cluster[k][j - 1]
        weight_after_j = 0.
        delta_d = dpole_i - self.distances_to_pole[self.clusters[k][-1]]
    else:
        next_gift = self.clusters[k][j]
        delta_latency = latency_i + haversine(self.X[next_gift], self.X[i]) - self.latencies_in_cluster[k][j]
        weight_after_j = sum(self.weights[self.clusters[k][j:]])
        delta_d = 0.
    self.clusters[k].insert(j, i)
    self.latitudes_in_cluster[k].insert(j, lati)
    for jj in range(j, n):
        self.latencies_in_cluster[k][jj] += delta_latency
    self.latencies_in_cluster[k].insert(j, latency_i)
    if min(self.centroids[k][1], self.X[i][1]) < -150 and max(self.centroids[k][1], self.X[i][1]) > 150:
        # the centroid and the new gift straddle the antimeridian: average the
        # longitudes in a 0..360 frame to avoid wrap-around artifacts
        lamean = (self.centroids[k][0] * self.weight_per_cluster[k] + self.X[i][0] * self.weights[i]) \
            / (self.weights[i] + self.weight_per_cluster[k])
        lo = np.array([self.centroids[k][1], self.X[i][1]])
        lo = np.where(lo < 0, lo + 360, lo)
        lomean = (lo[0] * self.weight_per_cluster[k] + lo[1] * self.weights[i]) \
            / (self.weights[i] + self.weight_per_cluster[k])
        if lomean > 180:
            lomean = lomean - 360.
        self.centroids[k] = np.array([lamean, lomean])
    else:
        self.centroids[k] = (self.centroids[k] * self.weight_per_cluster[k] + self.X[i] * self.weights[i]) \
            / (self.weights[i] + self.weight_per_cluster[k])
    self.weight_per_cluster[k] += self.weights[i]
    self.cost_per_cluster[k] += self.weights[i] * latency_i \
        + (weight_after_j + sleigh_weight) * delta_latency + sleigh_weight * delta_d
def most_likely_location(self, user, location_set):
    """
    Returns the most likely location for a user of unknown locale,
    based on the social tightness model.
    """
    max_probability = float('-inf')
    best_location = None
    for neighbor_u in self.mention_network.neighbors_iter_(user):
        if neighbor_u not in location_set:
            continue
        location_of_neighbor_u = self.mention_network.node_data_(neighbor_u)
        probability = 0
        for neighbor_v in self.mention_network.neighbors_iter_(neighbor_u):
            if neighbor_v not in location_set:
                continue
            location_of_neighbor_v = self.mention_network.node_data_(neighbor_v)
            # to keep the dict lookup consistent, distances are rounded to the
            # nearest kilometer (social closeness was rounded to two decimals)
            distance = round(haversine(location_of_neighbor_u, location_of_neighbor_v), 0)
            social_closeness = self.sij[neighbor_u][neighbor_v]
            probability += self.probability_distance_social_closeness[distance][social_closeness]
        # compare the probability of this neighbor with the other candidates;
        # the highest-probability neighbor's location is the most likely location
        if probability > max_probability:
            max_probability = probability
            best_location = location_of_neighbor_u
    return best_location
def find_distance(start, end, miles=True):
    response1 = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address=' + start)
    response2 = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address=' + end)
    resp_json_payload1 = response1.json()
    resp_json_payload2 = response2.json()
    # Retrieve latitude and longitude of the starting and ending points
    orig_lat = resp_json_payload1['results'][0]['geometry']['location']['lat']
    orig_lon = resp_json_payload1['results'][0]['geometry']['location']['lng']
    dest_lat = resp_json_payload2['results'][0]['geometry']['location']['lat']
    dest_lng = resp_json_payload2['results'][0]['geometry']['location']['lng']
    orig_coord = (orig_lat, orig_lon)
    dest_coord = (dest_lat, dest_lng)
    # Find the distance between the starting and ending points using the haversine formula
    distance_traveled = haversine(orig_coord, dest_coord, miles)
    # url = "http://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&language=en-EN&units=imperial".format(str(orig_coord), str(dest_coord))
    # result = requests.get(url)
    # resp_json = result.json()
    # distance_traveled = resp_json['rows'][0]['elements'][0]['distance']['text']
    return distance_traveled
def find_near():
    """Find documents near a point, ordered from nearest to farthest.

    Usually has a limit() clause. Results are ordered by distance from
    nearest to farthest by default.
    """
    # This is how you construct a filter
    filters = {
        "loc": {
            "$nearSphere": {
                "$geometry": {
                    "type": "Point",
                    "coordinates": [LNG, LAT],
                },
                "$maxDistance": 5 * 1000,  # max distance in meters
            }
        }
    }
    array = list()
    for doc in col.find(filters).limit(5):
        lng, lat = doc["loc"]["coordinates"][0], doc["loc"]["coordinates"][1]
        dist = haversine((lat, lng), (LAT, LNG), miles=False)
        assert dist <= 5.0
        array.append(dist)
    assert_increasing(array)
def read_pl_lns_geo(pl_lns, pl_lns_geo):
    """Returns the list of lns, a dict with pairwise distances between lns,
    and each ln's neighbours ordered by distance."""
    lns = []
    f = open(pl_lns)
    for line in f:
        lns.append(line.split()[0].strip())
    lns_geo = {}
    f = open(pl_lns_geo)
    for line in f:
        tokens = line.split()
        lns_geo[tokens[0]] = [float(tokens[1]), float(tokens[2])]
    lns_dist = {}
    for lns1 in lns:
        lns_dist[lns1] = {}
        for lns2 in lns:
            if lns1 == lns2:
                lns_dist[lns1][lns2] = 0
            else:
                # note: this assumes a haversine helper that takes four scalar
                # arguments (lat1, lon1, lat2, lon2) rather than two tuples
                lns_dist[lns1][lns2] = haversine(lns_geo[lns1][0], lns_geo[lns1][1],
                                                 lns_geo[lns2][0], lns_geo[lns2][1])
    lns_nbrs_ordered_bydist = {}
    for lns1 in lns:
        tuples = []
        for k, v in lns_dist[lns1].items():
            if k == lns1:
                continue
            tuples.append([k, v])
        tuples.sort(key=itemgetter(1))
        lns_nbrs_ordered_bydist[lns1] = [t[0] for t in tuples]
    return lns, lns_dist, lns_nbrs_ordered_bydist
def get_proximos_a(cls_obj, latitude, longitude):
    """
    Latitude/longitude math is tricky. Ideally we would use the haversine
    formula (https://pt.wikipedia.org/wiki/F%C3%B3rmula_de_Haversine) to test
    the distance from every property to the search address. But besides being
    complicated to implement in a Django query, it would become a problem once
    the database fills up with properties.

    So the approach used here is: get_min_max_coordenates computes the minimum
    and maximum latitudes and longitudes roughly n km away (here n = 1 km), and
    we filter the properties whose latitude and longitude fall inside that
    square. That list may of course include properties more than 1 km from the
    address, since we built a square rather than a circle. With the candidate
    set reduced, we iterate over the properties returned by the ORM and test
    each one's distance to the point with the haversine formula. The properties
    outside the circle but returned by the query are discarded, leaving only
    the ones inside the circle.
    """
    circle = 1  # up to 1 km away
    bounds = get_min_max_coordenates(latitude, longitude, circle)
    candidatos = Imovel.get_disponiveis().filter(latitude__gte=bounds[0],
                                                 latitude__lte=bounds[1],
                                                 longitude__gte=bounds[2],
                                                 longitude__lte=bounds[3])
    center = (latitude, longitude)
    return [imovel for imovel in candidatos
            if haversine(center, (imovel.latitude, imovel.longitude)) <= circle]
def most_central_point(geos_array, valid_medoid=30):
    """
    Algorithm to find the point that is most central (i.e., the medoid) using
    the haversine formula. Distances are weighted by the number of observations
    (this increases successful selection of a medoid from the pool by 50%).

    :param geos_array: array of (lat, lon) coordinates
    :param valid_medoid: max for mean distance to all other points / number of
                         observations. Defaults to 30. If the value is still
                         over 30 after computing the weighting metric below,
                         the point is almost certainly worth removing.
    :return: the medoid coordinate, or np.NaN if none passes the threshold
    """
    # Count the number of times each coordinate appears in `geos_array`
    geos_array_count = dict(Counter(geos_array))

    # Define a list of unique coordinates
    unique_geos = list(set(geos_array))

    # Compute the distance from each point to all of the others
    coord_dict = dict()
    for i in unique_geos:
        coord_dict[i] = [haversine(i, j) for j in unique_geos if j != i]

    # Compute the mean for each and divide by the number of times it occurred in geos_array
    coord_dict_mean = {k: mean(v) / float(geos_array_count[k]) for k, v in coord_dict.items()}

    # Use the most central point as the medoid
    medoid_mean_coord = min(coord_dict_mean, key=coord_dict_mean.get)

    # Check against the threshold
    if coord_dict_mean[medoid_mean_coord] <= valid_medoid:
        return medoid_mean_coord
    else:
        return np.NaN
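# A quick usage sketch for most_central_point; the coordinates are made up, and
# `mean` is assumed to come from statistics (with Counter from collections),
# as the function's calls suggest.
from collections import Counter
from statistics import mean
from haversine import haversine
import numpy as np

points = [(48.8567, 2.3508), (48.8567, 2.3508), (48.86, 2.35), (48.85, 2.34)]
print(most_central_point(points))  # the repeated point wins thanks to the count weighting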
def get_relations_informations(user):
    min_distance = None
    nearest = None
    stale = []
    no_gps = []
    user_loc = (user.last_latitude, user.last_longitude)
    for relation in user.relationships:
        # If too old, we add the relation to the stale list
        time_diff = (datetime.utcnow() - relation.updated_at) / timedelta(minutes=1)
        if time_diff > 5:
            stale.append(relation.facebook_id)
            continue
        # If the GPS is deactivated, we add it to the no-gps list
        loc = (relation.last_latitude, relation.last_longitude)
        if loc[0] is None:
            no_gps.append(relation.facebook_id)
            continue
        # If the GPS is active and up to date, we test whether this relation is the nearest so far.
        distance = haversine(user_loc, loc) * 1000.
        if distance < DISTANCE_THRESHOLD and (min_distance is None or distance < min_distance):
            min_distance = distance
            nearest = relation
    return {"user": nearest, "distance": min_distance, "stale": stale, "no_gps": no_gps}
def medium():
    a = []
    b = []
    while len(a) < 3:
        c1 = random_coord()
        c2 = random_coord()
        if haversine(c1, c2) >= 0.1 and haversine(c1, c2) < 1000:
            a.append(c1)
            b.append(c2)
    print("\n\t@Test")
    print("\tpublic void mediumDistanceTest() {")
    fmtprt("testMedium", a, b)
    print("\t}")
def social_closeness(self):
    """
    The social tightness based model is based on the assumption that different
    friends have different importance to a user. The social closeness between
    two users is measured via cosine similarity; we then estimate the
    probability of user i and user j being located at a distance |l_i - l_j|
    given their social closeness. Finally we estimate the probability of user_i
    being located at l_i and use the location with the top probability.
    """
    pairs = 0
    # here we calculate social closeness
    logger.debug("Calculating social closeness")
    for user in self.users_with_location:
        user_location = self.mention_network.node_data_(user)
        for friend in self.mention_network.neighbors_iter_(user):
            friend_location = self.mention_network.node_data_(friend)
            if not friend_location:
                continue
            pairs += 1
            social_closeness = round(self.cosine_similarity(user, friend), 2)
            self.sij[user][friend] = social_closeness
            distance = round(haversine(user_location, friend_location), 0)
            self.probability_distance_social_closeness[distance][social_closeness] += 1.0
    # the normalizing factor is the total number of social closeness observations added above
    normalizing_factor = pairs
    for distance in self.probability_distance_social_closeness:
        for social_closeness in self.probability_distance_social_closeness[distance]:
            self.probability_distance_social_closeness[distance][social_closeness] /= normalizing_factor
    logger.debug("Finished calculating the social closeness...")
def compare_coordinates(left_lat, left_lon, right_lat, right_lon, accuracy=0.1):
    '''Compares coordinates with the specified accuracy.

    Two coordinates are considered equal when the haversine distance between
    them does not exceed `accuracy` kilometers.

    :param left_lat: First coordinate latitude
    :param left_lon: First coordinate longitude
    :param right_lat: Second coordinate latitude
    :param right_lon: Second coordinate longitude
    :param accuracy: Max difference between coordinates to consider them equal
                     Default value - 0.1
                     Possible values - float or integer value of kilometers
    :returns: Boolean value
    :error: ValueError when there is a problem with converting accuracy into a
            float value. When it is caught, a warning will be given and
            accuracy will be set to 0.1.
    '''
    for key, value in {'left_lat': left_lat, 'left_lon': left_lon,
                       'right_lat': right_lat, 'right_lon': right_lon}.items():
        if not isinstance(value, NUM_TYPES):
            raise TypeError("Invalid type for coordinate '{0}'. "
                            "Expected one of {1}, got {2}".format(
                                key, str(NUM_TYPES), str(type(value))))
    distance = haversine((left_lat, left_lon), (right_lat, right_lon))
    if distance > accuracy:
        return False
    return True
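# Usage sketch for compare_coordinates. NUM_TYPES and haversine are module-level
# dependencies of the function above; plausible stand-ins are supplied here.
# The two sample points are roughly 60 m apart.
from haversine import haversine

NUM_TYPES = (int, float)
print(compare_coordinates(48.8567, 2.3508, 48.8572, 2.3512))                 # True at the default 0.1 km
print(compare_coordinates(48.8567, 2.3508, 48.8572, 2.3512, accuracy=0.05))  # False at 0.05 km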
def isinradius(point, distance):
    """Takes a tuple of (lat, lon) where lon and lat are floats, and a distance
    in miles. Returns a list of zipcodes near the point."""
    zips_in_radius = list()
    if not isinstance(point, tuple):
        raise TypeError('point should be a tuple of floats')
    for f in point:
        if not isinstance(f, float):
            raise TypeError('lat and lon must be of type float')
    dist_btwn_lat_deg = 69.172
    # the latitude must be converted to radians before taking its cosine
    dist_btwn_lon_deg = math.cos(math.radians(point[0])) * 69.172
    lat_degr_rad = float(distance) / dist_btwn_lat_deg
    lon_degr_rad = float(distance) / dist_btwn_lon_deg
    latmin = point[0] - lat_degr_rad
    latmax = point[0] + lat_degr_rad
    lonmin = point[1] - lon_degr_rad
    lonmax = point[1] + lon_degr_rad
    if latmin > latmax:
        latmin, latmax = latmax, latmin
    if lonmin > lonmax:
        lonmin, lonmax = lonmax, lonmin
    stmt = ('SELECT * FROM ZIPS WHERE LONG > {lonmin} AND LONG < {lonmax} '
            'AND LAT > {latmin} AND LAT < {latmax}')
    _cur.execute(stmt.format(lonmin=lonmin, lonmax=lonmax, latmin=latmin, latmax=latmax))
    results = _cur.fetchall()
    for row in results:
        if haversine(point, (row[_LAT], row[_LONG])) <= distance:
            zips_in_radius.append(Zip(row))
    return zips_in_radius
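# A self-contained check of the bounding-box prefilter used above: one degree
# of longitude shrinks with cos(latitude), so the search square must widen in
# longitude degrees as you move poleward. The values are illustrative.
import math

lat, distance = 40.0, 5.0  # miles
lat_halfwidth = distance / 69.172
lon_halfwidth = distance / (math.cos(math.radians(lat)) * 69.172)
print(lat_halfwidth, lon_halfwidth)  # the longitude halfwidth is the larger of the two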
def hotspots(lat, lon):
    newradius = ''
    for city in cities.values():
        # haversine expects two (lat, lon) tuples
        if haversine((float(lat), float(lon)), (city[0], city[1])) < city[2]:
            newradius = city[3]
    return newradius
def consultar(BT1, bt2_gps):
    a = randint(1, 125000)
    # b = r_server.get("BT" + str(a).zfill(12))
    b = r_server.get(BT1)
    b_str = str(b).split("|")
    bt1_gps = (float(b_str[0]), float(b_str[1]))
    return (haversine(bt1_gps, bt2_gps), bt1_gps)
def cluster_gifts(self, verbose, giftDF):
    # df = pd.read_csv(path)
    # Kaggle specific variables
    print('initializing santa data...')
    self.cluster = True
    # set demand as weight
    print("creating customer demands (gift weights)...")
    # self.d = df.Weight.values
    # self.xc = zip(df.Latitude.values, df.Longitude.values)
    # self.xf = self.xc  # np.vstack([df.Longitude.values, df.Latitude.values]).T
    # since we are clustering, any customer can be a factory
    self.n = len(giftDF)
    self.m = self.n
    # For memory efficiency, do not create the data matrix
    print("creating distance matrix...")
    self.create_distance_matrix(x=giftDF[['Latitude', 'Longitude']].values)
    # Set the start-up (fixed) cost to be the distance to the north pole
    print("creating startup costs...")
    self.s = [haversine(xx, self.north_pole) for xx in giftDF[['Latitude', 'Longitude']].values]
    return self.gurobi_cluster(verbose, giftDF)
def walk(self):
    if not self.is_paused:
        walked_distance = 0.0
        time_passed = time.time()
        remaining_points = []
        time_passed_distance = self.speed * abs(time_passed - self._timestamp - self._paused_total)
        for step in self.walk_steps():
            step_distance = haversine.haversine(*step) * 1000
            if walked_distance + step_distance >= time_passed_distance:
                if walked_distance > time_passed_distance:
                    percentage_walked = 0.0
                else:
                    if step_distance == 0.0:
                        percentage_walked = 1.0
                    else:
                        percentage_walked = (time_passed_distance - walked_distance) / step_distance
                remaining_points += self.calculate_coord(percentage_walked, *step)
            else:
                percentage_walked = 1.0
            walked_distance += step_distance * percentage_walked
        self.points = remaining_points
        if self.points:
            self.lat, self.long = self.points[0][0], self.points[0][1]
            self.polyline = self.combine_polylines(self.points)
        else:
            self.lat, self.long = None, None
            self.polyline = ''
            self.reset_timestamps()
    return (self.lat, self.long)
    'Please input the number of hospital you want to show: '))
ratingin = float(
    raw_input(
        'What is the minimum rating (please input an int between 1 and 5)? '
    ))
print "Please wait for a few seconds..."

# latitude and longitude of the input zipcode
x = zipcode[zipcode.zip_code == zipcodein]['lat']
y = zipcode[zipcode.zip_code == zipcodein]['lng']
start = [x, y]

# distance between the input zipcode and the hospitals
# (note: haversine() defaults to kilometers; the column name below assumes miles)
distance = []
for i in range(result.shape[0]):
    ending = result[['lat', 'lng']].ix[i].values
    distance.append(haversine(start, ending))

# append distance as the last column
result1 = result.copy()
result1['distance_miles'] = distance

# convert the data type of overall_rating from string to numeric
result1['overall_rating'] = pd.to_numeric(result1['overall_rating'], errors='coerce')

# drop all rows with overall_rating less than the input
result1 = result1[result1.overall_rating >= ratingin]

# change the order of the columns
result2 = result1[[
    'ccn', 'name', 'Street', 'City', 'State', 'zip_code', 'overall_rating',
    'spending_score', 'lat', 'lng', 'distance_miles'
]]
result1 = result1[[
def haversine_distance(lat1, lon1, lat2, lon2):
    loc1 = (lat1, lon1)
    loc2 = (lat2, lon2)
    return hs.haversine(loc1, loc2)
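# Usage sketch for haversine_distance: a thin wrapper that takes scalars instead
# of tuples and returns kilometers (the haversine package default).
import haversine as hs

print(haversine_distance(45.7597, 4.8422, 48.8567, 2.3508))  # Lyon -> Paris, ~392 km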
other_longitude = 85.76064
if my_latitude == 0 or my_longitude == 0 or other_latitude == 0 or other_longitude == 0:
    return 0
else:
    R = 6372.8  # Earth radius in kilometers
    dLat = radians(other_latitude - my_latitude)
    print("dLat", dLat)
    dLon = radians(other_longitude - my_longitude)
    print("dLon", dLon)
    lat1 = radians(my_latitude)
    print("lat1", lat1)
    lat2 = radians(other_latitude)
    print("lat2", lat2)
    a = sin(dLat / 2)**2 + cos(lat1) * cos(lat2) * sin(dLon / 2)**2
    print("a", a)
    c = 2 * asin(sqrt(a))
    print("c", c)
    return R * c * 1000  # distance in meters

print("haversine", haversine((38.21273, 85.76018), (38.2126, 85.75976)))
print("haversine2", haversine2(38.21273, 85.76018, 38.21232, 85.76064))
print(calculate_distance_between_subsystems2())
def df_haversine(lat1: float, lng1: float, lat2: float, lng2: float):
    # print(f"lat1: {lat1} is of type {type(lat1)}")
    # print(f"lat2: {lat2} is of type {type(lat2)}")
    return haversine((lat1, lng1), (lat2, lng2), Unit.MILES)
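# The df_ prefix suggests row-wise use with pandas; a hedged sketch with a
# made-up DataFrame of pickup/dropoff coordinates:
import pandas as pd
from haversine import haversine, Unit

df = pd.DataFrame({
    "lat1": [40.7128, 41.8781], "lng1": [-74.0060, -87.6298],
    "lat2": [40.7306, 41.8827], "lng2": [-73.9866, -87.6233],
})
df["miles"] = df.apply(lambda r: df_haversine(r.lat1, r.lng1, r.lat2, r.lng2), axis=1)
print(df)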
def astar(apf, start, distance_target, genome, values_matrix, K, pre_matrix, x_value, type_astar):
    """
    Returns a list of tuples as a path from the given start in the given maze.
    This mostly works like a normal A* algorithm, except that the current
    version does not know the destination point: it only knows the distance
    from the destination point (the total length of the trip).

    :param apf: matrix representing the routing system of the area
    :param start: starting point
    :param distance_target: distance from the target (total length of the trip)
    :param genome: genome
    :param values_matrix: values to translate cells to coordinates
    :param K: constant for computing charge
    :param pre_matrix: precomputed distances from each cell to the objects
    :param x_value: percentage of the distance target used by the heuristic
    :param type_astar: typology of the astar wanted
    """
    # make x_value a percentage of the total distance target
    x_value = (distance_target * x_value) / 100

    # Create the start node
    start_node = Node(parent=None, position=start)
    start_node.g = start_node.h = start_node.f = 0

    # Initialize both the open and closed lists; second_open_queue mirrors the
    # heap so membership checks are O(1)
    open_queue = []
    second_open_queue = {}
    closed_list = {}

    # Add the start node
    heapq.heappush(open_queue, (start_node.f, start_node))
    second_open_queue.update({start_node.id: 0})

    # Loop until you find the end
    while len(open_queue) > 0:
        # Get the current node
        current_node = heapq.heappop(open_queue)[1]
        del second_open_queue[current_node.id]

        # Pop current off the open list, add it to the closed list
        closed_list[current_node.id] = ""

        # Found the goal: the distance from the start is saved on the current
        # node as g, so once it reaches distance_target we are done
        if current_node.g >= distance_target:
            return return_best_path_so_far(current_node=current_node)

        # Generate children
        points = list_neighbours(x_value=current_node.position.x, y_value=current_node.position.y, apf=apf)
        points_on_the_street = pre_matrix.keep_only_points_on_street(points=points)
        children = [Node(parent=current_node, position=node_position) for node_position in points_on_the_street]

        # Loop through children
        for child in children:
            # Child is on the closed list
            if closed_list.get(child.id, None) is not None:
                continue
            # Child already computed
            if second_open_queue.get(child.id, None) is not None:
                continue

            # Create the f, g, and h values
            r = haversine((values_matrix[0][child.position.x], values_matrix[1][child.position.y]),
                          (values_matrix[0][current_node.position.x],
                           values_matrix[1][current_node.position.y])) * 1000  # in metres
            child.g = current_node.g + r
            # distance to the end is the total distance minus the distance from the start
            distance_to_end = distance_target - child.g
            child.h = abs(_compute_h(distance_to_end=distance_to_end, genome=genome,
                                     type_astar=type_astar, current_position=child.position,
                                     K=K, pre_matrix=pre_matrix, x_value=x_value,
                                     tra_moved_so_far=return_best_path_so_far(current_node=current_node)
                                     if type_astar == 1 else None))
            total_g_normalised = _standard_normalisation(old_value=child.g, old_min=0,
                                                         old_max=distance_target + 100,
                                                         new_min=0, new_max=10)
            # now higher is the most attractive
            child.f = -(total_g_normalised + child.h)

            # Child is already in the open list with a shorter path
            res = second_open_queue.get(child.id, None)
            if res is not None and child.g > res:
                continue

            # Add the child to the open list
            heapq.heappush(open_queue, (child.f, child))
            second_open_queue.update({child.id: child.g})
data = pd.read_csv('filtered_3rdgtw_loravar.csv', sep=',')
data['received'] = pd.to_datetime(data['received'])
results = data.shape
results = results[0]

devloc = pd.read_csv('device_location.csv', sep=';')
gtwloc = pd.read_csv('gateway_location.csv', sep=';')

tableofdistance = []
indices = []
for index, row in data.iterrows():
    for index_gtw, row_gtw in gtwloc.iterrows():
        for index_dev, row_dev in devloc.iterrows():
            if (row['gtw_id'] == row_gtw['gtw_id'] and row['dev_id'] == row_dev['dev_id']):
                loc1 = (devloc.loc[index_dev]['dev_lat'], devloc.loc[index_dev]['dev_long'])
                loc2 = (gtwloc.loc[index_gtw]['gtw_lat'], gtwloc.loc[index_gtw]['gtw_long'])
                hav = haversine(loc1, loc2)
                hav = hav * 1000  # distance in meters (by default, haversine returns kilometers)
                # 3D distance: combine the ground distance with the altitude difference
                distance = (hav**2 + (devloc.loc[index_dev]['dev_alt']
                                      - gtwloc.loc[index_gtw]['gtw_alt'])**2)**0.5
                tableofdistance.append(distance)

distanceDF = pd.DataFrame(tableofdistance, columns=['dist_devgtw'])
data_distance = data.join(distanceDF)
data_distance.to_csv('loravar_distances.csv', sep=',', index=False)
def find_dist(y1, y2):
    dists = []
    for i in range(len(y1)):
        a = haversine(y1[i], y2[i])
        dists.append(a)
    return dists
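# Usage sketch: find_dist pairs the two lists element-wise, so y1[i] and y2[i]
# must each be a (lat, lon) tuple; the sample coordinates are illustrative.
from haversine import haversine

y_true = [(45.7597, 4.8422), (48.8567, 2.3508)]
y_pred = [(45.76, 4.84), (48.86, 2.35)]
print(find_dist(y_true, y_pred))  # per-pair error in kilometers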
def calcDistanceKm(geom1, geom2):
    coord1 = ST_AsTuple(session.query(functions.ST_AsText(geom1)).one())
    coord2 = ST_AsTuple(session.query(functions.ST_AsText(geom2)).one())
    return haversine(coord1, coord2)
import timeit
from typing import Tuple

import haversine as hs

# The reason these are slightly different is that haversine computes
# radians(x2 - x1) whereas dis uses radians(x2) - radians(x1). The advantage
# is that dis is about 15% faster and the difference is in the noise:
#   hs.haversine(pt1, pt2)=111178.14375531959
#   perf=1.1505752360008046
#   ptr1.dis(pt2)=111178.1437553196
#   perf=0.9691372659999615
hpt1: Tuple[float, float] = 1.0, 2.0
hpt2: Tuple[float, float] = 1.0, 3.0
hd: float = hs.haversine(hpt1, hpt2)
print(f'hs.haversine(hpt1, hpt2)={hd}')
#loops: int = 1_000_000
#print(f'perf={timeit.timeit("hs.haversine(hpt1, hpt2)", number=loops, globals=globals())}')

pt1: TrackPoint = TrackPoint(lat=1.0, lon=2.0)
pt2: TrackPoint = TrackPoint(lat=1.0, lon=3.0)
d: float = pt1.disMeters(pt2)
print(f' ptr1.disMeters(pt2)={d}')
#print(f'perf={timeit.timeit("pt1.disMeters(pt2)", number=loops, globals=globals())}')

import unittest

class TestTrackPoint(unittest.TestCase):
    def test_init_default(self: TestTrackPoint):
def famous_route(data, veo_data, dist):
    lon = veo_data['Lon'].to_numpy()
    lat = veo_data['Lat'].to_numpy()
    start_points_long = data['START LONG'].to_numpy()
    start_points_lat = data['START LAT'].to_numpy()
    end_points_long = data['END LONG'].to_numpy()
    end_points_lat = data['END LAT'].to_numpy()
    length = len(start_points_long)
    fro = [""] * length
    to = [""] * length
    distance = [""] * length
    destination = veo_data['STATION'].to_numpy()
    # match each trip start to the nearest station within `dist` metres
    for i in range(length):
        loc2 = (float(start_points_lat[i]), float(start_points_long[i]))
        for j in range(len(lon)):
            loc1 = (float(lat[j]), float(lon[j]))
            if fro[i] == "":
                if hs.haversine(loc1, loc2, unit=Unit.METERS) <= dist:
                    fro[i] = destination[j]
                    distance[i] = hs.haversine(loc1, loc2, unit=Unit.METERS)
            elif distance[i] > hs.haversine(loc1, loc2, unit=Unit.METERS):
                distance[i] = hs.haversine(loc1, loc2, unit=Unit.METERS)
                fro[i] = destination[j]
        if fro[i] == "":
            fro[i] = "unknown"
    # match each trip end the same way
    distance = [""] * length
    for i in range(length):
        loc2 = (float(end_points_lat[i]), float(end_points_long[i]))
        for j in range(len(lon)):
            loc1 = (float(lat[j]), float(lon[j]))
            if to[i] == "":
                if hs.haversine(loc1, loc2, unit=Unit.METERS) <= dist:
                    to[i] = destination[j]
                    distance[i] = hs.haversine(loc1, loc2, unit=Unit.METERS)
            elif distance[i] > hs.haversine(loc1, loc2, unit=Unit.METERS):
                distance[i] = hs.haversine(loc1, loc2, unit=Unit.METERS)
                to[i] = destination[j]
        if to[i] == "":
            to[i] = "unknown"
    # count how often each from-to pair occurs
    map_path = {}
    for i in range(length):
        try:
            s = map_path[fro[i] + "-" + to[i]]
            map_path[fro[i] + "-" + to[i]] = s + 1
        except Exception:
            map_path[fro[i] + "-" + to[i]] = 1
    # print(map_path)
    start_unknown = [False] * length
    end_unknown = [False] * length
    for i in range(length):
        if fro[i] == "unknown":
            start_unknown[i] = True
        if to[i] == "unknown":
            end_unknown[i] = True
    data['FROM'] = fro
    data['TO'] = to
    data['START_UNKNOWN'] = start_unknown
    data['END_UNKNOWN'] = end_unknown
    return map_path
def get_best_flight_price(self, departure_airport, arrival_airports):
    final_response = self.handle_error(None, None, None, None, [])  # initialize an empty result set
    if departure_airport is None:
        return self.handle_error(None, None, 'No departure airport is found')
    if not arrival_airports or arrival_airports is None:
        return self.handle_error(None, None, 'No arrival airports are found')
    url = self.flights_api
    price_distance_ratio = []
    for arrival_city in arrival_airports:
        parameters = {
            "fly_from": departure_airport["id"],
            "fly_to": arrival_city["id"],
            "v": 3,
            "date_from": "29/06/2020",  # str(date.today().strftime("%d/%m/%Y")),
            "date_to": '27/09/2020',  # str((date.today() + timedelta(days=120)).strftime("%d/%m/%Y")),
            "max_fly_duration": 6,
            "flight_type": "oneway",
            "one_for_city": 1,
            "one_per_date": 0,
            "adults": 1,
            "children": 0,
            "infants": 0,
            "partner": "picky",
            "partner_market": "us",
            "curr": "USD",
            "locale": "en",
            "limit": 30,
            "sort": "price",
            "asc": 1,
            "xml": 0
        }
        distance = round(
            haversine((departure_airport["lat"], departure_airport["lon"]),
                      (arrival_city["lat"], arrival_city["lon"])), 3)
        response = self.make_request(url, parameters)
        if response["error_message"] is None:
            response = json.loads(response["response"].content)
            if response["data"]:
                price = list(response["data"].values())[0]
                ratio = round(price / distance, 3)
                price_distance_ratio.append((arrival_city["city"], ratio, distance, price))
    optimal_flight = None
    if price_distance_ratio:
        optimal_flight = sorted(price_distance_ratio, key=lambda x: x[1])
        final_response["data"] = optimal_flight
    else:
        final_response["error_message"] = 'No flight information for given cities'
    return final_response
def get(self, request):
    try:
        store = Store.objects.select_related('category', 'address', 'open_status').\
            prefetch_related('menu_set', 'storeimage_set', 'metrostationstore_set', 'review_set').\
            get(id=request.GET['store_id'])

        review_ratings_avg = ReviewDetail.get_review_ratings_avg(store)
        review_count = ReviewDetail.get_review_count(store)

        result = {
            'region_1depth_name': store.address.region_1depth_name,
            'region_2depth_name': store.address.region_2depth_name,
            'region_3depth_name': store.address.region_3depth_name,
            'store_id': store.id,
            'lat': store.address.latitude,
            'lng': store.address.longitude,
            'full_address': store.address.full_address,
            'store_name': store.name,
            'one_line_introduction': store.one_line_introduction,
            'opening_time_description': store.opening_time_description,
            'phone_number': store.phone_number,
            'sns_url': store.sns_url,
            'menu_pamphlet_image_url': store.menu_pamphlet_image_url,
            'is_reservation': 1 if store.is_reservation else 0,
            'is_wifi': 1 if store.is_wifi else 0,
            'is_parking': 1 if store.is_parking else 0,
            'category': store.category.name,
            'open_status': store.open_status.name,
            'menus': [
                {
                    'name': menu.name,
                    'price': menu.price,
                    'menu_image_url': menu.menu_image_url
                } for menu in store.menu_set.all()
            ],
            'store_images': [store_image.image_url for store_image in store.storeimage_set.all()],
            'metro_stations': [
                {
                    'name': metro_station_store.metro_station.name,
                    'line': metro_station_store.metro_station.line,
                    'lat': metro_station_store.metro_station.latitude,
                    'lng': metro_station_store.metro_station.longitude,
                    # haversine expects (lat, lon) pairs
                    'distance_from_store_m': haversine(
                        (store.address.latitude, store.address.longitude),
                        (metro_station_store.metro_station.latitude,
                         metro_station_store.metro_station.longitude),
                        unit='m'
                    )
                } for metro_station_store in store.metrostationstore_set.all()
            ],
            'reviews': [
                {
                    'review_id': review.id,
                    'rating': review.rating,
                    'content': review.content,
                    'image_url': review.image_url,
                    'updated_at': review.updated_at
                } for review in store.review_set.all()
            ],
            'visitor_photos': [review.image_url for review in store.review_set.all()],
            'rating_average': review_ratings_avg,
            'review_count': review_count
        }
        return JsonResponse({'result': result}, status=200)
    except KeyError:
        return JsonResponse({'message': 'KEY_ERROR'}, status=400)
    except Store.DoesNotExist:
        return JsonResponse({'message': 'STORE_DOES_NOT_EXIST'}, status=404)
def build_dataset(documents, user_location, query, collection):
    '''
    Build the dataset for each query, which will be used for training and testing later on.

    Args:
        documents: the documents returned from the mongodb cluster
        user_location: [longitude, latitude]
        query: the query's text
        collection: the collection the tweets are fetched from
    Returns:
        dataframe of the query's data, with columns: 'query', 'document',
        'query_length', 'document_length', 'jaccard_entire', 'sub_jaccard',
        'prefix_match', 'elasticsearch_score', 'distance'
    '''
    # define the list result of the documents data
    documents_data = []
    # go over each document and gather its info
    for doc in documents:
        # slice the location of the document
        coordinates = list(doc['_source']['location'].values())
        # get the tweets
        tweets = find_tweets_near_place(coordinates, collection)
        # neglect the docs that don't have tweets
        if len(tweets):
            # get the document name
            document = doc['_source']['name']
            # find the jaccard similarity
            jaccard_entire = jaccard_similarity(query.split(" "), document.split(" "))
            # slice the first 3 characters and find the jaccard similarity
            sub_query = [word[:3] for word in query.split(" ")]
            sub_document = [word[:3] for word in document.split(" ")]
            sub_jaccard = jaccard_similarity(sub_query, sub_document)
            # check if the query and the document have the same prefix
            prefix_match = query[:3] == document[:3]
            # create two tuples of coordinates (hs.haversine expects (lat, lon) ordering)
            tweet_loc = (coordinates[0], coordinates[1])
            user_loc = (user_location[0], user_location[1])
            # get the distance between the user and the document (haversine returns kilometers)
            distance = hs.haversine(tweet_loc, user_loc)
            # define the dict for the document
            doc_data = {
                "query": query,
                "document": document,
                "query_length": len(query),
                "document_length": len(doc['_source']['name']),
                "jaccard_entire": jaccard_entire,
                "sub_jaccard": sub_jaccard,
                "prefix_match": prefix_match,
                "elasticsearch_score": doc['_score'],
                "distance": distance
            }
            # add the tweets info
            doc_data.update(tweets[0])
            # append the doc dict to the result list
            documents_data.append(doc_data)
    # return the documents data
    return pd.json_normalize(documents_data)
def analyze_events(event_masks_xarray, class_masks_xarray, results_dir):
    """Analyze event masks of ARs and TCs

    Produces PNGs of
    - histograms of event lifetimes, speeds, and travel distances
    - frequency plots of genesis, termination, and global occurrence

    Keyword arguments:
    class_masks_xarray -- the class masks as xarray, 0==Background, 1==TC, 2==AR
    event_masks_xarray -- the event masks as xarray with IDs as elements
    results_dir -- the directory where the PNGs get saved to
    """
    # create results_dir if it doesn't exist
    pathlib.Path(results_dir).mkdir(parents=True, exist_ok=True)

    class_masks = class_masks_xarray.values
    event_masks = event_masks_xarray.values

    print('calculating centroids..', flush=True)

    def pixel_to_degree(pos):
        """Returns the (lat, long) position of a pixel coordinate"""
        return (pos[0] * 180.0 / event_masks.shape[1] - 90,
                pos[1] * 360 / event_masks.shape[2] + 180)

    def average_location(coordinates_pixel):
        """Returns the average geolocation in pixel space

        Based on https://stackoverflow.com/questions/37885798/how-to-calculate-the-midpoint-of-several-geolocations-in-python
        """
        coordinates_degree = [pixel_to_degree(cord) for cord in coordinates_pixel]
        x = 0.0
        y = 0.0
        z = 0.0
        for lat_deg, lon_deg in coordinates_degree:
            latitude = math.radians(lat_deg)
            longitude = math.radians(lon_deg)
            x += math.cos(latitude) * math.cos(longitude)
            y += math.cos(latitude) * math.sin(longitude)
            z += math.sin(latitude)
        total = len(coordinates_degree)
        x = x / total
        y = y / total
        z = z / total
        central_longitude = math.atan2(y, x)
        central_square_root = math.sqrt(x * x + y * y)
        central_latitude = math.atan2(z, central_square_root)
        average_degree = math.degrees(central_latitude), math.degrees(central_longitude)
        return (event_masks.shape[1] * (average_degree[0] + 90) / 180,
                event_masks.shape[2] * (average_degree[1] + 180) / 360)

    global centroids  # make function visible to the pool

    def centroids(event_mask):
        """Returns a dict mapping from the IDs in event_mask to their centroids"""
        coordinates_per_id = {}
        for row in range(np.shape(event_mask)[0]):
            for col in range(np.shape(event_mask)[1]):
                this_id = event_mask[row][col]
                if this_id == 0:  # don't consider background as event
                    continue
                coordinates_per_id.setdefault(this_id, []).append((row, col))
        centroid_per_id = {}
        for this_id in coordinates_per_id:
            centroid_per_id[this_id] = average_location(coordinates_per_id[this_id])
        return centroid_per_id

    pool = Pool(psutil.cpu_count(logical=False))
    centroid_per_id_per_time = pool.map(centroids, event_masks)

    # %%
    print('extracting event types..', flush=True)

    global event_type_of_mask  # make function visible to the pool

    def event_type_of_mask(event_mask, class_mask):
        """Returns a dict mapping from the IDs in event_mask to their type ('tc' or 'ar')"""
        event_type = {}  # event type as string 'ar' or 'tc' per event ID
        for row in range(np.shape(event_mask)[0]):
            for col in range(np.shape(event_mask)[1]):
                this_id = event_mask[row][col]
                this_class = class_mask[row][col]
                if this_id == 0:
                    continue
                elif this_class == 1:
                    event_type[this_id] = 'tc'
                else:
                    event_type[this_id] = 'ar'
        return event_type

    pool = Pool(psutil.cpu_count(logical=False))
    pool_result = pool.starmap(event_type_of_mask, zip(event_masks, class_masks))
    event_type = dict(i for dct in pool_result for i in dct.items())

    # %%
    print('calculating genesis and termination frequencies..', flush=True)

    genesis_time_per_id = {}
    termination_time_per_id = {}
    previous_ids = set()
    for time in range(len(event_masks)):
        for this_id in centroid_per_id_per_time[time].keys():
            if this_id not in previous_ids:
                genesis_time_per_id[this_id] = time
                previous_ids.add(this_id)
            termination_time_per_id[this_id] = time

    genesis_ids_per_time = {}
    termination_ids_per_time = {}
    for this_id, time in genesis_time_per_id.items():
        genesis_ids_per_time.setdefault(time, []).append(this_id)
    for this_id, time in termination_time_per_id.items():
        termination_ids_per_time.setdefault(time, []).append(this_id)

    genesis_count_ar = np.zeros(event_masks.shape[1:3])  # sum over all AR genesis events
    genesis_count_tc = np.zeros(event_masks.shape[1:3])
    termination_count_ar = np.zeros(event_masks.shape[1:3])
    termination_count_tc = np.zeros(event_masks.shape[1:3])
    for time in range(event_masks.shape[0]):
        genesis_events = np.isin(event_masks[time], genesis_ids_per_time.get(time, []))
        termination_events = np.isin(event_masks[time], termination_ids_per_time.get(time, []))
        genesis_count_tc += (class_masks[time] == 1) * genesis_events
        genesis_count_ar += (class_masks[time] == 2) * genesis_events
        termination_count_tc += (class_masks[time] == 1) * termination_events
        termination_count_ar += (class_masks[time] == 2) * termination_events
    genesis_frequency_ar = genesis_count_ar / (5 * 12)
    genesis_frequency_tc = genesis_count_tc / (5 * 12)
    termination_frequency_ar = termination_count_ar / (5 * 12)
    termination_frequency_tc = termination_count_tc / (5 * 12)

    # %%
    print('generating histograms..', flush=True)

    event_ids = set(genesis_time_per_id.keys()).union(set(termination_time_per_id.keys()))
    for event_class in ['tc', 'ar']:
        this_class_ids = set()
        for event_id in event_ids:
            if event_type[event_id] == event_class:
                this_class_ids.add(event_id)

        # lifetime calculation
        termination_times = np.array([termination_time_per_id[event_id] for event_id in this_class_ids])
        genesis_times = np.array([genesis_time_per_id[event_id] for event_id in this_class_ids])
        lifetimes = termination_times - genesis_times

        # lifetime histogram (lifetimes multiplied by 3 to get the result in hours)
        plt.figure(dpi=100)
        plt.hist(3 * lifetimes, bins=np.arange(0, 264, 12), cumulative=0, rwidth=0.85, color='#607c8e')
        plt.title(f"Lifetime histogram of {event_class.upper():s}s", fontdict={'fontsize': 16})
        plt.rc('xtick', labelsize=8)
        plt.rc('ytick', labelsize=8)
        plt.xlabel("Lifetime in hours")
        plt.xticks(np.arange(12, 264, 48))
        plt.xlim(12, 252)
        plt.ylabel("Count")
        # plt.show()
        plt.savefig(results_dir + f"histogram_lifetime_{event_class:s}")

        # travel distance calculation
        termination_centroids = []
        genesis_centroids = []
        for i in range(len(this_class_ids)):
            termination_centroids.append(centroid_per_id_per_time[termination_times[i]][list(this_class_ids)[i]])
            genesis_centroids.append(centroid_per_id_per_time[genesis_times[i]][list(this_class_ids)[i]])
        distances = np.array([
            hs.haversine(pixel_to_degree(pos1), pixel_to_degree(pos2))
            for pos1, pos2 in zip(termination_centroids, genesis_centroids)
        ])

        # travel distance histogram
        plt.figure(dpi=100)
        plt.hist(distances, bins=np.arange(0, 10000, 500), rwidth=0.85, color='#607c8e')
        plt.title(f"Travel distance histogram of {event_class.upper():s}s", fontdict={'fontsize': 16})
        plt.rc('xtick', labelsize=8)
        plt.rc('ytick', labelsize=8)
        plt.xlabel("distance in km")
        plt.xticks(np.arange(0, 10001, 2500))
        plt.xlim(0, 10000)
        plt.ylabel("Count")
        plt.savefig(results_dir + f"histogram_travel_distance_{event_class:s}")

        # speed histogram (lifetimes multiplied by 3 to get the result in km/h)
        plt.figure(dpi=100)
        plt.hist(distances / (3 * lifetimes), bins=np.arange(0, 100, 5), rwidth=0.85, color='#607c8e')
        plt.title(f"Speed histogram of {event_class.upper():s}s", fontdict={'fontsize': 16})
        plt.rc('xtick', labelsize=8)
        plt.rc('ytick', labelsize=8)
        plt.xlabel("speed in km/h")
        plt.xticks(np.arange(0, 101, 25))
        plt.xlim(0, 100)
        plt.ylabel("Count")
        plt.savefig(results_dir + f"histogram_speed_{event_class:s}")

    # set the cartopy background dir to include blue marble
    os.environ['CARTOPY_USER_BACKGROUNDS'] = str(os.getcwd() + '/climatenet/bluemarble')

    def map_instance(title):
        """Returns a matplotlib instance with a bluemarble background"""
        plt.figure(figsize=(100, 20), dpi=100)
        plt.rc('xtick', labelsize=20)
        plt.rc('ytick', labelsize=20)
        mymap = plt.subplot(111, projection=ccrs.PlateCarree())
        mymap.set_global()
        mymap.background_img(name='BM')
        mymap.coastlines()
        mymap.gridlines(crs=ccrs.PlateCarree(), linewidth=2, color='k', alpha=0.5, linestyle='--')
        mymap.set_xticks([-180, -120, -60, 0, 60, 120, 180])
        mymap.set_yticks([-90, -60, -30, 0, 30, 60, 90])
        plt.title(title, fontdict={'fontsize': 44})
        return mymap

    def visualize_frequency_map(frequency_map, title, colorbar_text, filepath):
        """Save a PNG of frequency_map with title and colorbar_text at filepath"""
        # initialize
        mymap = map_instance(title)
        lon = np.linspace(0, 360, frequency_map.shape[1])
        lat = np.linspace(-90, 90, frequency_map.shape[0])
        # draw frequencies
        contourf = mymap.contourf(
            lon, lat,
            np.ma.masked_array(frequency_map, mask=(frequency_map == 0)),
            levels=np.linspace(0.0, frequency_map.max(), 11),
            alpha=0.7)
        # colorbar and legend
        cbar = mymap.get_figure().colorbar(contourf, orientation='vertical',
                                           ticks=np.linspace(0, frequency_map.max(), 3))
        cbar.ax.set_ylabel(colorbar_text, size=32)
        # save
        mymap.get_figure().savefig(filepath, bbox_inches="tight", facecolor='w')

    print('generating frequency maps..', flush=True)
    visualize_frequency_map(genesis_frequency_tc, "Genesis frequency map of TCs",
                            "Frequency in events per month", results_dir + "genesis_frequency_tc")
    visualize_frequency_map(genesis_frequency_ar, "Genesis frequency map of ARs",
                            "Frequency in events per month", results_dir + "genesis_frequency_ar")
    visualize_frequency_map(termination_frequency_tc, "Termination frequency map of TCs",
                            "Frequency in events per month", results_dir + "termination_frequency_tc")
    visualize_frequency_map(termination_frequency_ar, "Termination frequency map of ARs",
                            "Frequency in events per month", results_dir + "termination_frequency_ar")
    visualize_frequency_map(
        100 * ((class_masks == 1) * (event_masks != 0)).sum(axis=0) / event_masks.shape[0],
        "Global frequency map of TCs", "Frequency in % of time steps",
        results_dir + "global_frequency_tc")
    visualize_frequency_map(
        100 * ((class_masks == 2) * (event_masks != 0)).sum(axis=0) / event_masks.shape[0],
        "Global frequency map of ARs", "Frequency in % of time steps",
        results_dir + "global_frequency_ar")
from haversine import haversine, Unit

lyon = (45.7597, 4.8422)  # (lat, lon)
paris = (48.8567, 2.3508)

haversine(lyon, paris)  # ~392.22 (kilometers by default)
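# Unit is imported above but unused; the same call with an explicit unit, as
# supported by the haversine package:
haversine(lyon, paris, unit=Unit.MILES)           # ~243.71
haversine(lyon, paris, unit=Unit.NAUTICAL_MILES)  # ~211.78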
def distance(lat1, long1, lat2, long2):
    # Great-circle distance in miles; `miles=True` is the old (pre-2.0)
    # haversine API, newer releases use `unit=Unit.MILES` instead.
    return haversine((lat1, long1), (lat2, long2), miles=True)
def main():
    # SEARCH-AREA SIZE (grid cells per tile side)
    SIZE = 25

    # DETECTION CRITERIA
    forward_speed = 10    # m/s (not used below)
    slp_threshold = 1000  # hPa
    ws_threshold = 15     # m/s

    # DATE RANGE
    start_date = datetime(2018, 10, 30, 0, 0, 0)
    stop_date = datetime(2018, 10, 30, 12, 0, 0)
    delta_hours = 1

    current_date = start_date
    L = []
    while current_date < stop_date:
        url = "/home/giangiui/Dropbox/Uni/Progetto/MedistormTracker/MEDIACANE_data/" + \
              "/wrf5_d01_" + \
              "{:04d}".format(current_date.year) + \
              "{:02d}".format(current_date.month) + \
              "{:02d}".format(current_date.day) + \
              "Z{:02d}".format(current_date.hour) + \
              "{:02d}".format(current_date.minute) + ".nc"
        f = Dataset(url)
        lats = f.variables["latitude"][:]
        lons = f.variables["longitude"][:]
        slp = f.variables["SLP"][:][0]
        u10m = f.variables["U10M"][:][0]
        v10m = f.variables["V10M"][:][0]
        t2c = f.variables["T2C"][:][0]
        rh2 = f.variables["RH2"][:][0]
        uh = f.variables["UH"][:][0]

        # Scan the domain in SIZE x SIZE tiles; every cell whose sea-level
        # pressure is below the threshold and whose 10 m wind speed exceeds
        # the threshold becomes a candidate.
        for j in range(0, len(slp), SIZE):
            for i in range(0, len(slp[0]), SIZE):
                S = slp[j:j + SIZE, i:i + SIZE]
                iMin = None
                jMin = None
                for jj in range(0, len(S)):
                    for ii in range(0, len(S[0])):
                        if S[jj, ii] < slp_threshold:
                            iMin = i + ii
                            jMin = j + jj
                            ws = math.sqrt(u10m[jMin, iMin] ** 2 +
                                           v10m[jMin, iMin] ** 2)
                            if ws >= ws_threshold:
                                L.append({
                                    "date": str(current_date),
                                    "lat": lats[jMin],
                                    "lon": lons[iMin],
                                    "data": {
                                        "slp": S[jj, ii],
                                        "ws": ws,
                                        "t2c": t2c[jMin, iMin],
                                        "rh2": rh2[jMin, iMin],
                                        "uh": uh[jMin, iMin]
                                    }
                                })
        current_date = current_date + timedelta(hours=delta_hours)

    # TEMPORAL ALGORITHM
    # Input: L, maximum storm speed Umax, minimum duration Tmin,
    #        storm identification criteria, maximum distance Dmax

    # SUCCESSOR IDENTIFICATION CRITERIA
    Umax = 15  # m/s
    mpsToKph = 3.6
    DMAX = Umax * mpsToKph * delta_hours  # km a storm can travel in one step

    # STORM IDENTIFICATION CRITERIA
    minDuration = 12  # hours
    maxSpeed = 30.0   # meters per second (not used below)

    T = []
    current_date = start_date
    while current_date < stop_date:
        list_filtered = [
            item for item in L if item['date'] == str(current_date)
        ]
        for l in list_filtered:
            track = [l]
            continueC = True
            current_date_exam = current_date + timedelta(hours=delta_hours)
            while continueC:
                list_filtered_exam = [
                    item for item in L if item['date'] == str(current_date_exam)
                ]
                if list_filtered_exam:
                    for ll in list_filtered_exam:
                        # NOTE: distances are always measured from the track's
                        # first point; `l` is never advanced along the track.
                        current_distance = haversine((l["lat"], l["lon"]),
                                                     (ll["lat"], ll["lon"]))
                        if current_distance < DMAX:
                            track.append(ll)
                            current_date_exam = (current_date_exam +
                                                 timedelta(hours=delta_hours))
                            break
                        else:
                            continueC = False
                else:
                    continueC = False
            if len(track) >= 12:  # 12 steps of delta_hours = minDuration hours
                T.append(track)
        current_date = current_date + timedelta(hours=delta_hours)
    print(T)
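# Each tile above can contribute several candidates (every cell under the SLP
# threshold is appended). If only the single pressure minimum of each tile is
# wanted, a compact NumPy alternative to the inner double loop (a sketch; the
# function name and interface are assumptions):
import numpy as np

def tile_minimum(slp, j, i, size, slp_threshold=1000.0):
    """Return the absolute (jMin, iMin) of the lowest SLP in one tile,
    or None if that minimum is not below the threshold."""
    S = slp[j:j + size, i:i + size]
    jj, ii = np.unravel_index(np.argmin(S), S.shape)
    if S[jj, ii] >= slp_threshold:
        return None
    return j + jj, i + ii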
def computeFastestTrain(self): nodes = OrderedDict() edges = [] mrtPath = [] mrtRoutes = OrderedDict() mrtNodes = {} temp = {} # Retrieve all the json files under MRT directory path_to_json = "MRT/" json_files = [ pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.geojson') ] for (index, name) in enumerate(json_files): with open(path_to_json + str(name)) as json_file: data = json.load(json_file) for feature in data['features']: #Added all the coordinates that follow the mrt path into mrtPath list if feature['geometry']['type'] == 'MultiLineString': for y in feature['geometry']['coordinates']: mrtPath.append(y) else: coordinates = feature['geometry']['coordinates'] nodes[feature['properties']['node-details']] = coordinates mrtNodes[tuple( coordinates)] = feature['properties']['node-details'] lowest = 1000 lowestIndex = 0 i = 0 while i < len(mrtPath): distance = haversine(coordinates, mrtPath[i]) if distance < lowest: lowest = distance lowestIndex = i i += 1 mrtPath.insert(lowestIndex, coordinates) length = len(mrtPath) for i in range(length): c = tuple(mrtPath[i]) k = str(i) mrtRoutes[k] = c temp[c] = k for i in range(length): if i + 1 != length: distance = haversine(mrtPath[i], mrtPath[i + 1]) if tuple(mrtPath[i]) in mrtNodes: edges.append((mrtNodes[tuple(mrtPath[i])], temp[tuple(mrtPath[i + 1])], distance / 70, "LRT")) elif tuple(mrtPath[i + 1]) in mrtNodes: edges.append((temp[tuple(mrtPath[i])], mrtNodes[tuple(mrtPath[i + 1])], distance / 70, "LRT")) else: edges.append((temp[tuple(mrtPath[i])], temp[tuple(mrtPath[i + 1])], distance / 70, "LRT")) temp.clear() mrtPath.clear() with open('Combined/nodes.json') as f: getJson = json.load(f) feature_access = getJson['features'] for feature_data in feature_access: prop = feature_data['properties'] if 'node-details' in prop: location_name = prop['node-details'] nodes[location_name] = feature_data['geometry'][ 'coordinates'] findPath = ShortestPath(nodes) findPath.createEdges() findPath.createMrtEdgeNodes(edges, mrtNodes, mrtRoutes) graph = findPath.buildAGraph() print("Get graph: " + str(graph)) path = findPath.findShortestPath(graph, self.comboStart.currentText(), self.comboEnd.currentText()) print("Get Path: " + str(path)) self.m = folium.Map(location=[1.4053, 103.9021], zoom_start=16) self.lblSelectedBusRoute.setText('Bus Route Displayed: ') folium.PolyLine(path, opacity=1, color='red').add_to(self.m) self.marker_cluster = MarkerCluster().add_to(self.m) self.initMap(self.m, self.marker_cluster) data = io.BytesIO() self.m.save(data, close_file=False) self.mapView.setHtml(data.getvalue().decode())
# `length`, `df`, `latColumnName`, `longColumnName` and `j` are defined in the
# surrounding scope (not shown in this fragment).
with open('sequence.csv', 'w', newline='') as f:
    fieldnames = [
        'node', 'destination', 'Lat1', 'Long1', 'Lat2', 'Long2', 'value'
    ]
    thewriter = csv.DictWriter(f, fieldnames=fieldnames)
    thewriter.writeheader()
    for i in range(length):
        if not i == (length - 1):
            lat1 = float(df[latColumnName].values[i])
            long1 = float(df[longColumnName].values[i])
            lat2 = float(df[latColumnName].values[i + 1])
            long2 = float(df[longColumnName].values[i + 1])
            location_1 = (lat1, long1)
            location_2 = (lat2, long2)
            thewriter.writerow({
                'node': i,
                'destination': j,  # destination index from the enclosing scope
                'Lat1': format(lat1, '.4f'),
                'Long1': format(long1, '.4f'),
                'Lat2': format(lat2, '.4f'),
                'Long2': format(long2, '.4f'),
                'value': haversine(location_1, location_2, unit=Unit.KILOMETERS)
            })
sns.scatterplot(LAT_LNG[:, 0], LAT_LNG[:, 1],
                hue=y_label,
                palette=sns.color_palette("Set1", n_colors=NUM_CLUSTERS))
# highlight the farthest points in black
sns.scatterplot(LAT_LNG[max_indices, 0], LAT_LNG[max_indices, 1], color='black')
plt.show()
# endregion

distances = []
for i, centroid in enumerate(centroids):
    distances.append(haversine(farthest_points[i], centroid, unit=Unit.METERS))
    if distances[i] > MAX_DISTANCE:
        # request a new cluster if any point sits farther than the threshold
        temp = NUM_CLUSTERS + 1

# `temp` is assumed to be initialized to NUM_CLUSTERS earlier (not shown here)
if temp == NUM_CLUSTERS:
    outlier_label = outlier_cluster(kmeans, MIN_POINTS_PER_CLUSTER)
    if outlier_label == -1:  # no outliers detected
        print("Success! No outliers detected")
        break  # exits an enclosing loop that is not shown in this fragment
    else:
        print("Outlier cluster:", outlier_label)
        indices = outlier_cluster_data_point_indices(outlier_label)
        LAT_LNG = remove_outlier_cluster(LAT_LNG, indices)
elif temp > NUM_CLUSTERS:
    NUM_CLUSTERS = temp
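# How `farthest_points` / `max_indices` might be built earlier in this script:
# for every cluster, take the member with the greatest haversine distance to
# its centroid. Function and variable names here are assumptions for
# illustration, not the original code.
import numpy as np
from haversine import haversine, Unit

def farthest_per_cluster(lat_lng, labels, centroids):
    farthest_points, max_indices = [], []
    for k, centroid in enumerate(centroids):
        members = np.where(labels == k)[0]
        dists = [haversine(tuple(lat_lng[i]), tuple(centroid), unit=Unit.METERS)
                 for i in members]
        far = members[int(np.argmax(dists))]
        max_indices.append(far)
        farthest_points.append(tuple(lat_lng[far]))
    return farthest_points, max_indices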
def mark(request): """ Adds a duck, location, photo and link from webform """ # if this is a POST request we need to process the form data if request.method == 'POST': # create a form instance and populate it with data from the request: form = DuckForm(request.POST) # check whether it's valid: if form.is_valid(): duck_id = form.cleaned_data['duck_id'] try: duck = Duck.objects.get(pk=duck_id) if duck.name == 'Unnamed' and form.cleaned_data[ 'name'] != 'Unnamed': duck.name = form.cleaned_data['name'] except Duck.DoesNotExist: name = form.cleaned_data['name'] if form.cleaned_data[ 'name'] else 'Unnamed' duck = Duck(duck_id=duck_id, name=name, approved='Y', create_time=datetime.datetime.now().strftime( '%Y-%m-%d %H:%M:%S'), comments='') # Calculate the distance since last location last_duck_location = DuckLocation.objects.filter( duck_id=duck_id).order_by('-date_time')[0] distance_travelled = haversine( (last_duck_location.latitude, last_duck_location.longitude), (form.cleaned_data['lat'], form.cleaned_data['lng']), unit=Unit.MILES) duck_location = DuckLocation( duck=duck, latitude=form.cleaned_data['lat'], longitude=form.cleaned_data['lng'], location=form.cleaned_data['location'], date_time=form.cleaned_data['date_time'], comments=form.cleaned_data['comments'], distance_to=round(distance_travelled, 2), user=request.user, approved='Y') duck_location.save() if request.FILES and request.FILES['image']: photo_info = media.handle_uploaded_file( request.FILES['image'], duck_id, duck.name, form.cleaned_data['comments']) duck_location_photo = DuckLocationPhoto( duck_location=duck_location, flickr_photo_id=photo_info['id'], flickr_thumbnail_url=photo_info['sizes']['Small 320'] ['source']) duck_location_photo.save() duck.total_distance = round( DuckLocation.objects.filter(duck_id=duck_id).aggregate( Sum('distance_to'))['distance_to__sum'], 2) duck.save() # redirect to a new URL: return HttpResponseRedirect('/location/' + str(duck_location.duck_location_id)) # if a GET (or any other method) we'll create a blank form else: form = DuckForm() map_data = { 'width': '100%', 'height': '400px', 'focus_lat': 35, 'focus_long': -30, 'focus_zoom': 1, 'location_list': [], 'duck_location_id': 0, } return render(request, 'duck/mark.html', {'form': form, 'map': map_data})
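# One caveat in the view above: the "distance since last location" lookup
# indexes [0] on a queryset, which raises IndexError for a duck with no prior
# sightings. A guarded sketch using the same models (the zero-mile fallback
# for a first sighting is an assumption):
previous = (DuckLocation.objects
            .filter(duck_id=duck_id)
            .order_by('-date_time')
            .first())
if previous is not None:
    distance_travelled = haversine(
        (previous.latitude, previous.longitude),
        (form.cleaned_data['lat'], form.cleaned_data['lng']),
        unit=Unit.MILES)
else:
    distance_travelled = 0.0  # first sighting: nothing to measure against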
def computeFastestBus(self): # Fastest Bus Route nodes = OrderedDict() edges = [] busPath = [] busRoutes = OrderedDict() busNodes = {} temp = {} #Retrieve all the json files under Bus_Path directory path_to_json = "Bus_Path/" #Referenced from: https://stackoverflow.com/questions/30539679/python-read-several-json-files-from-a-folder json_files = [ pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.geojson') ] for (index, name) in enumerate(json_files): with open(path_to_json + str(name)) as json_file: data = json.load(json_file) for feature in data['features']: #Added all the coordinates that follow the bus path into busPath list if feature['geometry']['type'] == 'MultiLineString': for y in feature['geometry']['coordinates']: busPath.append(y) #Added all the nodes that are placed on the maop into busNodes list else: coordinates = feature['geometry']['coordinates'] nodes[feature['properties']['node-details']] = coordinates print("Nodes: " + str(coordinates)) busNodes[tuple( coordinates)] = feature['properties']['node-details'] lowest = 1000 i = 0 while i < len(busPath): distance = haversine(coordinates, busPath[i]) if distance < lowest: lowest = distance lowestIndex = i i += 1 busPath.insert(lowestIndex, coordinates) length = len(busPath) for i in range(length): c = tuple(busPath[i]) k = str(i) busRoutes[k] = c temp[c] = k #Added for overall edges for the other node coordinates to find out the fastest path based on speed for i in range(length): if i + 1 != length: distance = haversine(busPath[i], busPath[i + 1]) if tuple(busPath[i]) in busNodes: edges.append((busNodes[tuple(busPath[i])], temp[tuple(busPath[i + 1])], distance / 60, "Bus")) elif tuple(busPath[i + 1]) in busNodes: edges.append((temp[tuple(busPath[i])], busNodes[tuple(busPath[i + 1])], distance / 60, "Bus")) else: edges.append((temp[tuple(busPath[i])], temp[tuple(busPath[i + 1])], distance / 60, "Bus")) temp.clear() busPath.clear() with open('Combined/nodes.json') as f: getJson = json.load(f) feature_access = getJson['features'] for feature_data in feature_access: prop = feature_data['properties'] if 'node-details' in prop: location_name = prop['node-details'] nodes[location_name] = feature_data['geometry'][ 'coordinates'] findPath = ShortestPath(nodes) findPath.createEdges() findPath.createBusEdgeNodes(edges, busNodes, busRoutes) graph = findPath.buildAGraph() print("Get graph: " + str(graph)) path = findPath.findShortestPath(graph, self.comboStart.currentText(), self.comboEnd.currentText()) print("Get Path: " + str(path)) self.m = folium.Map(location=[1.4053, 103.9021], zoom_start=16) self.lblSelectedBusRoute.setText('Bus Route Displayed: ') folium.PolyLine(path, opacity=1, color='#800080').add_to(self.m) self.marker_cluster = MarkerCluster().add_to(self.m) self.initMap(self.m, self.marker_cluster) data = io.BytesIO() self.m.save(data, close_file=False) self.mapView.setHtml(data.getvalue().decode())
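# Interpreting the edge weights in the two route builders above: haversine()
# returns kilometers by default, so distance / 60 (bus) and distance / 70 (LRT)
# read as travel time in hours at an assumed constant speed in km/h. Note also
# that GeoJSON stores coordinates as [lon, lat], so those calls pass points in
# (lon, lat) order. Illustrative check with made-up nearby coordinates:
distance_km = haversine((1.4043, 103.9020), (1.4053, 103.9021))
bus_hours = distance_km / 60   # assumed ~60 km/h bus speed
lrt_hours = distance_km / 70   # assumed ~70 km/h train speed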
def process_points(data): alt_dif = [] time_dif = [] dist_vin = [] dist_hav = [] dist_vin_no_alt = [] dist_hav_no_alt = [] dist_dif_hav_2d = [] dist_dif_vin_2d = [] for start, stop in zip(data[0::], data[1::]): distance_vin_2d = distance.geodesic((start.latitude, start.longitude), (stop.latitude, stop.longitude)).m dist_dif_vin_2d.append(distance_vin_2d) distance_hav_2d = haversine.haversine( (start.latitude, start.longitude), (stop.latitude, stop.longitude)) * 1000 dist_dif_hav_2d.append(distance_hav_2d) dist_vin_no_alt.append( (dist_vin_no_alt[-1] if len(dist_vin_no_alt) > 0 else 0) + distance_vin_2d) dist_hav_no_alt.append( (dist_hav_no_alt[-1] if len(dist_hav_no_alt) > 0 else 0) + distance_hav_2d) alt_d = start.elevation - stop.elevation alt_dif.append(alt_d) distance_vin_3d = math.sqrt(distance_vin_2d**2 + (alt_d)**2) distance_hav_3d = math.sqrt(distance_hav_2d**2 + (alt_d)**2) time_delta = (stop.time - start.time).total_seconds() time_dif.append(time_delta) dist_vin.append((dist_vin[-1] if len(dist_vin) > 0 else 0) + distance_vin_3d) dist_hav.append((dist_hav[-1] if len(dist_hav) > 0 else 0) + distance_hav_3d) # print('Vincenty 2D : ', dist_vin_no_alt[-1]) # print('Haversine 2D : ', dist_hav_no_alt[-1]) # print('Vincenty 3D : ', dist_vin[-1]) # print('Haversine 3D : ', dist_hav[-1]) # print('Total Time : ', math.floor(sum(time_dif)/60), # ' min ', int(sum(time_dif) % 60), ' sec ') # print('Elevation diff: ', int(sum(alt_dif))) # print('Elevation loss: ', abs(int(sum([a for a in alt_dif if a > 0])))) # print('Elevation gain: ', abs(int(sum([a for a in alt_dif if a < 0])))) df = pd.DataFrame() df['dis_vin_2d'] = dist_vin_no_alt df['dist_hav_2d'] = dist_hav_no_alt df['dis_vin_3d'] = dist_vin df['dis_hav_3d'] = dist_hav df['alt_dif'] = alt_dif df['time_dif'] = time_dif df['dis_dif_hav_2d'] = dist_dif_hav_2d df['dis_dif_vin_2d'] = dist_dif_vin_2d # Clear data set df = df[df['time_dif'] > 0.0] df['dist_dif_per_sec'] = df['dis_dif_hav_2d'] / df['time_dif'] df['spd'] = (df['dis_dif_hav_2d'] / df['time_dif']) * 3.6 df_with_timeout = df[df['dist_dif_per_sec'] > MOVEMENT_THRESHOLD] avg_km_h = (sum((df_with_timeout['spd'] * df_with_timeout['time_dif'])) / sum(df_with_timeout['time_dif'])) # print(math.floor(60 / avg_km_h), 'minutes', # round(((60 / avg_km_h - math.floor(60 / avg_km_h))*60), 0), # ' seconds') return { 'dist': dist_hav_no_alt[-1], 'total_time': math.floor(sum(time_dif)), 'moving_time': sum(df_with_timeout['time_dif']), 'alt_loss': abs(int(sum([a for a in alt_dif if a > 0]))), 'alt_gain': abs(int(sum([a for a in alt_dif if a < 0]))), 'avg_km_h': avg_km_h }
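# process_points() expects point objects exposing .latitude, .longitude,
# .elevation and .time; tracks parsed with gpxpy have exactly that shape.
# Minimal assumed usage ('ride.gpx' is a placeholder file name):
import gpxpy

with open('ride.gpx') as gpx_file:
    gpx = gpxpy.parse(gpx_file)
points = gpx.tracks[0].segments[0].points
stats = process_points(points)
print(stats['dist'], 'm in', stats['moving_time'], 's moving')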
    ON a.brewery_id = c.brewery_id
"""
# `engine`, `home`, `starting_point`, `coord` and MAX_DISTANCE come from
# earlier in the script (not shown in this fragment).
df = pd.read_sql_query(query, engine)

# Concatenate beer types on brewery level for visualization
df_agg = df.copy()
group = ['brewery_id', 'brewery_name', 'latitude', 'longitude']
df_agg = df_agg.groupby(group)['beer_type'].agg(
    lambda x: '|'.join(x)).reset_index()

# Calculate distances in km between coordinates, discard places too far from
# the starting point, then filter the distance matrix and sort by distance
df_loc = pd.concat([home, df_agg]).reset_index(drop=True)
df_loc['distance'] = df_loc.apply(
    lambda x: haversine(starting_point, [x['latitude'], x['longitude']]),
    axis=1)
df_loc = df_loc.sort_values(by='distance')
df_loc = df_loc[df_loc['distance'] < MAX_DISTANCE * 0.4]

points_coordinate = df_loc[coord].copy()
distance_matrix = pairwise_distances(X=points_coordinate, metric=haversine)
num_points = points_coordinate.shape[0]

# Raise an exception if the total number of points is less than 8
if num_points < 8:
    raise Exception(
        'Not enough factories in this location. Try a different location.')

# Keep an even number of points
num_points = num_points - 1 if num_points % 2 == 1 else points_coordinate.shape[0]
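# sklearn's pairwise_distances accepts a callable metric, so each pair of
# (lat, lon) rows above is handed straight to haversine. A toy check of the
# matrix it builds (coordinates are illustrative):
import numpy as np
from sklearn.metrics import pairwise_distances
from haversine import haversine

pts = np.array([[45.7597, 4.8422],    # (lat, lon)
                [48.8567, 2.3508]])
print(pairwise_distances(X=pts, metric=haversine))  # symmetric 2x2 matrix, km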
def calculate_adj_points_dis(df_origin, inplace=False):
    """Distance in meters between each point and its predecessor
    (x holds longitude, y latitude); the first row yields NaN."""
    df = df_origin if inplace else df_origin.copy()
    df.loc[:, 'x1'], df.loc[:, 'y1'] = df.x.shift(1), df.y.shift(1)
    return df.apply(lambda i: haversine((i.y, i.x), (i.y1, i.x1)) * 1000, axis=1)
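# Quick illustration of the helper above on a made-up three-point track
# (x = longitude, y = latitude); the first element is NaN because the first
# row has no predecessor.
import pandas as pd

track = pd.DataFrame({'x': [4.8422, 4.8500, 4.8600],
                      'y': [45.7597, 45.7650, 45.7700]})
print(calculate_adj_points_dis(track))  # meters between consecutive points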
def write_crm_train_test(): import sqlalchemy as sql import json import pandas as pd import datetime import os import numpy as np from haversine import haversine import glob import random from multiprocessing import Pool from itertools import repeat from dateutil.relativedelta import relativedelta print("Start_Part_2: %s" % str(datetime.datetime.now())) with open('./config.json', 'rb') as f: dict_config = json.load(f) username = dict_config['username'] password = dict_config['password'] database = dict_config['database'] folder_store_list = dict_config['folder_store_list'] path_TA_excel = dict_config['path_TA_excel'] path_json_zip_center = dict_config['path_json_zip_center'] pos_end_date = dict_config['pos_end_date'] folder_store_list = dict_config['folder_store_list'] folder_email_unsub = dict_config['folder_email_unsub'] with open('./table_names_%s.json' % str(pos_end_date).replace("-", ""), 'rb') as f: dict_table_names = json.load(f) table_filtered_crm = dict_table_names['table_filtered_crm'] BL_engine = sql.create_engine("mysql+pymysql://%s:%s@localhost/%s" % (username, password, database)) # In[3]: def create_index(table_name, list_of_columns): columns = ', '.join(list_of_columns) query = "CREATE INDEX id_index ON %s(%s)" % (table_name, columns) print(query) with BL_engine.connect() as connection: result = connection.execute(query) result.close() return def week_end_dt(date_input): weekday_int = date_input.weekday() if weekday_int == 6: return date_input + datetime.timedelta(days=6) else: return date_input + datetime.timedelta(days=5 - weekday_int) high_date = datetime.datetime.strptime(dict_config['crm_end_date'], "%Y-%m-%d").date() if dict_config['recent_n_month']: recent_n_month = dict_config['recent_n_month'] pos_start_date_id_filter = str(high_date - datetime.timedelta( days=int(np.ceil(365 * recent_n_month / 12)))) else: pos_start_date_id_filter = dict_config["pos_start_date"] sql_str_high_date = "'%s'" % str(high_date) sql_str_lastweekstart_date = "'%s'" % str(high_date - datetime.timedelta(days=6)) # sql_sign_up_start_date="'%s'"%str(sign_up_start_date) sql_POS_start_date = "'%s'" % str(pos_start_date_id_filter) str_week_end_d = str(high_date).replace("-", "") print("check point 1") path_store_list = glob.glob(folder_store_list + "*.txt") path_store_list.sort() path_store_list_ahead = [ x for x in path_store_list if "MediaStormStores%s" % str_week_end_d[:6] in x ][0] # updated 2020-10-03 str_month_after = (datetime.datetime.strptime(str_week_end_d, '%Y%m%d') + relativedelta(months=1)).date() str_month_after = str(str_month_after).replace("-", "") # path_store_list_after=[x for x in path_store_list if "MediaStormStores%s"%str_month_after in x][0] df_store_list = pd.read_csv(path_store_list_ahead, sep="|") df_store_list = df_store_list[[ 'location_id', 'address_line_1', 'address_line_2', 'city_nm', 'state_nm', 'zip_cd', 'latitude_meas', 'longitude_meas' ]] df_store_list['latitude_meas'] = df_store_list['latitude_meas'].astype( float) df_store_list['longitude_meas'] = df_store_list['longitude_meas'].astype( float) df_store_list['zip_cd'] = df_store_list['zip_cd'].apply( lambda x: x.split("-")[0].zfill(5)) df_store_list = df_store_list[~df_store_list['location_id']. 
isin(['145', '6990'])] df_store_list['location_id'] = df_store_list['location_id'].astype(str) # TA_zips = pd.ExcelFile(path_TA_excel) TA_zips = TA_zips.parse("view_by_store", dtype=str) df_temporary = TA_zips[[ 'location_id', 'trans_P_zips_70_within_TA', 'trans_S_zips_70_within_TA', 'zips_in_10' ]] df_zip_by_store = pd.DataFrame() for ind, row in df_temporary.iterrows(): location_id = str(row['location_id']) P_zips = eval(row['trans_P_zips_70_within_TA']) S_zips = eval(row['trans_S_zips_70_within_TA']) zip_10 = eval(row['zips_in_10']) df_P = pd.DataFrame(zip([location_id] * len(P_zips), P_zips)) if len(df_P) > 0: df_P.columns = ['location_id', 'zip_cd'] df_P['zip_type'] = "P" df_S = pd.DataFrame(zip([location_id] * len(S_zips), S_zips)) if len(df_S) > 0: df_S.columns = ['location_id', 'zip_cd'] df_S['zip_type'] = "S" df_10 = pd.DataFrame(zip([location_id] * len(zip_10), zip_10)) if len(df_10) > 0: df_10.columns = ['location_id', 'zip_cd'] df_10['zip_type'] = "zip_10" df_zip_by_store = df_zip_by_store.append(df_P).append(df_S).append( df_10) df_zip_by_store['location_id'] = df_zip_by_store['location_id'].astype(str) df_store_list = df_store_list[[ 'location_id', 'latitude_meas', 'longitude_meas' ]] df_store_zip = pd.merge(df_store_list, df_zip_by_store, on="location_id", how="left") df_store_zip_new = df_store_zip[pd.isnull(df_store_zip['zip_cd'])] df_store_zip_existing = df_store_zip[pd.notnull(df_store_zip['zip_cd'])] df_store_zip_new_no_loc = df_store_zip_new[ df_store_zip_new['latitude_meas'] == 0] df_store_zip_new_with_loc = df_store_zip_new[ df_store_zip_new['latitude_meas'] != 0] df_store_zip_new_with_loc = df_store_zip_new_with_loc[[ 'location_id', 'latitude_meas', 'longitude_meas' ]] df_store_zip_new_no_loc = df_store_zip_new_no_loc[[ 'location_id', 'latitude_meas', 'longitude_meas' ]] if len(df_store_zip_new_no_loc) > 0: store_list_later = [ x for x in path_store_list if x.split("MediaStormStores")[1][:6] > str_week_end_d ] store_list_later = sorted(store_list_later, key=lambda x: os.stat(x).st_mtime) for file in store_list_later: df = pd.read_csv( file, dtype=str, sep="|", usecols=['location_id', 'latitude_meas', 'longitude_meas']) df = df[['location_id', 'latitude_meas', 'longitude_meas']] df['latitude_meas'] = df['latitude_meas'].astype(float) df['longitude_meas'] = df['longitude_meas'].astype(float) df['location_id'] = df['location_id'].astype(str) df = df[df['location_id'].isin( df_store_zip_new_no_loc['location_id'].tolist())] df = df[df['latitude_meas'] != 0] df_store_zip_new_with_loc = df_store_zip_new_with_loc.append(df) df_store_zip_new_no_loc = df_store_zip_new_no_loc[ ~df_store_zip_new_no_loc['location_id'].isin(df['location_id']. 
tolist())] if len(df_store_zip_new_no_loc) == 0: break df_store_zip_new = df_store_zip_new_with_loc.reset_index() del df_store_zip_new['index'] if len(df_store_zip_new_with_loc) > 0: del df_store_zip_new_with_loc if len(df_store_zip_new_no_loc) > 0: del df_store_zip_new_no_loc zip_centers = json.load(open(path_json_zip_center, "r")) if len(df_store_zip_new) > 0: df_all_new_zip = pd.DataFrame() for i, row in df_store_zip_new.iterrows(): store_coor = (row['latitude_meas'], row['longitude_meas']) store_num = row['location_id'] list_store_zip = [] for zip_cd, v in zip_centers.items(): dist = haversine(store_coor, v, unit="mi") if dist <= 10: list_store_zip.append(zip_cd) df = pd.DataFrame( { "zip_cd": list_store_zip, "zip_type": ["zip_10"] * len(list_store_zip) }, index=[store_num] * len(list_store_zip)) df = df.reset_index().rename(columns={"index": "location_id"}) df_all_new_zip = df_all_new_zip.append(df) df_store_zip_new = pd.merge(df_store_zip_new, df_all_new_zip, on="location_id", how="left") df_store_zip = df_store_zip_existing.append(df_store_zip_new) else: df_store_zip = df_store_zip_existing df_zip_type = df_store_zip[['zip_cd', 'zip_type']].drop_duplicates() df_zip_type = df_zip_type.sort_values(['zip_cd', 'zip_type']) print(df_zip_type['zip_type'].unique().tolist()) df_unique_zip_type = df_zip_type.drop_duplicates("zip_cd") list_P_zips = df_zip_type[df_zip_type['zip_type'] == "P"]['zip_cd'].tolist() list_S_zips = df_zip_type[df_zip_type['zip_type'] == "S"]['zip_cd'].tolist() list_10_zips = df_zip_type[df_zip_type['zip_type'] == "zip_10"]['zip_cd'].tolist() df_store_list = df_store_zip[[ 'location_id', 'latitude_meas', 'longitude_meas' ]].drop_duplicates().reset_index() del df_store_list['index'] df_store_list = df_store_zip[[ 'location_id', 'latitude_meas', 'longitude_meas' ]].drop_duplicates().reset_index() del df_store_list['index'] # print("check point 2") # In[5]: processors = 20 list_all_zips = list(zip_centers.keys()) len_chunck = int(np.ceil(len(list_all_zips) / processors)) list_of_input_all_us_zip_list = [] for i in range(processors): l = list_all_zips[i * len_chunck:(i + 1) * len_chunck] list_of_input_all_us_zip_list.append(l) p = Pool(processors) result = p.starmap( get_dist_output_df, zip(list_of_input_all_us_zip_list, repeat(df_store_list), repeat(zip_centers))) ## result=p.map(get_dist_output_df, list_of_input_all_us_zip_list) # get_dist_output_df defined in the main py file, due to the thread need to be defined top-level df_zips_with_BL_store = pd.DataFrame() for res in result: if res is not None: df_zips_with_BL_store = df_zips_with_BL_store.append(res) p.close() p.join() print("check point 3") print(df_zips_with_BL_store.shape, df_zips_with_BL_store['zip_cd'].nunique(), df_zips_with_BL_store['nearest_BL_store'].nunique()) df_zips_with_BL_store['zip_cd'] = df_zips_with_BL_store['zip_cd'].astype( str) df_zips_with_BL_store['zip_cd'] = df_zips_with_BL_store['zip_cd'].apply( lambda x: x.zfill(5)) # In[6]: # IVs print(datetime.datetime.now()) df_1 = pd.read_sql( "select t1.customer_id_hashed, sign_up_channel, sign_up_location, customer_zip_code, t1.sign_up_date from BL_Rewards_Master as t1 right join %s as t2 on t1.customer_id_hashed=t2.customer_id_hashed;" % table_filtered_crm, con=BL_engine) df_1 = df_1.sort_values("sign_up_date", ascending=False) df_1 = df_1.drop_duplicates("customer_id_hashed") df_1_len = df_1.shape[0] df_1_id_nunique = df_1['customer_id_hashed'].nunique() print("df_1_len", df_1_len) print("df_1_id_nunique", df_1_id_nunique) 
print(datetime.datetime.now()) df_1['customer_zip_code'] = df_1['customer_zip_code'].astype(str) df_1['customer_zip_code'] = df_1['customer_zip_code'].apply( lambda x: x.split("-")[0].split(" ")[0].zfill(5)[:5]) # df_1['sign_up_date']=pd.to_datetime(df_1['sign_up_date'],format="%Y-%m-%d").dt.date # df_1['weeks_since_sign_up']=df_1['sign_up_date'].apply(lambda x: int(np.ceil((high_date-x).days/7))) df_1['P_zip'] = np.where(df_1['customer_zip_code'].isin(list_P_zips), 1, 0) df_1['S_zip'] = np.where(df_1['customer_zip_code'].isin(list_S_zips), 1, 0) df_1['else_10_zip'] = np.where( df_1['customer_zip_code'].isin(list_10_zips), 1, 0) # del df_1['customer_zip_code'] df_1['signed_online'] = np.where(df_1['sign_up_channel'] == "STORE", 0, 1) del df_1['sign_up_channel'] df_1['sign_up_location'] = df_1['sign_up_location'].fillna("-1") df_1['sign_up_location'] = df_1['sign_up_location'].astype(float) df_1['sign_up_location'] = df_1['sign_up_location'].astype(int).astype(str) df_copy_sign_up = df_1[['sign_up_location', 'customer_zip_code']].drop_duplicates() df_copy_sign_up = df_copy_sign_up.reset_index() del df_copy_sign_up['index'] print("check point 4") # In[7]: # distance to sign up stores df_store_all = pd.DataFrame( columns=['location_id', 'latitude_meas', 'longitude_meas']) list_all_stores = glob.glob(folder_store_list + "*.txt") list_all_stores = [x for x in list_all_stores if "MediaStormStores" in x] list_all_stores = sorted(list_all_stores, key=lambda x: x.split("MediaStormStores")[1][:8]) list_all_stores = [ x for x in list_all_stores if x.split("MediaStormStores")[1][:8] <= str(high_date + datetime.timedelta(days=2)).replace("-", "") ] list_all_stores.reverse() for file in list_all_stores: df = pd.read_table( file, dtype=str, sep="|", usecols=['location_id', 'latitude_meas', 'longitude_meas']) df = df[['location_id', 'latitude_meas', 'longitude_meas']] df['latitude_meas'] = df['latitude_meas'].astype(float) df['longitude_meas'] = df['longitude_meas'].astype(float) df = df[~df['location_id'].isin(['145', '6990'])] df = df[~df['location_id'].isin(df_store_all['location_id'].tolist())] df_store_all = df_store_all.append(df) df_store_all['store_coor'] = df_store_all[[ 'latitude_meas', 'longitude_meas' ]].values.tolist() dict_store_all = df_store_all.set_index( "location_id").to_dict()['store_coor'] df_copy_sign_up['distc_to_sign_up'] = np.nan for i, row in df_copy_sign_up.iterrows(): try: store_coor = dict_store_all[row['sign_up_location']] zip_center = zip_centers[row['customer_zip_code']] dist = haversine(store_coor, zip_center, unit="mi") df_copy_sign_up.loc[i, "distc_to_sign_up"] = dist except: continue df_1 = pd.merge(df_1, df_copy_sign_up, on=['sign_up_location', 'customer_zip_code'], how="left") print("check point 5") # list_unsub = glob.glob(folder_email_unsub + "*.csv") df_unsub_files = pd.DataFrame({"file_path": list_unsub}) df_unsub_files['date'] = df_unsub_files['file_path'].apply( lambda x: x.split("ile_Refresh__")[1][:8]) df_unsub_files['date'] = pd.to_datetime(df_unsub_files['date']).dt.date df_unsub_files['day_diff'] = abs(df_unsub_files['date'] - high_date) path_unsub = df_unsub_files[ df_unsub_files['day_diff'] == df_unsub_files['day_diff'].min()]['file_path'].values.tolist()[0] ###### list_unsunsribe_ids = pd.read_csv( path_unsub, dtype=str, usecols=['customersummary_c_primaryscnhash' ])['customersummary_c_primaryscnhash'].unique().tolist() print(len(list_unsunsribe_ids)) df_1['email_unsub_label'] = np.where( df_1['customer_id_hashed'].isin(list_unsunsribe_ids), 1, 0) del 
list_unsunsribe_ids df_zips_with_BL_store = df_zips_with_BL_store.rename( columns={"zip_cd": "customer_zip_code"}) df_1 = pd.merge(df_1, df_zips_with_BL_store, on="customer_zip_code", how="left") df_1 = df_1.reset_index() del df_1['index'] df_1 = df_1.reset_index() del df_1['index'] df_1 = df_1.reset_index() # Changed to 3 weeks dv_start_date = high_date + datetime.timedelta(days=1) dv_end_date = high_date + datetime.timedelta(days=21) str_sql_dv_start_date = "'" + str(dv_start_date) + "'" str_sql_dv_end_date = "'" + str(dv_end_date) + "'" print(str_sql_dv_start_date, str_sql_dv_end_date) print(datetime.datetime.now()) df_dvs = pd.read_sql( "select customer_id_hashed, transaction_dt from Pred_POS_Department where transaction_dt between %s and %s and sales >0" % (str_sql_dv_start_date, str_sql_dv_end_date), con=BL_engine).drop_duplicates() print(datetime.datetime.now()) print("check point 6") # In[36]: df_dvs['week_end_dt'] = df_dvs['transaction_dt'].apply(week_end_dt) df_dvs = df_dvs[['customer_id_hashed', 'week_end_dt']].drop_duplicates() list_unique_weeks = df_dvs['week_end_dt'].unique().tolist() list_unique_weeks.sort() df_dv_binary = df_dvs[df_dvs['week_end_dt'] == list_unique_weeks[0]][[ 'customer_id_hashed' ]] df_dv_binary['DV_cumulative_week_updated_1'] = 1 for i in range(1, 3): w = list_unique_weeks[i] df = df_dvs[df_dvs['week_end_dt'] <= w][['customer_id_hashed' ]].drop_duplicates() df['DV_cumulative_week_updated_%d' % (i + 1)] = 1 df_dv_binary = pd.merge(df_dv_binary, df, on="customer_id_hashed", how="outer") print(w, datetime.datetime.now()) df_dv_binary = df_dv_binary.fillna(0) df_1 = pd.merge(df_dv_binary, df_1, on="customer_id_hashed", how="right") for i in range(3): df_1['DV_cumulative_week_updated_%d' % (i + 1)] = df_1['DV_cumulative_week_updated_%d' % (i + 1)].fillna(0) print(df_1.shape, df_1['customer_id_hashed'].nunique()) if "index" in df_1.columns.tolist(): del df_1['index'] print("check point 7") # self table_crm_id_list_train = "crm_table_id_list_train_%s" % str_week_end_d table_crm_id_list_test = "crm_table_id_list_test_%s" % str_week_end_d table_df_1 = "table_pred_1_crm_up_to_%s" % str_week_end_d dict_table_names.update( {"table_crm_id_list_train": table_crm_id_list_train}) dict_table_names.update({"table_crm_id_list_test": table_crm_id_list_test}) dict_table_names.update({"table_df_1": table_df_1}) # split len_df_1 = len(df_1) train_sample_size = 10**6 test_ratio = 0.25 if len_df_1 > train_sample_size / (1 - test_ratio): list_ind_train = random.sample(range(len_df_1), train_sample_size) else: list_ind_train = random.sample(range(len_df_1), int(len_df_1 * (1 - test_ratio))) df_1 = df_1.reset_index() df_1_train = df_1[['customer_id_hashed' ]][df_1['index'].isin(list_ind_train)] df_1_test = df_1[['customer_id_hashed' ]][~df_1['index'].isin(list_ind_train)] del df_1['index'] print("df_1_train.shape", df_1_train.shape) print("df_1_test.shape", df_1_test.shape) chunksize = 10**6 dtype_id = {"customer_id_hashed": sql.types.VARCHAR(length=64)} df_1_train.to_sql(name=table_crm_id_list_train, chunksize=chunksize, con=BL_engine, index=False, if_exists="replace", dtype=dtype_id) df_1_test.to_sql(name=table_crm_id_list_test, chunksize=chunksize, con=BL_engine, index=False, if_exists="replace", dtype=dtype_id) dtype_df_1 = { 'customer_id_hashed': sql.types.VARCHAR(length=64), 'DV_cumulative_week_updated_1': sql.types.Integer, 'DV_cumulative_week_updated_2': sql.types.Integer, 'DV_cumulative_week_updated_3': sql.types.Integer, # 'DV_cumulative_week_updated_4':sql.types.Integer, 
'sign_up_location': sql.types.VARCHAR(length=5), 'customer_zip_code': sql.types.VARCHAR(length=5), 'P_zip': sql.types.Integer, 'S_zip': sql.types.Integer, 'else_10_zip': sql.types.Integer, 'signed_online': sql.types.Integer, 'distc_to_sign_up': sql.types.Float, 'email_unsub_label': sql.types.Integer, 'nearest_BL_store': sql.types.VARCHAR(length=4), 'nearest_BL_dist': sql.types.Float } df_1.to_sql(name=table_df_1, con=BL_engine, index=False, if_exists="replace", dtype=dtype_df_1, chunksize=chunksize) print("check point 8") create_index(table_name=table_crm_id_list_train, list_of_columns=["customer_id_hashed"]) create_index(table_name=table_crm_id_list_test, list_of_columns=["customer_id_hashed"]) create_index(table_name=table_df_1, list_of_columns=["customer_id_hashed"]) # In[38]: path_json_table_names = "./table_names_%s.json" % str(high_date).replace( "-", "") with open(path_json_table_names, "w") as json_file: json.dump(dict_table_names, json_file) print("Done_of_part_2: %s" % str(datetime.datetime.now()))
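# `get_dist_output_df` is referenced above but "defined in the main py file";
# below is a plausible reconstruction given the columns consumed downstream
# (zip_cd, nearest_BL_store, nearest_BL_dist). This is an assumption, not the
# original implementation.
import pandas as pd
from haversine import haversine

def get_dist_output_df(list_zips, df_store_list, zip_centers):
    rows = []
    for zip_cd in list_zips:
        zip_center = zip_centers.get(zip_cd)
        if zip_center is None:
            continue
        best_store, best_dist = None, None
        for _, store in df_store_list.iterrows():
            d = haversine((store['latitude_meas'], store['longitude_meas']),
                          zip_center, unit="mi")
            if best_dist is None or d < best_dist:
                best_store, best_dist = store['location_id'], d
        rows.append({'zip_cd': zip_cd,
                     'nearest_BL_store': best_store,
                     'nearest_BL_dist': best_dist})
    return pd.DataFrame(rows) if rows else None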
import sys

import networkx as nx
from haversine import haversine

# NOTE: nx.read_shp was deprecated in networkx 2.6 and removed in 3.0.
graph = nx.read_shp(
    './shape_files/tl_2013_48_prisecroads/tl_2013_48_prisecroads.shp')
edges = graph.edges()
nodes = graph.nodes()

d1 = float('inf')  # sys.maxint no longer exists in Python 3
p1 = None
d2 = float('inf')
p2 = None
for n in nodes:
    # Shapefile nodes are (lon, lat) tuples, and both arguments are passed to
    # haversine() in that order; the values are therefore not true
    # great-circle distances, but the same convention is used consistently.
    d = haversine(n, (-101.897681, 32.08691))
    if d < d1:
        d1 = d
        p1 = n
    d = haversine(n, (-97.032193, 32.759417))
    if d < d2:
        d2 = d
        p2 = n

def dist(a, b):
    return haversine(a, b)

print(nx.astar_path(graph, p1, p2, dist))
def get_graph_temp(self):
    from haversine import haversine
    from collections import defaultdict

    g = nx.Graph()
    nodes = set(self.df_train.index.tolist() + self.df_dev.index.tolist() +
                self.df_test.index.tolist())
    assert len(nodes) == len(self.df_train) + len(self.df_dev) + len(
        self.df_test), 'duplicate target node'
    nodes_list = (self.df_train.index.tolist() + self.df_dev.index.tolist() +
                  self.df_test.index.tolist())
    node_id = {node: id for id, node in enumerate(nodes_list)}
    g.add_nodes_from(node_id.values())
    train_locs = self.df_train[['lat', 'lon']].values
    for node in nodes:
        g.add_edge(node_id[node], node_id[node])  # self-loop on every target node

    pattern = '(?<=^|(?<=[^a-zA-Z0-9-_\\.]))@([A-Za-z]+[A-Za-z0-9_]+)'
    pattern = re.compile(pattern)
    logging.info('adding the train graph')
    for i in range(len(self.df_train)):
        user = self.df_train.index[i]
        user_id = node_id[user]
        mentions = [m for m in pattern.findall(self.df_train.text[i])]
        idmentions = set()
        for m in mentions:
            if m in node_id:
                idmentions.add(node_id[m])
            else:
                id = len(node_id)
                node_id[m] = id
                idmentions.add(id)
        if len(idmentions) > 0:
            g.add_nodes_from(idmentions)
        for id in idmentions:
            g.add_edge(id, user_id)

    celebrities = []
    for i in range(len(nodes_list), len(node_id)):
        deg = len(g[i])
        if deg > self.celebrity_threshold:
            celebrities.append(i)

    # get neighbours of celebrities
    id_node = {v: k for k, v in node_id.items()}  # dict.iteritems() is Python 2 only
    degree_distmean = defaultdict(list)
    degree_distance = defaultdict(list)
    c_distmean = {}
    for c in celebrities:
        c_name = id_node[c]
        c_nbrs = list(g[c].keys())  # materialize: a KeysView cannot index a NumPy array
        c_degree = len(c_nbrs)
        c_locs = train_locs[c_nbrs, :]
        c_lats = c_locs[:, 0]
        c_lons = c_locs[:, 1]
        c_median_lat = np.median(c_lats)
        c_median_lon = np.median(c_lons)
        distances = [
            haversine((c_median_lat, c_median_lon), tuple(c_locs[i].tolist()))
            for i in range(c_locs.shape[0])
        ]
        degree_distance[c_degree].extend(distances)
        c_meandist = np.mean(distances)
        degree_distmean[c_degree].append(c_meandist)
        c_distmean[c_name] = [c_degree, c_meandist]
    with open('celebrity.pkl', 'wb') as fout:  # opened for writing, not reading
        pickle.dump((c_distmean, degree_distmean, degree_distance), fout)
    logging.info('removing %d celebrity nodes with degree higher than %d' %
                 (len(celebrities), self.celebrity_threshold))
    self.biggraph = g
def parseResponse(gpsLine): gpsChars = ''.join(chr(c) for c in gpsLine) local_pending_redraw = False if "$GNGGA" in gpsChars: if ",1," not in gpsChars: print("Looking for fix... (GGA)") add_to_image.rectangle(status_icon_zone, fill="black", outline = "black") add_to_image.rectangle(status_zone, fill="black", outline = "black") add_to_image.text(status_icon_start, "\uf252", font=FA_solid, fill="white") add_to_image.text(status_start, "GPS...", fill="white") return False try: nmea = pynmea2.parse(gpsChars, check=True) print('%.6f'%(nmea.latitude), ",",'%.6f'%(nmea.longitude), ", sats:", nmea.num_sats, ", alt:", nmea.altitude) # GGA ## update altitude add_to_image.text(alti_icon_start, "\uf077", font=FA_solid, fill="white") add_to_image.rectangle(alti_zone, fill="black", outline = "black") add_to_image.text(alti_start, str('%.0f'%(nmea.altitude)), font=text_medium, fill="white") ## fix found, show nb satelites add_to_image.rectangle(status_icon_zone, fill="black", outline = "black") add_to_image.rectangle(status_zone, fill="black", outline = "black") text_sats = "Sats.:" + nmea.num_sats add_to_image.text(status_start, text_sats, fill="white") ## update total distance global reading_nr global total_km global prev_lat global prev_long dist = 0 if reading_nr != 1: dist = haversine(((float(prev_lat)), (float(prev_long))), ((float(nmea.latitude)), (float(nmea.longitude)))) total_km = total_km+dist print("Total KM:", total_km) add_to_image.text(dist_icon_start, "\uf1b9", font=FA_solid, fill="white") add_to_image.rectangle(dist_zone, fill="black", outline = "black") add_to_image.text(dist_start, "%0.1f" % total_km, font=text_medium, fill="white") prev_lat = nmea.latitude prev_long = nmea.longitude reading_nr +=1 ## log every 10th GPS coordinate in CSV file if reading_nr % 10 == 0: filename = 'data/gps/gps_' + datetime.datetime.now().strftime("%Y%m%d") + '.csv' with open(filename, 'a', newline='') as csvfile: gps_writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) gps_writer.writerow([nmea.timestamp, nmea.latitude, nmea.longitude, nmea.altitude]) local_pending_redraw = True except Exception as e: print("NMEA parse error (GGA)") print(e) pass if "$GNRMC" in gpsChars: if ",A," not in gpsChars: # 1 for GGA, A for RMC print("Looking for fix... (RMC)") return False try: nmea = pynmea2.parse(gpsChars, check=True) print("Speed: ", nmea.spd_over_grnd) # RMC ## update speed add_to_image.rectangle(speed_zone, fill="black", outline = "black") add_to_image.text(speed_start, str('%.0f'%(nmea.spd_over_grnd)), font=text_largest, fill="white") local_pending_redraw = True except Exception as e: print("NMEA parse error (RMC)") print(e) pass if local_pending_redraw == True: global pending_redraw pending_redraw = True
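# Module-level state assumed by parseResponse() above; the initial values are
# a guess consistent with its logic (the first reading adds no distance, and a
# redraw is requested only after a successful parse).
reading_nr = 1
total_km = 0.0
prev_lat = 0.0
prev_long = 0.0
pending_redraw = False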
def dist(a, b):
    # Great-circle distance between two (lat, lon) points, in kilometers
    # by default.
    return haversine(a, b)