async def getToilet(request):
    pool = request.app["pool"]
    data = await request.post()
    try:
        maxSize = int(data["mx"])
        w = float(data["w"])
        j = float(data["j"])
    except (KeyError, ValueError):
        # Fall back to query-string parameters if the POST body lacks them.
        data = request.query
        maxSize = int(data["mx"])
        w = float(data["w"])
        j = float(data["j"])
    geo = geohash.encode(w, j)
    neib = getN(geo)
    neib.append(geo)
    sql = ""
    for item in neib:
        sql += f"or geo like '{item[:6]}%' "
    sql = sql[3:]  # strip the leading "or "
    try:
        async with pool.acquire() as conn:
            values = await conn.fetch("select * from geo where " + sql)
            to = [[x["tid"], x["w"], x["j"]] for x in values]
            # Each row is [tid, w, j] (lat, lng), so pass (lng, lat) for both points.
            to.sort(key=lambda x: haversine(float(j), float(w), float(x[2]), float(x[1])))
            res = {"mx": min(maxSize, len(to)), "ans": to[:maxSize]}
    except asyncpg.exceptions.UniqueViolationError:
        return web.Response(text="Already exist")
    return web.json_response(data=res)
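# The f-string above interpolates geohash prefixes straight into the SQL. A
# minimal sketch of the same lookup using asyncpg's positional parameters, so
# the values never touch the query string (assumes the same `pool` and `neib`
# as above; not the original code):
async def fetch_geo_matches(pool, neib):
    # one "geo like $n" clause per prefix
    conditions = " or ".join("geo like $%d" % (i + 1) for i in range(len(neib)))
    args = [item[:6] + "%" for item in neib]
    async with pool.acquire() as conn:
        return await conn.fetch("select * from geo where " + conditions, *args)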
def height(self):
    left = self.mpoly.envelope.tuple[0][0][0]
    bottom = self.mpoly.envelope.tuple[0][0][1]
    right = self.mpoly.envelope.tuple[0][1][0]
    top = self.mpoly.envelope.tuple[0][2][1]
    return haversine((left + right) / 2, top, (left + right) / 2, bottom)
def get_retailer_closest_wholesalers(retailer, wholesalers):
    # Shamelessly pulled from the GeoRasterViewer code because it works really well.
    # Convert the wholesalers into arrays of lats/lons.
    wholesale_lats = wholesalers.Latitude.values
    wholesale_lons = wholesalers.Longitude.values
    wholesale_names = wholesalers.Wholesale_Name.values
    wholesale_ids = wholesalers.Location_ID.values
    # Calculate all distances, then pick out the nearest wholesaler.
    distances = np.ndarray.flatten(
        haversine(retailer[0], retailer[1], wholesale_lats, wholesale_lons))
    closest_index = np.argmin(distances)
    closest_distance = distances[closest_index]
    # Consolidate into a dictionary and return.
    closest = {
        "Wholesaler": [wholesale_lats[closest_index], wholesale_lons[closest_index]],
        "Wholesaler_Name": wholesale_names[closest_index],
        "Wholesaler_ID": wholesale_ids[closest_index],
        "Distance": closest_distance
    }
    return closest
def width(self):
    left = self.mpoly.envelope.tuple[0][0][0]
    bottom = self.mpoly.envelope.tuple[0][0][1]
    right = self.mpoly.envelope.tuple[0][1][0]
    top = self.mpoly.envelope.tuple[0][2][1]
    return haversine(left, (top + bottom) / 2, right, (top + bottom) / 2)
def home(request):
    try:
        location = request.GET.get('location')
    except Exception:
        location = None
    if location:
        lat, lon = get_location(location)
        a = get_restaurants(lat, lon)
        custom = []
        count = 0
        for restaurant in a['restaurants']:
            restaurant['restaurant']['review'] = get_reviews_details(
                restaurant['restaurant']['url'].split("?")[0])
            rlat = float(restaurant['restaurant']['location']['latitude'])
            rlong = float(restaurant['restaurant']['location']['longitude'])
            restaurant['restaurant']['distance'] = str(haversine(lon, lat, rlong, rlat)) + ' km'
        restaurants = json_normalize(a['restaurants'])
        # print restaurants
        return HttpResponse(restaurants.to_html())
        # return render_to_response("result.html", {'a': custom}, context_instance=RequestContext(request))
    else:
        return render_to_response("home.html", {}, context_instance=RequestContext(request))
def process_item(self, item, spider):
    col_loc = get_mongodb('geo', 'Locality', profile='mongodb-general')

    # get country
    country_code = item['country_code']
    if country_code not in QyerCityProcPipeline.country_map:
        col_country = get_mongodb('geo', 'Country', profile='mongodb-general')
        country = col_country.find_one({'code': country_code})
        assert country is not None
        QyerCityProcPipeline.country_map[country_code] = country
    else:
        country = QyerCityProcPipeline.country_map[country_code]

    city_id = item['city_id']
    city = col_loc.find_one({'source.qyer.id': city_id})
    if not city:
        city = col_loc.find_one({'alias': item['zh_name'].lower(),
                                 'location': {
                                     '$near': {'type': 'Point',
                                               'coordinates': [item['lng'], item['lat']]}},
                                 'country._id': country['_id']})
        if city:
            dist = utils.haversine(city['location']['coordinates'][0],
                                   city['location']['coordinates'][1],
                                   item['lng'], item['lat'])
            if dist > 100:
                city = {}
    if not city:
        city = {}

    city['enName'] = item['en_name']
    zh_name = item['zh_name']
    short_name = utils.get_short_loc(zh_name)
    city['zhName'] = short_name

    alias1 = city['alias'] if 'alias' in city and city['alias'] else []
    alias2 = item['alias'] if 'alias' in item and item['alias'] else []
    alias1.extend(alias2)
    alias1.append(short_name)
    city['alias'] = list(set(filter(lambda val: val, [tmp.lower().strip() for tmp in alias1])))

    source = city['source'] if 'source' in city else {}
    source['qyer'] = {'id': item['city_id'], 'url': item['url']}
    city['source'] = source

    city['country'] = {'id': country['_id'], '_id': country['_id']}
    for k in ('enName', 'zhName'):
        if k in country:
            city['country'][k] = country[k]

    city['level'] = 2
    city['desc'] = item['desc']
    city['imageList'] = item['imageList']
    city['images'] = []
    city['location'] = {'type': 'Point', 'coordinates': [item['lng'], item['lat']]}
    city['abroad'] = country_code != 'CN'
    city['isHot'] = item['is_hot'] > 0

    col_loc.save(city)
    return item
def search_proximity(cls, lat=50.848, lon=4.351, radius=8):
    """List stations within a given radius from a location.

    Args:
        lat: latitude of the center of search, in decimal degrees
        lon: longitude of the center of search, in decimal degrees
        radius: maximum distance from center, in kilometers

    Default values are the approximate center and radius of Brussels.

    Returns:
        Dataframe of matching stations, listing sensor types, locations
        and distances in kilometers from the search center, indexed by
        station ID

    The search is based on the station list retrieved as part of the
    metadata. The irceline.be API offers an alternative way to get an
    (unordered) list of stations near a location:
    `https://geo.irceline.be/sos/api/v1/stations?near={{"center":{{"type":"Point","coordinates":[{lon}, {lat}]}},"radius":{radius}}}`
    """
    near_stations = cls.stations.copy()
    near_stations["distance"] = near_stations.apply(
        lambda x: haversine(lon, lat, x["lon"], x["lat"]), axis=1)
    near_stations = near_stations[near_stations["distance"] <= radius]
    near_stations.sort_values("distance", inplace=True)
    return near_stations
def distance():  # in meters
    lon1 = float(request.args.get('lon1'))
    lat1 = float(request.args.get('lat1'))
    lon2 = float(request.args.get('lon2'))
    lat2 = float(request.args.get('lat2'))
    d = round(utils.haversine(lon1, lat1, lon2, lat2), 2)
    return json.dumps(d)
def parse_geocode(self, response):
    item = response.meta['item']
    lang = response.meta['lang']
    try:
        data = json.loads(response.body)
        if data['status'] == 'OVER_QUERY_LIMIT':
            return Request(url=response.url, callback=self.parse_geocode,
                           meta={'item': item, 'lang': lang},
                           headers={'Accept-Language': response.request.headers['Accept-Language'][0]},
                           dont_filter=True)
        elif data['status'] == 'ZERO_RESULTS':
            return
        elif data['status'] != 'OK':
            self.log('ERROR GEOCODING. STATUS=%s, URL=%s' % (data['status'], response.url))
            return

        city_result = None
        location = None
        for result in data['results']:
            # The result must be reasonably close to the original coordinates to be trusted.
            geometry = result['geometry']
            lat = geometry['location']['lat']
            lng = geometry['location']['lng']
            dist = utils.haversine(lng, lat, item['lng'], item['lat'])
            if dist > 100:
                continue
            else:
                city_result = result
                location = [lng, lat]
                break

        if city_result:
            # Find the first entry whose types include 'political'.
            address_components = filter(lambda val: 'political' in val['types'],
                                        city_result['address_components'])
            data = address_components[0]
            short_name = data['short_name']
            long_name = data['long_name']
            s = set(item['alias'])
            s.add(short_name.lower())
            s.add(long_name.lower())
            k = 'zh_name' if lang == 'zh' else 'en_name'
            s.add(item[k].lower())
            item[k] = long_name
            item['alias'] = list(s)
        if location:
            item['lng'] = location[0]
            item['lat'] = location[1]
    except (KeyError, IndexError):
        self.log('ERROR GEOCODING: %s' % response.url, log.WARNING)

    if lang == 'zh':
        return Request(url='http://maps.googleapis.com/maps/api/geocode/json?address=%s,%s&sensor=false' % (
            item['en_name'], item['en_country']), callback=self.parse_geocode,
            meta={'item': item, 'lang': 'en'}, headers={'Accept-Language': 'en-US'},
            dont_filter=True)
    else:
        return item
def way(self, w):
    # default callback invoked for each way
    # skip ways that start and end at the same node
    if w.nodes[0].ref == w.nodes[-1].ref:
        return
    for i in range(len(w.nodes) - 1):
        start = GraphNode(w.nodes[i].ref, w.nodes[i].location)
        end = GraphNode(w.nodes[i + 1].ref, w.nodes[i + 1].location)
        distance = haversine(start, end)
        edge = GraphEdge(start.node_id, end.node_id, distance, 4)
        self.neo_handler.add_edge(edge)
def get_bounds(self, indices):
    tolerance = .0001  # about 11 meters. Need this, otherwise we lose some records
    xmin, ymin, xmax, ymax = self.gdf.iloc[indices].total_bounds
    xmin = xmin - tolerance
    ymin = ymin - tolerance
    xmax = xmax + tolerance
    ymax = ymax + tolerance
    # we want to keep regions small enough so cross-product matching will not be too slow
    partition_size = len(indices)
    partition_span = utils.haversine(xmin, ymin, xmax, ymax)
    return xmin, ymin, xmax, ymax, partition_size, partition_span
def maps_directions(start_lat, start_lon, end_lat, end_lon, gmaps):
    # The Directions API can take many types of input, including strings.
    start_string = "{},{}".format(start_lat, start_lon)
    end_string = "{},{}".format(end_lat, end_lon)
    result = gmaps.directions(origin=start_string, destination=end_string)
    # We only care about the travelled length in the JSON output,
    # converted from meters to km (* 0.001).
    manhattan_distance = float(result[0]['legs'][0]['distance']['value']) * 0.001
    euclidean_distance = haversine(float(start_lat), float(start_lon),
                                   float(end_lat), float(end_lon))
    ratio = float(manhattan_distance / euclidean_distance)
    return euclidean_distance, manhattan_distance, ratio
def get_data(data, lon1, lat1, iata1, departure_date):
    iata2 = data.iata
    if iata1 != iata2:
        lat2 = data.lat
        lon2 = data.lon
        iata = data.iata
        distance = haversine(lon1, lat1, lon2, lat2)
        url_mockup = ('http://stub.2xt.com.br/air/search/qhjvlDvYOwbbu9yq9Dq9DpfCqEbqWfvO/'
                      + iata1 + '/' + iata2 + '/' + departure_date)
        value = requests.get(url_mockup, auth=HTTPBasicAuth('leandroalmeida', 'tefvlD'))
        options = pd.DataFrame(value.json()['options'])
        if not options.empty:
            x = options.apply(lambda column: aircraft_values(column, distance), axis=1)
            save(x, url_mockup, iata1, iata2)
def find_closest_store(self, address, units='mi'):
    store = utils.convert_to_1D({'address': address})
    (lo, hi) = self.t.closest(store['id'])
    if lo:
        dist1 = utils.haversine(store['lat'], store['lon'], lo['lat'], lo['lon'], units)
    else:
        dist1 = float('inf')
    if hi:
        dist2 = utils.haversine(store['lat'], store['lon'], hi['lat'], hi['lon'], units)
    else:
        dist2 = float('inf')
    if dist1 < dist2:
        lo['distance'] = "%0.3f %s" % (dist1, units)
        return lo
    else:
        hi['distance'] = "%0.3f %s" % (dist2, units)
        return hi
def search_proximity(lat=50.848, lon=4.351, radius=8):
    """Find sensors within a given radius from a location.

    Args:
        lat: latitude of the center of search, in decimal degrees
        lon: longitude of the center of search, in decimal degrees
        radius: maximum distance from center, in kilometers

    Default values are the approximate center and radius of Brussels.

    Returns:
        Dataframe of matching sensors, listing sensor types, locations
        and distances in kilometers from the search center, indexed by
        sensor ID
    """
    url = API_ENDPOINTS["proximity search pattern"].format(lat=lat, lon=lon, radius=radius)
    _json = requests.get(url).json()
    sensors = json_normalize(_json)
    if len(sensors) == 0:
        sensors = pd.DataFrame(columns=["sensor_type", "latitude", "longitude", "distance"])
        sensors.index.name = "sensor_id"
        return sensors
    sensors = (sensors[["sensor.id", "sensor.sensor_type.name",
                        "location.latitude", "location.longitude"]]
               .rename(columns={"sensor.id": "sensor_id",
                                "sensor.sensor_type.name": "sensor_type",
                                "location.latitude": "latitude",
                                "location.longitude": "longitude"}))
    for col in "latitude", "longitude":
        sensors[col] = pd.to_numeric(sensors[col], downcast="float")
    sensors.set_index("sensor_id", inplace=True)
    # Drop duplicates - sensors appear once for each measurement in past 5 mins
    sensors = sensors[~sensors.index.duplicated()]
    # Calculate distances from search center and sort by those distances
    sensors["distance"] = sensors.apply(
        lambda x: haversine(lat, lon, float(x["latitude"]), float(x["longitude"])),
        axis=1)
    sensors.sort_values("distance", inplace=True)
    return sensors
def __get_centroid_medoid_node_region(region):
    lat_tot = lon_tot = 0
    for v in region:
        lat_tot += region[v][2]['lat']
        lon_tot += region[v][2]['lon']
    lat_centroid = lat_tot / len(region.keys())
    lon_centroid = lon_tot / len(region.keys())
    min_distance = sys.maxint
    medoid = None
    for v in region:
        dist = haversine(region[v][2]['lat'], region[v][2]['lon'],
                         lat_centroid, lon_centroid)
        if dist < min_distance:
            min_distance = dist
            medoid = v
    return (lat_centroid, lon_centroid), medoid
def reverse(db, lon, lat):
    min_distance = 50000000  # bigger than Earth's circumference
    match = None
    h = geohash(lon, lat)
    arg = find_index(h, db.numbers_geohash_index, db.numbers['geohash'])
    k = 100  # k-nearest
    lo = max(0, arg - k // 2)
    hi = min(db.numbers_geohash_index.size, arg + k // 2)
    for hyp in db.numbers_geohash_index[lo:hi]:
        hlon, hlat = reverse_geohash(db.numbers[hyp]['geohash'])
        # Calculate haversine distance to get the real orthodromic distance
        d = haversine(hlon, hlat, lon, lat)
        if d < min_distance:
            min_distance = d
            match = hyp
    return Result.from_plate(db, match, 0, distance=min_distance)
def dianping_match(self, entry):
    """Perform the matching step."""
    city_info = self.city_map[entry['locality']['_id']]
    city_id = city_info['city_id']
    shop_name = entry['zhName']
    context = {'city_info': city_info, 'shop_name': shop_name}
    url = 'http://www.dianping.com/search/keyword/%d/0_%s' % (city_id, shop_name)
    search_response = self.request.get(url, user_data={
        'ProxyMiddleware': {'validator': self.default_validator}
    })
    if search_response.status_code == 404:
        return
    shop_list = list(self.parse_search_list(search_response, context))
    if not shop_list:
        return
    self.store_shops(shop_list)
    the_shop = shop_list[0]
    # Check whether the coordinates agree
    try:
        coords1 = entry['location']['coordinates']
        coords2 = [the_shop['lng'], the_shop['lat']]
    except KeyError:
        return
    from utils import haversine
    # Allow at most 1 km of error
    max_distance = 1
    try:
        if haversine(coords1[0], coords1[1], coords2[0], coords2[1]) < max_distance:
            self.bind_shop_id(entry, the_shop['shop_id'])
    except TypeError:
        self.log('Unable to locate shop: %d' % the_shop['shop_id'], logging.WARN)
async def get_data_test(data1, data2, departure_date, session, df):
    iata1 = df[data1].iata
    iata2 = df[data2].iata
    if iata1 != iata2:
        lat1 = df[data1].lat
        lat2 = df[data2].lat
        lon1 = df[data1].lon
        lon2 = df[data2].lon
        distance = haversine(lon1, lat1, lon2, lat2)
        url_mockup = ('http://stub.2xt.com.br/air/search/qhjvlDvYOwbbu9yq9Dq9DpfCqEbqWfvO/'
                      + iata1 + '/' + iata2 + '/' + departure_date)
        async with session.get(url_mockup,
                               auth=aiohttp.BasicAuth('leandroalmeida', 'tefvlD')) as response:
            value = await response.json()
            options = pd.DataFrame(value['options'])
            if not options.empty:
                return opapply(options, distance, url_mockup, iata1, iata2)
def villages_in_range(villages, origin):
    near_lat = np.array([origin['lat']])
    near_lon = np.array([origin['lon']])
    far_lats = villages.Latitude.values
    far_lons = villages.Longitude.values
    distance = np.ndarray.flatten(haversine(near_lat, near_lon, far_lats, far_lons))
    villages["From_Center"] = distance
    within_range = villages.loc[villages["From_Center"] <= origin['radius']]
    within_range.to_csv("villages_within_{}.csv".format(origin['radius']), index=False)
    # Now we print out some quick data for the user to feed into georasterviewer,
    # with a little extra buffer space.
    print("\nGeoraster Values:\n===================\nMin Lat: {}\nMax Lat: {}\nMin Lon: {}\nMax Lon: {}".format(
        within_range['Latitude'].min() - 0.15,  # the buffer extends the minima outward
        within_range['Latitude'].max() + 0.15,
        within_range['Longitude'].min() - 0.15,
        within_range['Longitude'].max() + 0.15,
    ))
def get_neighbor(self, lat, lng):
    '''
    rtype: tuple(error, json(result))
    Find the nearest stations by computing the distance from (lat, lng)
    '''
    self.check_full()
    dist_dict = {i: utils.haversine(self.crawl_data[i]['latlng'], float(lat), float(lng))
                 for i in self.crawl_data if self.crawl_data[i]['num_ubike']}
    sort_dict = OrderedDict(sorted(dist_dict.items(), key=lambda t: t[1]))
    near_station = []
    for idx in sort_dict:
        print self.crawl_data[idx]['sna']
    for idx in sort_dict:
        Order = OrderedDict((('station', self.crawl_data[idx]['sna']),
                             ('num_ubike', self.crawl_data[idx]['num_ubike'])))
        near_station.append(Order)
        if len(near_station) == 2:
            return 0, near_station
def main(api_key):
    # Set up gmaps object with API key
    gmaps = googlemaps.Client(key=api_key)

    ## Get global variables
    start_string = 'The Anchor, 34 Park St, Southwark, London SE1 9EF'
    end_string = 'Southwark Brewing Company'
    n_places = 2
    radius_factor = 1
    keywords = ['bar', 'beer']
    mode = "bicycling"

    ## Get location info on start and end points
    start = get_lat_lng(gmaps, start_string)
    end = get_lat_lng(gmaps, end_string)
    print start_string, start
    print end_string, end

    ## Find centre and radius
    c_cen = get_centre_lat_long(start, end)
    print "centre coord", c_cen
    # You may prefer to use the text_search API, instead.
    radius = haversine(c_cen['lat'], c_cen['lng'], start['lat'], start['lng']) * 1000
    print "radius metres", radius

    ## Get nearby places
    nearby = gmaps.places_radar((c_cen['lat'], c_cen['lng']),
                                radius=radius * radius_factor,
                                keyword=keywords)

    ## Get info lists
    place_ids = map(lambda x: x['place_id'], nearby['results'])
    place_info = map(lambda x: gmaps.place(x), place_ids)
    place_names = map(lambda x: x['result']['formatted_address'], place_info)
def localization_error(camera_measured, camera_true, alt, subsampling=100):
    """
    Compute localization errors for a given camera wrt a given true camera.

    Args:
        camera_measured: measured camera
        camera_true: true camera; the error is computed with respect to this one
        alt: altitude (above sea level) at which the localization error is computed
        subsampling (default 100): number of localization error samples

    Returns:
        list of localization errors.
    """
    out = []
    c = camera_true.instrument.n_pix / 2  # middle of the row
    for r in xrange(0, np.round(camera_true.lig_f).astype(int), subsampling):
        lon1, lat1 = camera_measured.locdir(r, c, alt)
        lon2, lat2 = camera_true.locdir(r, c, alt)
        radius = ps.PhysicalConstants.earth_radius + alt
        out.append(utils.haversine(radius, lon1, lat1, lon2, lat2))
    return out
def query_time_series(cls, phenomenon, lat_nearest=None, lon_nearest=None):
    """Convenience method to filter time series for those that measure a
    given phenomenon, and sort by distance to a point if given.

    Args:
        phenomenon: character sequence or regular expression to filter
            phenomena by; operates on the "phenomenon" column of the
            time_series dataframe
        lat_nearest: latitude of the reference point
        lon_nearest: longitude of the reference point

    Returns:
        Subset of time_series property. If lat_nearest and lon_nearest
        are given, the result has an additional column indicating
        distance in km from that point, and is sorted by that distance.

    Raises:
        ValueError if only one of lat_nearest, lon_nearest is given
    """
    if bool(lat_nearest is None) != bool(lon_nearest is None):
        raise ValueError("Provide both or none of lat_nearest, lon_nearest")
    phenomena_lower = cls.time_series["phenomenon"].str.lower()
    matches = phenomena_lower.str.contains(phenomenon.lower())
    results = cls.time_series[matches].copy()
    if lat_nearest is None:
        return results
    if len(results) == 0:
        results["distance"] = None
        return results
    results["distance"] = results.apply(
        lambda row: haversine(lat_nearest, lon_nearest,
                              row["station_lat"], row["station_lon"]),
        axis=1)
    results = results.sort_values("distance")
    return results
def locate_point(point, nodes):
    """
    Find the best corresponding node to the point in the given list.

    Parameters:
        point   Point to evaluate.
        nodes   List of nodes.

    Returns:
        The best node in the list, or None if the point does not fall
        within any node.
    """
    best_dist = 10000
    best_node = None
    for node in nodes:
        dist = haversine(point, node.position)
        # Take the closest node; make sure the point is in the city circle
        if dist < best_dist and dist < node.radius:
            best_dist = dist
            best_node = node
    return best_node
def trackSummary(self):
    """
    Parse only the key data needed for the model in GPXViewer.
    Details will only be read on demand. This function populates
    the summary dictionary.
    """
    # populate the summary dict w/ the filename
    self.summary["file"] = self._source
    # date as ISO string
    # with some sources the date is only in the metadata and not in each track point
    # self.summary["date"] = self.allPoints[0].find('gpx:time', self.namespaces).text
    metaData = self._root.findall('gpx:metadata', self.namespaces)
    for md in metaData:
        time = md.findall('gpx:time', self.namespaces)
        self.summary["date"] = time[0].text
    # duration
    t0 = dateutil.parser.parse(self.allPoints[0].find('gpx:time', self.namespaces).text)
    t1 = dateutil.parser.parse(self.allPoints[-1].find('gpx:time', self.namespaces).text)
    self.summary["duration"] = (t1 - t0).total_seconds()
    # distance
    lat = []
    lon = []
    for point in self.allPoints:
        lat.append(float(point.attrib['lat']))
        lon.append(float(point.attrib['lon']))
    _dist = sum([haversine(y0, x0, y1, x1) for x0, x1, y0, y1 in zip(
        lat[:-1], lat[1:], lon[:-1], lon[1:])])
    self.summary["distance"] = _dist
    # and average speed in km/h
    self.summary["speed"] = 3.6 * self.summary["distance"] / self.summary["duration"]
def calc_length(self):
    return utils.haversine(self.n_station.lng, self.n_station.lat,
                           self.s_station.lng, self.s_station.lat)
def RestaurantsListView(request, username):
    user = User.objects.get(username=username)
    user_restaurants_ids = [r.id for r in user.restaurants_following.all()]
    restaurants = Restaurant.objects.all().select_related('user', 'restaurant', 'food')
    restaurants_list = []

    if request.GET.get('search', False):
        query_string = request.GET.get('search', False)
        restaurants = restaurants.filter(Q(name__icontains=query_string) |
                                         Q(description__icontains=query_string) |
                                         Q(location_name__icontains=query_string))
    if request.GET.get('following', False):
        restaurants = restaurants.filter(restaurants_following__in=[user])
    if request.GET.get('friends_following', False):
        friends = [u['id'] for u in user.following.values('id')]
        restaurants = restaurants.filter(restaurants_following__in=friends)
    if request.GET.get('recommended', False):
        restaurants = restaurants.filter(is_recommended=True)
    if request.GET.get('me_like', False):
        restaurant_ids = user.foods_liked.values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids
                       if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('friends_like', False):
        restaurant_ids = user.following.values('foods_liked__restaurant__id')
        seen = set()
        unique_rids = [r['foods_liked__restaurant__id'] for r in restaurant_ids
                       if r['foods_liked__restaurant__id'] not in seen
                       and not seen.add(r['foods_liked__restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('me_review', False):
        restaurant_ids = Review.objects.filter(user=user).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids
                       if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('recommended_people_review', False):
        users = User.objects.filter(is_recommended=True)
        restaurant_ids = Review.objects.filter(user__in=users).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids
                       if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('friends_review', False):
        friends = user.following.all()
        restaurant_ids = Review.objects.filter(user__in=friends).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids
                       if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('amenity_ids', False):
        amenity_ids = request.GET.get('amenity_ids', False).split(',')
        restaurants = restaurants.filter(amenities__in=amenity_ids)
    if request.GET.get('dietary_ids', False):
        dietary_ids = request.GET.get('dietary_ids', False).split(',')
        food_ids = Food.objects.filter(dietary__in=dietary_ids)
        restaurants = restaurants.filter(food__in=food_ids)
    if request.GET.get('cuisine_ids', False):
        cuisine_ids = request.GET.get('cuisine_ids', False).split(',')
        food_ids = Food.objects.filter(cuisine__in=cuisine_ids)
        restaurants = restaurants.filter(food__in=food_ids)

    # filter by range
    if request.GET.get('price_max', False):
        price_max = request.GET.get('price_max', False)
        restaurants = restaurants.filter(price_high__lte=int(price_max))
    if request.GET.get('price_min', False):
        price_min = request.GET.get('price_min', False)
        restaurants = restaurants.filter(price_low__gte=int(price_min))
    distance_max = request.GET.get('distance_max', False)
    if distance_max:
        distance_max = float(distance_max)
    distance_min = request.GET.get('distance_min', False)
    if distance_min:
        distance_min = float(distance_min)

    # sorting by non-derived field
    sort = request.GET.get('sort', False)
    if sort == 'price':
        restaurants = restaurants.extra(
            select={'price_range': 'price_high + price_low'}).extra(order_by=['price_range'])

    # get distinct restaurants
    restaurants = unique(restaurants)

    for restaurant in restaurants:
        restaurant_obj = {}
        restaurant_obj['name'] = restaurant.name
        restaurant_obj['id'] = restaurant.id
        restaurant_obj['location_name'] = restaurant.location_name
        restaurant_obj['location'] = {'x': restaurant.location_x, 'y': restaurant.location_y}
        restaurant_obj['dist'] = haversine(float(user.location_x), float(user.location_y),
                                           float(restaurant.location_x),
                                           float(restaurant.location_y))
        restaurant_obj['distance'] = '{0:0.2f}km'.format(restaurant_obj['dist'])
        # if a distance filter has been set, we only add qualifying restaurants
        if distance_max:
            if restaurant_obj['dist'] > distance_max:
                continue
        if distance_min:
            if restaurant_obj['dist'] < distance_min:
                continue
        restaurant_obj['photo'] = restaurant.photo
        restaurant_obj['price_low'] = '${0:0.0f}'.format(restaurant.price_low)
        restaurant_obj['price_high'] = '${0:0.0f}'.format(restaurant.price_high)
        # restaurant_obj['amenities'] = [{'id': res.id, 'image': res.image} for res in restaurant.amenities.all()]
        # get the people following this restaurant
        restaurant_obj['followed_by'] = [
            {'user_id': person.id, 'username': person.username, 'photo': person.photo}
            for person in User.objects.filter(restaurants_following__in=[restaurant])[:7]]
        restaurant_obj['following_count'] = User.objects.filter(
            restaurants_following__in=[restaurant]).count()
        restaurant_obj['is_following'] = (restaurant.id in user_restaurants_ids)
        restaurant_obj['is_recommended'] = restaurant.is_recommended
        # ratings
        reviews = Review.objects.filter(restaurant__in=[restaurant])
        if reviews.count():
            rating = 0
            for review in reviews:
                rating = rating + review.rating
            rating = rating / reviews.count()
        else:
            rating = 0
        restaurant_obj['rating'] = rating
        restaurant_obj['reviews_count'] = reviews.count()
        restaurants_list.append(restaurant_obj)

    # sorting by derived field
    if sort == 'followers':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['following_count'], reverse=True)
    elif sort == 'location':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['dist'])
    elif sort == 'ratings':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['rating'])

    return HttpResponse(json.dumps(restaurants_list), content_type="application/json")
def FoodListView(request, username):
    food_list_serialized = []
    user = User.objects.get(username=username)
    food_list = Food.objects.all().select_related('user', 'restaurant', 'food')

    if request.GET.get('search', False):
        query_string = request.GET.get('search', False)
        food_list = food_list.filter(Q(name__icontains=query_string) |
                                     Q(description__icontains=query_string))
    if request.GET.get('liked', False):
        food_list = food_list.filter(foods_liked__in=[user]).order_by('-id')
    if request.GET.get('friends_like', False):
        friends = [u['id'] for u in user.following.values('id')]
        food_list = food_list.filter(foods_liked__in=friends).order_by('-id')
    if request.GET.get('recommended', False):
        restaurants = Restaurant.objects.filter(is_recommended=True)
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('following', False):
        restaurants = Restaurant.objects.filter(restaurants_following__in=[user])
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('friends_following', False):
        friends = user.following.all()
        restaurants = Restaurant.objects.filter(restaurants_following__in=friends)
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('disliked', False):
        food_list = food_list.filter(foods_disliked__in=[user]).order_by('-id')
    if request.GET.get('explore', False):
        food_list = food_list.exclude(foods_liked__in=[user]).exclude(
            foods_disliked__in=[user]).order_by('id')
    if request.GET.get('dietary_ids', False):
        dietary_ids = request.GET.get('dietary_ids', False).split(',')
        food_list = food_list.filter(dietary__in=dietary_ids)
    if request.GET.get('cuisine_ids', False):
        cuisine_ids = request.GET.get('cuisine_ids', False).split(',')
        food_list = food_list.filter(cuisine__in=cuisine_ids)

    # filter by range
    if request.GET.get('price_max', False):
        price_max = request.GET.get('price_max', False)
        food_list = food_list.filter(price__lte=int(price_max))
    if request.GET.get('price_min', False):
        price_min = request.GET.get('price_min', False)
        food_list = food_list.filter(price__gte=int(price_min))
    distance_max = request.GET.get('distance_max', False)
    if distance_max:
        distance_max = float(distance_max)
    distance_min = request.GET.get('distance_min', False)
    if distance_min:
        distance_min = float(distance_min)

    # sorting by non-derived field
    sort = request.GET.get('sort', False)
    if sort == 'price':
        food_list = food_list.order_by('price')

    food_list = unique(food_list)

    for food in food_list:
        food_obj = {}
        food_obj['id'] = food.id
        food_obj['name'] = food.name
        # food_obj['description'] = food.description
        food_obj['price'] = '${0:0.2f}'.format(food.price)
        food_obj['dist'] = haversine(float(user.location_x), float(user.location_y),
                                     float(food.restaurant.location_x),
                                     float(food.restaurant.location_y))
        food_obj['distance'] = '{0:0.2f}km'.format(food_obj['dist'])
        # if a distance filter has been set, we only add qualifying restaurants
        if distance_max:
            if food_obj['dist'] > distance_max:
                continue
        if distance_min:
            if food_obj['dist'] < distance_min:
                continue
        food_obj['photo'] = food.photo
        food_obj['restaurant'] = food.restaurant.name
        food_obj['restaurant_id'] = food.restaurant.id
        food_obj['dietary_ids'] = [{'id': i.id, 'name': i.name} for i in food.dietary.all()]
        food_obj['cuisine_ids'] = [{'id': i.id, 'name': i.name} for i in food.cuisine.all()]
        food_obj['is_liked'] = food in user.foods_liked.all()
        food_obj['num_likes'] = User.objects.filter(foods_liked__in=[food]).count()
        food_list_serialized.append(food_obj)

    # sorting by derived field
    if sort == 'likes':
        food_list_serialized = sorted(food_list_serialized, key=lambda x: x['num_likes'], reverse=True)
    elif sort == 'location':
        food_list_serialized = sorted(food_list_serialized, key=lambda x: x['dist'])

    return HttpResponse(json.dumps(food_list_serialized), content_type="application/json")
def dist(self, other):
    """Returns the distance between two nodes in kilometers."""
    return haversine(self.position, other.position)
def compare():
    """
    Compare the results of method (a) with those of method (c).
    :return:
    """
    ll_data_2g = utils.gongcan_to_ll()
    train_data = utils.ll_to_grid(ll_data_2g)
    # print(train_data)

    # Drop the original IDs; they are not used as training features
    for i in range(1, 8):
        train_data.drop(['RNCID_' + str(i)], axis=1, inplace=True)
        train_data.drop(['CellID_' + str(i)], axis=1, inplace=True)

    # Fill missing signal strengths with 0
    train_data = train_data.fillna(0)

    # features and labels
    X_ = train_data.drop(['MRTime', 'Longitude', 'Latitude', 'Num_connected', 'grid_num'],
                         axis=1, inplace=False).as_matrix()
    y_ = train_data[['grid_num', 'Longitude', 'Latitude']].as_matrix()

    # Fix the random seed of every run so each classifier sees the same data splits
    random_states = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

    start = datetime.datetime.now()
    errors_all = []
    for i in range(10):
        # Split into training and validation sets
        X_train, X_test, y_train, y_test = train_test_split(
            X_, y_, test_size=0.2, random_state=random_states[i])
        clf = RandomForestClassifier(max_depth=20, random_state=0)
        y_pred = clf.fit(np.delete(X_train, 0, axis=1),
                         y_train[:, 0]).predict(np.delete(X_test, 0, axis=1))

        # Map predicted grid numbers back to grid-center coordinates
        ll_pred = []
        for y in y_pred:
            X_box = int(y % X_box_num)
            y_box = int(y / X_box_num) + 1
            if X_box == 0:
                X_box = X_box_num
                y_box -= 1
            lon = lb_Longitude + per_lon * X_box - 0.5 * per_lon
            lat = lb_Latitude + per_lat * y_box - 0.5 * per_lat
            ll_pred.append([lon, lat])
        ll_true = np.delete(y_test, 0, axis=1).tolist()

        errors = []
        for (true, pred) in zip(ll_true, ll_pred):
            error = utils.haversine(true[0], true[1], pred[0], pred[1])
            errors.append(error)
        errors.sort()
        errors_all.append(errors)

    print("RandomForest")
    print("Median error: {}".format(np.percentile(np.array(errors_all).mean(axis=0), 50)))
    print("Time: {}".format(datetime.datetime.now() - start))
    print("****************************")

    # Get the results of method (c)
    start = datetime.datetime.now()
    c_errors = main()
    print("Time: {}".format(datetime.datetime.now() - start))

    plt.figure('Comparison 2G DATA')
    plt.xlabel('Comparison 2G DATA - CDF figure')
    plt.ylabel('Error(meters)')

    # Plot the overall CDF curve of the results of method (c)
    mean_errors = []
    for i in range(len(c_errors)):
        errors = np.array(c_errors[i][1])
        mean_error = errors.mean(axis=0)
        mean_errors.extend(mean_error)
    mean_errors.sort()
    plt.plot([float(i) / float(len(mean_errors)) for i in range(len(mean_errors))],
             list(mean_errors), '--', linewidth=1, alpha=0.6,
             label="c-method median error(m): %.3f" % np.percentile(mean_errors, 50))

    # Plot the overall CDF curve of the results of method (a)
    errors = np.array(errors_all)
    mean_errors = errors.mean(axis=0)
    # print(mean_errors)
    plt.plot([float(i) / float(len(mean_errors)) for i in range(len(mean_errors))],
             list(mean_errors), '--', linewidth=1, alpha=0.6,
             label="a-method median error: %.3f" % np.percentile(mean_errors, 50))
    plt.legend()
    plt.show()
def isRelevant(self, record):
    """
    Checks whether the record passes the specified filters
    """
    if record["status"]["name"] == "Abandoned":
        if self.startTS is not None:
            # Probes abandoned before the measurement period should be dropped
            if record["status"]["since"] < self.startTS:
                return False
        else:
            return False
    if record["status"]["name"] == "Never Connected":
        # Never-connected probes should be dropped
        return False
    if record["status"]["name"] == "Noisy":
        # Probe constantly disconnecting
        return False
    if (record["status"]["name"] == "Connected") or (record["status"]["name"] == "Disconnected"):
        if self.endTS is not None:
            if "first_connected" not in record or record['first_connected'] is None:
                return False
            # Probes connected after the end of the measurement period should be dropped
            if record["first_connected"] > self.endTS:
                return False
    if record["status"]["name"] == "Disconnected":
        if self.startTS is not None:
            # Probes disconnected before the start of the measurement period should be dropped
            if record["status"]["since"] < self.startTS:
                return False
        else:
            return False

    probeASNv4 = record["asn_v4"]
    probeASNv6 = record["asn_v6"]
    asnCheckPassed = False
    if self.asnFilters == []:
        asnCheckPassed = True
    else:
        for asn in self.asnFilters:
            if (probeASNv4 == asn) or (probeASNv6 == asn):
                asnCheckPassed = True
                break

    probeCountry = record["country_code"]
    probeAdmin1 = record["admin1"]
    countryCheckPassed = False
    if self.countryFilters == []:
        countryCheckPassed = True
    else:
        for country in self.countryFilters:
            if probeCountry == country:
                countryCheckPassed = True
                break
        if not probeCountry:
            countryCheckPassed = True
        if probeAdmin1:
            if country in probeAdmin1:
                countryCheckPassed = True

    probeLocation = record["geometry"]["coordinates"]
    probeLat = probeLocation[0]
    probeLongt = probeLocation[1]
    proximityCheckPassed = False
    if self.proximityFilters == []:
        proximityCheckPassed = True
    elif (probeLat is None) or (probeLongt is None):
        # A lot of cases without coordinates. Currently discarding these probes
        proximityCheckPassed = False
    else:
        for coordinates in self.proximityFilters:
            lat = coordinates[0]
            longt = coordinates[1]
            distance = haversine(probeLat, probeLongt, lat, longt)
            if distance <= self.proximityThreshold:
                proximityCheckPassed = True
                break

    if asnCheckPassed and countryCheckPassed and proximityCheckPassed:
        return True
    else:
        return False
def trackDetails(self):
    """Read the track in the GPX file and populate the `track` dictionary.

    The time is read twice: once local (as datetime, so we can easily
    compute offsets) and once as an ISO 8601 string (as it is written)
    to be stored in the `track` dict.
    """
    # empty lists for all the vars for the details
    self.track["source"] = self._source
    self.track["lat"] = []
    self.track["lon"] = []
    self.track["ele"] = []
    self.track["time"] = []
    self.track["distances"] = []  # steps in space
    self.track["durations"] = []  # steps in time (sec)
    self.track["speed"] = []      # speed in km/h
    # local time version for datetime objects
    times = []
    # extract original lat/lon and cast as float
    for point in self.allPoints:
        # lat/lon/ele are just lists of floats
        self.track["lat"].append(float(point.attrib['lat']))
        self.track["lon"].append(float(point.attrib['lon']))
        self.track["ele"].append(float(point.find('gpx:ele', self.namespaces).text))
        # `times` holds datetime instances whereas track["time"] holds
        # ISO strings (so they can be serialized)
        times.append(dateutil.parser.parse(point.find('gpx:time', self.namespaces).text))
        self.track["time"].append(point.find('gpx:time', self.namespaces).text)
    # here we already compute individual steps between trackpoints
    # 1) distance (haversine)
    # list comprehension is fancy but a tad unreadable;
    # also decide whether we need haversine or if euclidean is enough
    self.track["distances"] = [haversine(y0, x0, y1, x1) for x0, x1, y0, y1 in zip(
        self.track["lat"][:-1], self.track["lat"][1:],
        self.track["lon"][:-1], self.track["lon"][1:])]
    # # the euclidean distance should work for small distances too and is less demanding
    # self.track["distances"] = [euclidean(y0, x0, y1, x1) for x0, x1, y0, y1 in zip(
    #     self.track["lat"][:-1], self.track["lat"][1:],
    #     self.track["lon"][:-1], self.track["lon"][1:])]
    # 2) durations (these have to be converted to datetime objects)
    # actually we could just subtract the seconds since the GPS *should* be
    # sampled every few secs, but you never know
    self.track["durations"] = [(t1 - t0).total_seconds() for t0, t1 in zip(
        times[:-1], times[1:])]
    # 3) speed in segments
    # d/t * 3.6 since we are in m/s but want km/h
    self.track["speed"] = [(d / t) * 3.6 if t > 0.0 else 0.0 for d, t in zip(
        self.track["distances"], self.track["durations"])]
    # distance, duration and speed can only be computed between pairs of
    # points and thus are one element too short: pad with a leading 0
    self.track["distances"] = [0] + self.track["distances"]
    self.track["durations"] = [0] + self.track["durations"]
    self.track["speed"] = [0] + self.track["speed"]
    # sum distances consecutively
    self.track["distances"] = [sum(self.track["distances"][:i + 1])
                               for i in range(len(self.track["distances"]))]
    # sum durations consecutively
    self.track["durations"] = [sum(self.track["durations"][:i + 1])
                               for i in range(len(self.track["durations"]))]
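# The two running totals above recompute each prefix sum from scratch, which is
# O(n^2) in the number of trackpoints. A linear-time sketch of the same
# cumulative-sum step using itertools.accumulate (an alternative, not the code
# this class actually uses):
from itertools import accumulate

def running_totals(steps):
    """Cumulative sums of per-segment steps, e.g. [0, 3.2, 1.1] -> [0, 3.2, 4.3]."""
    return list(accumulate(steps))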
def _calc(self):
    # find lap length
    max_time = 0
    end_time = 0
    start_time = 0
    min_time = 999999
    utc = ""
    total_distance = 0
    is_utc = self.fixes[0].is_utc
    if self.fixes and is_utc:
        tzname = " UTC"
    else:
        tzname = tzlocal.get_localzone()._tzname

    # Peek speed / gforce variables
    peek_state = {}

    for fix in self.fixes:
        def peek_metric_calc(state, metric_name, storage_name):
            last_metric_name = "last_%s" % metric_name
            last_direction_name = "last_%s_direction" % metric_name
            last_metric = state.get(last_metric_name, None)
            last_direction = state.get(last_direction_name, None)
            last_fix = state.get('last_fix', None)
            cur_metric = getattr(fix, metric_name)
            cur_direction = None
            if last_metric is not None:
                if cur_metric > last_metric:
                    cur_direction = 1
                elif cur_metric < last_metric:
                    cur_direction = -1
                else:
                    cur_direction = last_direction
            if (last_direction is not None and cur_direction is not None
                    and cur_direction != last_direction):
                # We've found either a straight vmax or a corner vmin
                getattr(self, storage_name).append({
                    "metric": last_metric,
                    "direction": last_direction,
                    "fix": last_fix,
                    "seconds": last_fix.lap_time
                })
            state[last_direction_name] = cur_direction
            state[last_metric_name] = cur_metric

        if 'last_fix' in peek_state:
            peek_metric_calc(peek_state, 'speed_mph', 'speed_markers')
            peek_metric_calc(peek_state, 'lat_g', 'lat_g_markers')
            peek_metric_calc(peek_state, 'lin_g', 'lin_g_markers')
            last_fix = peek_state['last_fix']
            hav = utils.haversine(last_fix.lat, last_fix.long, fix.lat, fix.long)
            total_distance += hav
        self.distance_at_fix[fix] = total_distance
        if fix.lap_time <= min_time:
            min_time = fix.lap_time
            start_time = fix.wall_time
            self.date = parser.parse(fix.date)
        if fix.lap_time > max_time:
            max_time = fix.lap_time
            end_time = fix.wall_time
        peek_state['last_fix'] = fix

    self.total_distance = total_distance
    self.lap_time = max_time
    datestr = "%s %s %s" % (fix.date, start_time, tzname)
    self.start_time = parser.parse(datestr)
    self.end_time = parser.parse("%s %s %s" % (fix.date, end_time, tzname))
    if is_utc:
        self.start_time = self.start_time.astimezone(tzlocal.get_localzone())
        self.end_time = self.end_time.astimezone(tzlocal.get_localzone())
    else:
        lz = tzlocal.get_localzone()
        self.start_time = lz.localize(self.start_time)
        self.end_time = lz.localize(self.end_time)
# We can then get the number of people covered by each location.
people_per_market = odisha_population.sum_by_labels(closest_villages)
for market_index in range(0, len(villages)):
    if market_index in people_per_market.keys():
        villages_service.append(people_per_market[market_index])
    else:
        villages_service.append(0)

# From here, we want to find the closest Wholesaler to each Village.
villages_closest_wm_name = []
villages_closest_wm_lat = []
villages_closest_wm_lon = []
villages_closest_wm_distance = []
for village in villages.itertuples():
    # Amass distances, calculate the closest, and append the data to lists.
    distances = haversine(village.Latitude, village.Longitude,
                          wholesales["Latitude"], wholesales["Longitude"])
    closest = np.argmin(np.ndarray.flatten(distances))
    raw_distance = distances[closest][0][0]
    closest_wm = wholesales.iloc[closest]
    villages_closest_wm_name.append(closest_wm['Wholesale_Name'])
    villages_closest_wm_lat.append(closest_wm['Latitude'])
    villages_closest_wm_lon.append(closest_wm['Longitude'])
    villages_closest_wm_distance.append(raw_distance)

# And produce an accessible village market list with the population served
# and closest wholesaler data.
villages["Closest_Wholesaler"] = villages_closest_wm_name
villages["Wholesaler_Latitude"] = villages_closest_wm_lat
villages["Wholesaler_Longitude"] = villages_closest_wm_lon
villages["Wholesaler_Distance"] = villages_closest_wm_distance
villages["Population_Served"] = villages_service
def __init__(self, graph, terminals, hot_spots=None, generator=None, distances=None):
    # Check whether the graph is node-weighted.
    if not graph.is_node_weighted():
        raise ValueError("Lazy Steiner Tree only works with node-weighted graphs.")
    # Extract the POI from the terminals list.
    if len(terminals) > 0:
        self.__poi = terminals[0]
    else:
        return
    # Set object variables.
    self.__graph = SuitabilityGraph()
    self.__graph.append_graph(graph)
    self.__terminals = terminals
    self.__hot_spots = None
    self.__nodes = None
    self.__s_d = {}
    self.__paths = {}
    self.__refs = {}
    # Set hot spots.
    if hot_spots is None:
        if generator is None:
            generator = SuitableNodeWeightGenerator()
        self.__hot_spots = self.__graph.get_suitable_nodes(generator,
                                                           excluded_nodes=terminals)
    else:
        self.__hot_spots = list(hot_spots)
    # Set nodes = hot spots + terminals.
    self.__nodes = list(self.__hot_spots)
    for t in terminals:
        self.__nodes.append(t)
    # Set distances.
    if distances is None:
        len_hot_spots = len(self.__hot_spots)
        self.__distances = {}
        for t in self.__terminals:
            dist, paths = dijkstra(self.__graph, t, self.__nodes)
            for n in self.__nodes:
                try:
                    self.__distances[tuple(sorted([t, n]))] = (dist[n], 'N')
                    self.__paths[tuple(sorted([t, n]))] = paths[n]
                except KeyError:
                    self.__distances[tuple(sorted([t, n]))] = (sys.maxint, 'N')
                    self.__paths[tuple(sorted([t, n]))] = []
        for h1 in self.__hot_spots:
            for i in range(self.__hot_spots.index(h1), len_hot_spots):
                h2 = self.__hot_spots[i]
                distance = 0
                d_type = 'E'
                if h1 == h2:
                    d_type = 'N'
                else:
                    distance = haversine(self.__graph[h1][2]['lat'],
                                         self.__graph[h1][2]['lon'],
                                         self.__graph[h2][2]['lat'],
                                         self.__graph[h2][2]['lon'])
                self.__distances[tuple(sorted([h1, h2]))] = (distance, d_type)
    else:
        self.__distances = dict(distances)
def func(entry=val):
    self.log('Parsing: %s, id=%d' % (entry['title'], entry['id']), logging.DEBUG)
    data = {}
    tmp = self.parse_name(entry['title'])
    if not tmp:
        self.log('Failed to get names for id=%d' % entry['id'], logging.ERROR)
        return
    for key in ['enName', 'zhName', 'locName']:
        if key in tmp:
            data[key] = tmp[key]
    alias = set([])
    # Drop alias entries that contain a country name
    for a in tmp['alias']:
        c = col_country.find_one({'alias': a}, {'_id': 1})
        if not c:
            alias.add(a)
    data['alias'] = list(alias)
    if 'tags' in entry:
        data['tags'] = list(set(filter(lambda val: val,
                                       [tmp.lower().strip() for tmp in entry['tags']])))
    # Popularity
    if 'comment_cnt' in entry:
        data['commentCnt'] = entry['comment_cnt']
    if 'vs_cnt' in entry:
        data['visitCnt'] = entry['vs_cnt']

    # Compute hotness
    def calc_hotness(key):
        if key not in entry:
            return 0.5
        x = entry[key]
        sig = '%s:%d' % (key, x)
        if sig not in hotness_cache:
            hotness_cache[sig] = col_raw.find({key: {'$lt': x}}).count() / float(tot_num)
        return hotness_cache[sig]

    hotness_terms = map(calc_hotness, ('comment_cnt', 'images_tot', 'vs_cnt'))
    data['hotness'] = sum(hotness_terms) / float(len(hotness_terms))

    crumb_ids = []
    for crumb_entry in entry['crumb']:
        if isinstance(crumb_entry, int):
            cid = crumb_entry
        else:
            cid = int(re.search(r'travel-scenic-spot/mafengwo/(\d+)\.html',
                                crumb_entry['url']).group(1))
        if cid not in crumb_ids:
            crumb_ids.append(cid)
    data['crumbIds'] = crumb_ids

    data['source'] = {'mafengwo': {'id': entry['id']}}
    if 'lat' in entry and 'lng' in entry:
        data['location'] = {'type': 'Point', 'coordinates': [entry['lng'], entry['lat']]}
    else:
        if self.args.type == 'mdd':
            tmp = self.retrieve_loc(entry['id'])
            if tmp:
                data['location'] = tmp
        else:
            tmp = self.poi_info(entry['id'])
            if tmp:
                data['location'] = {'type': 'Point',
                                    'coordinates': [tmp['lng'], tmp['lat']]}

    # Fetch the associated images
    sig = 'MafengwoMdd-%d' % data['source']['mafengwo']['id']
    image_list = [{'key': md5(tmp['url']).hexdigest()}
                  for tmp in col_raw_im.find({'itemIds': sig}).limit(10)]
    if image_list:
        data['images'] = image_list

    if self.args.type == 'mdd':
        self.parse_mdd_contents(entry, data)
    else:
        self.parse_vs_contents(entry, data)

    if self.args.baidu_match:
        if 'location' in data:
            coords = data['location']['coordinates']
            ret = self.get_baidu_sug(data['zhName'], coords)
            if not ret:
                ret = []
            for val in ret:
                val['dist'] = haversine(coords[0], coords[1], val['lng'], val['lat'])
            ret = filter(lambda val: val['sname'] == data['zhName'] and \
                         (5 >= val['type_code'] >= 3 if self.args.type == 'mdd'
                          else val['type_code'] >= 5) and
                         val['dist'] < 400 if self.args.type == 'mdd' else 200, ret)
            ret = sorted(ret, key=lambda val: (val['type_code'], val['dist']))
            if ret:
                data['source']['baidu'] = {'id': ret[0]['sid'], 'surl': ret[0]['surl']}
                self.log('Matched: %s => %s' % (data['zhName'], ret[0]['sname']))
        if 'baidu' not in data['source']:
            self.log('Not matched: %s' % data['zhName'])

    self.log('Parsing done: %s / %s / %s' % tuple(data[key] if key in data else None
                                                  for key in ['zhName', 'enName', 'locName']))
    col_proc.update({'source.mafengwo.id': data['source']['mafengwo']['id']},
                    {'$set': data}, upsert=True)
def generate_graph(results, generator, cost_type="distance", capacitated=False):
    graph = SuitabilityGraph(capacitated=capacitated)
    prev_way_id = None
    prev_node_id = None
    hotspots = set()
    pois = set()
    for r in results:
        way_id = r[0]
        node_id = r[1]
        type_ = r[3]
        stype = r[4]
        poi_name = r[5]
        lat = float(r[6])
        lon = float(r[7])
        sa1_code = r[8]
        sa2_code = r[9]
        hw_type = r[10]
        if node_id not in graph:
            if type_ == "hotspot":
                graph[node_id] = (generator.weights["VERY_SUITABLE"][0], {},
                                  {'lat': lat, 'lon': lon, 'sa1': sa1_code,
                                   'sa2': sa2_code, 'subtype': stype})
                hotspots.add(node_id)
            else:
                if type_ == "poi":
                    pois.add(node_id)
                graph[node_id] = (generator.weights["WARNING"][0], {},
                                  {'lat': lat, 'lon': lon, 'sa1': sa1_code,
                                   'sa2': sa2_code, 'subtype': stype, 'name': poi_name})
        if prev_way_id == way_id:
            prev_lat = graph[prev_node_id][2]['lat']
            prev_lon = graph[prev_node_id][2]['lon']
            # Cost estimation
            cost = 0
            distance = haversine(lat, lon, prev_lat, prev_lon)
            if cost_type == "distance":
                cost = distance
            elif cost_type == "travel_time":
                cost = osm_avg(distance, hw_type)
            # graph[node_id][1][prev_node_id] = cost
            graph[prev_node_id][1][node_id] = cost
        prev_way_id = way_id
        prev_node_id = node_id
    # pdb.set_trace()
    isolated = []
    # Both dictionaries will INCLUDE HOT SPOTS AND POIs.
    nodes_by_sa1_code = {}
    nodes_by_sa2_code = {}
    for node_id, info in graph.iteritems():
        if len(info[1]) == 0 or (len(info[1]) == 1 and info[1].keys()[0] == node_id):
            isolated.append(node_id)
        else:
            sa1_code = info[2]['sa1']
            sa2_code = info[2]['sa2']
            if sa1_code in nodes_by_sa1_code:
                nodes_by_sa1_code[sa1_code].append(node_id)
            else:
                nodes_by_sa1_code[sa1_code] = [node_id]
            if sa2_code in nodes_by_sa2_code:
                nodes_by_sa2_code[sa2_code].append(node_id)
            else:
                nodes_by_sa2_code[sa2_code] = [node_id]
    for node_id in isolated:
        del graph[node_id]
        if node_id in hotspots:
            hotspots.remove(node_id)
        if node_id in pois:
            pois.remove(node_id)
    # print "h:", len(hotspots), "p:", len(pois)
    return graph, list(hotspots), list(pois), nodes_by_sa1_code, nodes_by_sa2_code
def split_data_in_user_groups():
    """
    :param raw_data: raw data in the form 'shopId,userId,score,"date",filtered,latitude,longitude'
    :param count: for each user, the number of bad reviews
    :param cur_label: decides whether this user is a review spammer
    :param loc_arr: a record of this user's locations across reviews
    :param cur_data: a list of distances computed according to the distance mode
    :return: writes two csv files, mode_x_data.csv and mode_x_label.csv, based on the distance mode
    """
    raw_data = defaultdict(list)
    data = []
    label = []
    os.chdir(config.dataset_dir)
    with open(config.raw_data_name, "r", encoding="latin-1") as csvfile:
        csvfile.readline()
        read = csv.reader(csvfile)
        for i in read:
            i[3] = i[3].replace('-', '')
            raw_data[i[1]].append(i[1:])
    for key in raw_data:
        if len(raw_data[key]) > config.filter_num:
            sorted_data = sorted(raw_data[key], key=lambda x: int(x[2]))
            count = sum([int(item[3]) for item in sorted_data])
            cur_label = 0 if count / len(sorted_data) <= 0.5 else 1
            loc_arr = [[float(item[-2]), float(item[-1])] for item in sorted_data]
            if config.distance_mode == 0:
                # Mode 0: distance of every review from the user's centroid
                center = list(zip(*loc_arr))
                center_point = np.array([np.mean(np.array(center[0])),
                                         np.mean(np.array(center[1]))])
                loc_arr = np.array(loc_arr)
                cur_data = [haversine(center_point, item) for item in loc_arr]
            else:
                # Otherwise: distance between consecutive reviews
                loc_arr = np.array(loc_arr)
                cur_data = [haversine(loc_arr[i - 1], loc_arr[i])
                            for i in range(1, len(loc_arr))]
            data.append(cur_data)
            label.append(cur_label)
    path = ['mode', str(config.distance_mode), '_']
    file = ['data.csv', 'label.csv']
    with open('{0[0]}{0[1]}{0[2]}{1[0]}'.format(path, file), "w", newline="") as data_csv:
        data_writer = csv.writer(data_csv)
        for item in data:
            data_writer.writerow(item)
    with open('{0[0]}{0[1]}{0[2]}{1[1]}'.format(path, file), "w", newline="") as label_csv:
        label_writer = csv.writer(label_csv)
        label_writer.writerow(label)
    return 0
def distance(self, otherLocation):
    return utils.haversine(self, otherLocation)
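# Every snippet above delegates to some haversine() helper, but the calling
# conventions differ from project to project: (lon, lat) pairs, (lat, lon)
# pairs, point objects, vectorized numpy arrays, an explicit sphere radius, or
# an extra units argument. A minimal sketch of a typical scalar implementation,
# assuming (lon1, lat1, lon2, lat2) in decimal degrees with kilometers out
# (not any one project's actual helper):
from math import radians, sin, cos, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """Great-circle distance in kilometers between two points given in
    decimal degrees."""
    lon1, lat1, lon2, lat2 = map(radians, (lon1, lat1, lon2, lat2))
    a = sin((lat2 - lat1) / 2) ** 2 + cos(lat1) * cos(lat2) * sin((lon2 - lon1) / 2) ** 2
    return 2 * asin(sqrt(a)) * 6371  # mean Earth radius in km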