def get_user_msoas_paralel(args):
    """Write the ``msoa11cd`` code of the polygon containing each user's point.

    ``args`` is a single indexable bundle, read positionally:

    * ``args[0]`` -- mapping of user id -> coordinate, where ``coord[0]`` is
      the longitude and ``coord[1]`` the latitude (both convertible to float).
    * ``args[1]`` -- ``cityshape``; must support ``contains(point)`` and
      boolean-mask indexing, and carry an ``msoa11cd`` column.
    * ``args[2]`` -- ``thread_id``, used in progress output and as the suffix
      of the output file name.
    * ``args[3]`` -- city; accepted for call compatibility but not used here.
    * ``args[4]`` -- ``outfolder``, the directory receiving the output file.

    One ``user<TAB>msoa`` line is written to
    ``<outfolder>/venue_msoa_attributes_<thread_id>`` for every user whose
    point lies in exactly one polygon; users matching zero or several
    polygons are skipped. Returns ``None``.
    """
    user_coord_chunks = args[0]
    cityshape = args[1]
    thread_id = args[2]
    outfolder = args[4]

    total = len(user_coord_chunks)
    out_path = outfolder + '/venue_msoa_attributes_' + str(thread_id)

    # The context manager guarantees the file is flushed and closed even if a
    # coordinate fails to parse or the spatial query raises part-way through.
    with open(out_path, 'w') as fout:
        for ind, (user, coord) in enumerate(user_coord_chunks.items()):
            if ind % 100 == 0:
                print(thread_id, '\t', ind, '/', total)

            lat = float(coord[1])
            lng = float(coord[0])
            query_df = cityshape[cityshape.contains(Point(lng, lat))]

            # Only an unambiguous (single-polygon) hit is recorded.
            if query_df.shape[0] == 1:
                msoa = query_df.iloc[0]['msoa11cd']
                fout.write(user + '\t' + msoa + '\n')
def get_state_count(data):
    """Sum the ``widgets`` value of every row of ``data`` per containing state.

    For each row, a point is built from its ``lng``/``lat`` values and tested
    against the polygons of the module-level ``state_map`` (columns
    ``geometry`` and ``NAME``). The row's ``widgets`` value is added to the
    first state whose polygon contains the point; rows that fall in no
    polygon contribute nothing.

    Args:
        data: table with ``lng``, ``lat`` and ``widgets`` columns. Index
            labels are used with ``%`` for progress output, so they are
            assumed to be integers.

    Returns:
        dict mapping state name -> accumulated ``widgets``.

    A row that raises is reported on stdout and skipped, so one bad record
    does not abort the whole chunk.
    """
    state_count = {}
    for index in data.index:
        if index % 1000 == 0:
            print("processing at:", index)
        try:
            point = Point(data.loc[index, "lng"], data.loc[index, "lat"])
            for index1 in state_map.index:
                state_polygon = state_map.loc[index1, "geometry"]
                state_name = state_map.loc[index1, "NAME"]
                if point.within(state_polygon):
                    widgets = data.loc[index, "widgets"]
                    previous = state_count.get(state_name)
                    if previous is None:
                        state_count[state_name] = widgets
                    else:
                        state_count[state_name] = previous + widgets
                    # A point is credited to the first matching state only.
                    break
        except Exception:
            # `Exception` rather than a bare `except`, so Ctrl-C and
            # SystemExit still stop a long-running job.
            print("ERROR in index: ", index, data.loc[index, "lng"])
    return state_count
def get_wards_paralel(args):
    """Write the ward polygon attributes for each venue inside the city.

    ``args`` is a single indexable bundle, read positionally:

    * ``args[0]`` -- mapping of venue id -> coordinate, where ``coord[0]`` is
      the longitude and ``coord[1]`` the latitude (both convertible to float).
    * ``args[1]`` -- ``cityshape``; must support ``contains(point)`` and
      boolean-mask indexing, and carry ``GSS_CODE`` and ``geometry`` columns.
    * ``args[2]`` -- ``thread_id``, used in progress output and as the suffix
      of the output file name.
    * ``args[3]`` / ``args[4]`` -- ``bbox`` and ``city``, forwarded to
      ``check_box`` as a pre-filter before the polygon query.
    * ``args[5]`` -- ``outfolder``, the directory receiving the output file.

    For every venue that passes ``check_box`` and lies in exactly one polygon,
    one tab-separated line is written to
    ``<outfolder>/venue_ward_attributes_<thread_id>``:

        venue, lng, lat, ward, bounds[0], bounds[1], bounds[2], bounds[3],
        polygon length, polygon area

    Returns ``None``.
    """
    venues_coord_chunks = args[0]
    cityshape = args[1]
    thread_id = args[2]
    bbox = args[3]
    city = args[4]
    outfolder = args[5]

    total = len(venues_coord_chunks)
    out_path = outfolder + '/venue_ward_attributes_' + str(thread_id)

    # The context manager guarantees the file is flushed and closed even if a
    # coordinate fails to parse or the spatial query raises part-way through.
    with open(out_path, 'w') as fout:
        for ind, (venue, coord) in enumerate(venues_coord_chunks.items()):
            if ind % 100 == 0:
                print(thread_id, '\t', ind, '/', total)

            lat = float(coord[1])
            lng = float(coord[0])

            # Bounding-box pre-filter runs before the polygon query.
            if not check_box(bbox, city, lat, lng):
                continue

            query_df = cityshape[cityshape.contains(Point(lng, lat))]
            # Only an unambiguous (single-polygon) hit is recorded.
            if query_df.shape[0] != 1:
                continue

            match = query_df.iloc[0]
            ward = match['GSS_CODE']
            polygon = match['geometry']
            bounds = polygon.bounds

            fields = [
                venue,
                str(lng),
                str(lat),
                ward,
                str(bounds[0]),
                str(bounds[1]),
                str(bounds[2]),
                str(bounds[3]),
                str(polygon.length),
                str(polygon.area),
            ]
            fout.write('\t'.join(fields) + '\n')
def coordinates_to_msoa(lats, lons, cityshape):
    """Look up the polygon of ``cityshape`` that contains a coordinate.

    Args:
        lats: latitude of the point.
        lons: longitude of the point.
        cityshape: table supporting ``contains(point)`` and boolean-mask
            indexing, with ``msoa11cd`` and ``geometry`` columns.

    Returns:
        ``(msoa11cd, geometry)`` of the matching row when exactly one polygon
        contains the point; otherwise the placeholder ``(0, 0)``. The lookup
        is best-effort: any error raised along the way also yields ``(0, 0)``.
    """
    not_found = (0, 0)
    try:
        matches = cityshape[cityshape.contains(Point(lons, lats))]
        if matches.shape[0] != 1:
            return not_found
        return (matches.iloc[0]['msoa11cd'], matches.iloc[0]['geometry'])
    except Exception:
        # Deliberately swallowed: callers rely on the (0, 0) placeholder.
        return not_found
def true_local_pop(self, x, y, r):
    """
    Calculates local population by geometric intersection.

    A disc of radius ``r`` is built around ``(x, y)``; every feature whose
    geometry intersects the disc contributes its ``total_pop`` scaled by the
    fraction of the feature's area that lies inside the disc.

    More precise but much slower than the rasterization-based method;
    included for validation.
    """
    disc = Point((x, y)).buffer(r)
    population = 0
    # The spatial index narrows the search to features whose bounding box
    # overlaps the disc's bounding box; the exact test follows.
    for fid in list(self.vtd_idx.intersection(disc.bounds)):
        shape = getattr(self.df.iloc[fid], 'geometry')
        if not shape.intersects(disc):
            continue
        # buffer(0) is applied before intersecting, as in the original code
        # (presumably to repair invalid geometries -- TODO confirm).
        overlap_area = shape.buffer(0).intersection(disc).area
        population += self.total_pop[fid] * (overlap_area / shape.area)
    return population
# Load the flowline and catchment layers.
flowlines = gpd.GeoDataFrame.from_file(
    "H:/NHDPlusV21/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDFlowline.shp"
)
catchments = gpd.GeoDataFrame.from_file(
    "H:/NHDPlusV21/NHDPlusPN/NHDPlus17/NHDPlusCatchment/Catchment.shp")

# Print the COMID of every flowline that is not a plain LineString; the
# end-point extraction below reads `.coords` from each geometry.
not_linestring = flowlines.geometry.geom_type != 'LineString'
for comid in flowlines.loc[not_linestring, 'COMID']:
    print(comid)

# Exclude one flowline by COMID (presumably the feature reported by the check
# above -- TODO confirm against the data).
flowlines = flowlines[flowlines.COMID != 947050377].copy(deep=True)
COMs = flowlines['COMID']

# End point (last vertex) of every remaining flowline.
# The points are built on `flowlines.index` on purpose: the filter above
# leaves a gap in that index, and column-wise combination aligns on index
# labels. A fresh 0..n-1 index here would pair every COMID after the gap
# with the end point of a different flowline.
endpts = gpd.GeoDataFrame(geometry=gpd.GeoSeries(
    [Point(list(geom.coords)[-1]) for geom in flowlines.geometry],
    index=flowlines.index))

# Bring in the COMIDs from original flowlines
result = gpd.GeoDataFrame(
    {'COMID': COMs, 'geometry': endpts.geometry}, geometry='geometry')
result.to_file("H:/WorkingData/Junk/result_test.shp")  # just to check results

# restrict to catchment endpoints
result = result[result['COMID'].isin(catchments['FEATUREID'])].dropna()
result["Lon"] = result.centroid.map(lambda p: p.x)
def allocate(self, r_P, theta, to_vtd=None):
    """
    Try to extend the current district toward a target VTD.

    Coordinates are given in the (r_P, θ) system, where r_P is a proportion
    of the population (0-1) and θ is a direction (in radians).

    Args:
        r_P: population proportion; clamped to [0, 1] before use.
        theta: direction in radians.
        to_vtd: optional VTD index to target directly. When given, r_P and
            theta are ignored and the VTD's centroid becomes the new
            position. ``None`` (the default) means "derive the target from
            (r_P, θ)"; index 0 is a valid target.

    Returns:
        None. The outcome is communicated through ``self.update`` and the
        instance state (``self.x``, ``self.y``, ``self.i``).

    With probability ``P_RANDOM_ALLOC`` (when the current district is
    non-empty and no explicit target was given) the call is redirected to a
    randomly chosen unallocated VTD on the district's border.
    """

    def farthest_first(candidates):
        # Positions into `candidates`, ordered from the farthest centroid to
        # the nearest one relative to the current (self.x, self.y).
        dx = self.centroids[0][candidates] - self.x
        dy = self.centroids[1][candidates] - self.y
        return np.flip(np.argsort(np.sqrt(dx**2 + dy**2)))

    # random() is evaluated first, as in the original, so the random stream
    # is consumed identically whether or not a target was supplied.
    if random() < P_RANDOM_ALLOC and self.vtd_by_district[
            self.current_district] and to_vtd is None:
        self.allocate(0, 0, to_vtd=choice(
            self.graph.unallocated_on_border(self.current_district)))
        return

    if to_vtd is None:
        # Convert the population radius to a geographic one and clamp the
        # resulting point to the map extent.
        r_P_abs = min(max(0, r_P), 1) * self.total_pop.sum()
        r_G = self.people_to_geo(r_P_abs)
        to_x = min(max(self.min_x, r_G * np.cos(theta) + self.x), self.max_x)
        to_y = min(max(self.min_y, r_G * np.sin(theta) + self.y), self.max_y)
        p = Point((to_x, to_y))

        vtd_idx = None
        for fid in self.vtd_idx.intersection(p.bounds):
            # API: https://streamhacker.com/2010/03/23/python-point-in-polygon-shapely/
            if getattr(self.df.iloc[fid], 'geometry').contains(p):
                vtd_idx = fid
                break

        if vtd_idx in self.vtd_by_district[self.current_district]:
            # Landed inside the current district: just move there.
            self.x = to_x
            self.y = to_y
            return
        # `is None`, not truthiness: VTD index 0 is a legitimate hit.
        if vtd_idx is None or vtd_idx not in self.vtd_by_district[0]:
            return  # already allocated or out of bounds
    else:
        vtd_idx = to_vtd
        to_x = self.centroids[0][vtd_idx]
        to_y = self.centroids[1][vtd_idx]

    # Algorithm for allocating VTDs:
    # 1. Figure out if the county, or the remaining unallocated part of the
    #    county, can be allocated wholly to the current district.
    #    This requires:
    #    a. The remaining county is contiguous to the current district.
    #    b. The remaining county's population plus the current district's
    #       population less than or equal to the expected number of people
    #       per district, ± some very small ϵ.
    #    If allocating the county results in two isolated regions of
    #    whitespace, the smaller region of whitespace will be allocated to
    #    the district, and the population of this region will be added to
    #    the remaining county's population when checking the equal
    #    population constraint.
    #    If the remaining county can be allocated, do so. If contiguity is
    #    violated, abort. Otherwise, proceed to step 2.
    county = self.df.iloc[vtd_idx]['county']
    county_pop = 0
    for idx in self.unallocated_in_county[county]:
        county_pop += self.total_pop[idx]

    if not self.graph.contiguous(self.unallocated_in_county[county]):
        return  # no connection between county and current district

    if to_vtd is not None:
        self.x = to_x
        self.y = to_y
        self.i += 1

    if self.update(self.unallocated_in_county[county], county_pop):
        self.i += 1
        return  # whole county allocated

    # Algorithm for allocating VTDs (cont'd):
    # 2. If updating fails due to population constraints, remove cities
    #    (whole or fractional) on the border of the allocation. Do this
    #    greedily until the constraints are satisfied, removing the cities
    #    farthest from (x,y) first.
    # 3. If updating fails due to population constraints, remove VTDs on the
    #    border of the allocation. Do this greedily until the constraints
    #    are satisfied, removing the VTDs farthest from (x,y) first.
    vtds = copy(self.unallocated_in_county[county])

    # --- Step 2: peel off whole cities, farthest first -------------------
    tested = set()
    tested_cities = set()
    distances = farthest_first(vtds)
    last_idx = 0
    while len(tested_cities) < len(
            {self.vtd_to_city[vtd] for vtd in vtds}) and len(tested) < len(vtds):
        farthest_vtd = None
        # `start=last_idx` keeps `pos` absolute, so the resume point does not
        # slide backwards; the loop variable no longer clobbers `vtd_idx`.
        for pos, order_idx in enumerate(distances[last_idx:], start=last_idx):
            candidate = vtds[order_idx]
            if candidate not in tested and self.vtd_to_city[
                    candidate] not in tested_cities:
                farthest_vtd = candidate
                last_idx = pos + 1
                break
        if farthest_vtd is None:
            break

        # Tentatively drop every unallocated VTD of that city.
        test_vtds = copy(vtds)
        removed = []
        for city_vtd in self.unallocated_in_city[
                self.vtd_to_city[farthest_vtd]]:
            if city_vtd in test_vtds:
                test_vtds.remove(city_vtd)
                removed.append(city_vtd)

        if self.graph.contiguous(test_vtds):
            if self.update(test_vtds,
                           sum(self.total_pop[vtd] for vtd in test_vtds)):
                return
            # Still too populous: make the removal permanent and restart the
            # scan over the reduced candidate set.
            for vtd in removed:
                vtds.remove(vtd)
            tested = set()
            distances = farthest_first(vtds)
            last_idx = 0
        else:
            tested.add(farthest_vtd)
            tested_cities.add(self.vtd_to_city[farthest_vtd])

    # --- Step 3: peel off single border VTDs, farthest first -------------
    tested = set()  # TODO should this be here?
    distances = farthest_first(vtds)
    border_vtds = set(self.graph.border_vtds(vtds))
    last_idx = 0
    while len(vtds) > 0 and len(tested) < len(vtds):
        farthest_vtd = None
        for pos, order_idx in enumerate(distances[last_idx:], start=last_idx):
            candidate = vtds[order_idx]
            if candidate not in tested and candidate in border_vtds:
                farthest_vtd = candidate
                last_idx = pos + 1
                break
        if farthest_vtd is None:
            break

        test_vtds = copy(vtds)
        test_vtds.remove(farthest_vtd)
        if self.graph.contiguous(test_vtds):
            if self.update(test_vtds,
                           sum(self.total_pop[vtd] for vtd in test_vtds)):
                return
            vtds.remove(farthest_vtd)
            tested = set()
            distances = farthest_first(vtds)
            border_vtds = set(self.graph.border_vtds(vtds))
            last_idx = 0
        else:
            tested.add(farthest_vtd)

    # last resort: allocate a single VTD (the original target)
    if self.graph.contiguous([vtd_idx]) and vtd_idx in self.vtd_by_district[0]:
        self.update([vtd_idx], self.total_pop[vtd_idx])