def get_pop_s(shp, pop_units, verbose=0):
    """Estimate the population falling inside each polygon of ``shp``.

    Population comes from the ``pobtot`` column of ``pop_units`` and is
    assumed to be spread evenly inside each pop_unit, so every
    (pop_unit, shape) intersection receives
    ``intersection area / used pop_unit area * pobtot``.

    :param shp: target polygons; its index name labels the shape column
    :param pop_units: polygons with a ``pobtot`` column; its index name
        labels the pop_unit column
    :param verbose: forwarded to ``gis.polys2polys``
    :return: DataFrame with a single ``Pop`` column, reindexed to
        ``shp.index``; shapes with no remaining intersection get 0
    """
    pu_name = pop_units.index.name
    shp_name = shp.index.name
    pu_area_col = pu_name + '_area'

    pieces = gis.polys2polys(pop_units,
                             shp,
                             pu_name,
                             shp_name,
                             area_crs=mex.crs,
                             intersection_only=False,
                             verbose=verbose)
    pieces = pieces.merge(pop_units[['pobtot']],
                          left_on=pu_name,
                          right_index=True)

    # pop_units that merely touch a shape's border produce tiny weights;
    # drop them before they disturb the area bookkeeping below
    pieces = pieces[pieces.weight > .0001].copy()

    # The area of a pop_unit is redefined as the total intersected area that
    # survived the filter: pop_unit polygons may not match the official map
    # exactly, and buffered points add artificial area that can be counted
    # several times.
    used_area = pieces.groupby(pu_name).iarea.sum().rename(pu_area_col)
    pieces = pieces.drop([pu_area_col, 'weight'], axis=1)
    pieces = pieces.merge(used_area.to_frame(),
                          left_on=pu_name,
                          right_index=True)

    # share of the pop_unit's population that lands in this intersection
    pieces['Pop'] = pieces.iarea / pieces[pu_area_col] * pieces.pobtot

    pop_by_shape = pieces.groupby(shp_name).Pop.sum().to_frame()
    return pop_by_shape.reindex(shp.index, fill_value=0)
def mpa_grids(side, per_mun=False, urb_only=False, to_4326=False):
    """Return the grid cells covering the metropolitan areas.

    A gzipped GeoJSON under ``DIR_ZM`` is used as cache: when it exists it is
    read and returned as-is (indexed by ``grid``). Otherwise the grids are
    built from ``mex_helper.grids('metropolitans_all', side)``.

    :param side: grid side, used in the cache file name and passed to
        ``mex_helper.grids``
    :param per_mun: selects the per-municipality variant
    :param urb_only: selects the urban-only variant
    :param to_4326: when True, freshly computed grids are reprojected to
        EPSG:4326 before being returned
    :return: GeoDataFrame indexed by ``grid``
    """
    mun_tag = 'perMun' if per_mun else 'whole'
    urb_tag = 'urb' if urb_only else 'uNr'
    path = f'{DIR_ZM}/grids_{side}_{mun_tag}_{urb_tag}.geojson.gz'

    if os.path.exists(path):
        print('loading existing file', path)
        cached = gp.read_file(f'gzip://{path}').set_index('id')
        cached.index.name = 'grid'
        return cached

    print('=====computing mpa_grids')
    grids = mex_helper.grids('metropolitans_all', side)

    if per_mun or urb_only:
        import gzip

        grids = grids.to_crs(mex.crs)
        variant = mpa_all_variants(per_mun=per_mun, urb_only=urb_only)
        vname = variant.index.name
        crossed = gis.polys2polys(grids,
                                  variant,
                                  'grid',
                                  vname,
                                  area_crs=mex.crs,
                                  intersection_only=False)
        pieces = crossed.merge(variant.drop('geometry', axis=1),
                               left_on=vname,
                               right_index=True)
        # a grid cell split by several variant polygons needs a unique id per
        # piece: "<variant id>-<grid id>"
        pieces['grid'] = (pieces[vname].astype(str) + '-' +
                          pieces.grid.astype(str))

        keep = ['grid', vname, 'geometry']
        if vname != 'CVE_SUN':
            keep.append('CVE_SUN')
        grids = pieces[keep].set_index('grid')

        # only this branch writes the cache file
        with gzip.open(path, 'wt') as fout:
            fout.write(grids.to_json())
    else:
        zms = mpa_all()
        # map the metropolitan name stored on the grids back to its CVE_SUN;
        # SUN 13 uses the article "del" instead of "de"
        name_to_sun = {}
        for sun, row in zms['NOM_SUN'].to_frame().iterrows():
            prefix = ('Zona metropolitana del '
                      if sun == 13 else 'Zona metropolitana de ')
            name_to_sun[prefix + row.values[0]] = sun
        grids['CVE_SUN'] = grids.metropolitan.apply(
            lambda name: name_to_sun[name])

        if not to_4326:
            grids = grids.to_crs(mex.crs)
        grids = grids.set_index('grid')[['CVE_SUN', 'geometry']]

    if to_4326:
        grids = grids.to_crs(epsg=4326)
    return grids
def tl2grid(t2loc, rkind, grid_side, loc_buffer):
    """Distribute tower-x-localidad pieces onto grids and save the weights.

    The final weight of a (tower, localidad, grid) piece is
    ``w_t2l_bP * w_tl2g_bA``: the weight already carried by ``t2loc`` times
    the area weight of the piece-to-grid intersection.

    :param t2loc: GeoDataFrame of tower-x-localidad pieces. It must already
        carry the columns selected below (``w_t2l_bP``, ``tl_pop``,
        ``loc_pop``, ``tower_pop``, ...) in addition to ``geometry`` and
        ``iarea``.
    :param rkind: region kind, used for the output name and to load grids
    :param grid_side: grid side, used for the output name and to load grids
    :param loc_buffer: localidad buffer, used in the output name only
    :return: the weight table, or ``None`` when the output CSV already exists
    """
    path = f'data/mex_tower/Tw2Loc2GridByArea-{rkind}-GS{grid_side}-LBf{loc_buffer}.csv'
    if os.path.exists(path):
        print(path, 'exist, skipped')
        return

    # Use the function's own arguments (not notebook globals) so the data
    # always matches the file name it is saved under.
    grids = mex.grids(rkind, grid_side)
    print('T2LOC.shape =', t2loc.shape, 'Grids.shape =', grids.shape)

    print('gis.p2p on t2loc and grids', datetime.datetime.now())
    tl2g_raw = gis.polys2polys(t2loc,
                               grids,
                               pname1='tl',
                               pname2='grid',
                               cur_crs=4326,
                               area_crs=mex.AREA_CRS,
                               intersection_only=False)

    print('computing the final weight from tower to grid',
          datetime.datetime.now())
    tl2g = tl2g_raw.rename(columns={
        'iarea': 'tl2g_area',
        'weight': 'w_tl2g_bA'
    })
    tl2g = tl2g.merge(t2loc.drop(['geometry', 'iarea'], axis=1),
                      left_on='tl',
                      right_index=True)
    tl2g['weight'] = tl2g.w_t2l_bP * tl2g.w_tl2g_bA
    # 'loclidad_area' is misspelled on purpose: it matches the column name
    # produced upstream.
    tl2g = tl2g[[
        'weight', 'tl', 'localidad', 'tower', 'w_t2l_bP', 'grid', 'w_tl2g_bA',
        'tl_pop', 'tower_pop', 'tl2g_area', 'tl_area', 'geometry', 'loc_pop',
        'tower_area', 'grid_area', 'loclidad_area'
    ]]

    print('saving weights', datetime.datetime.now())
    tl2g.drop('geometry', axis=1).to_csv(path)

    # create example maps for the process
    # NOTE(review): `Tvor` is a module-level object not defined in this
    # function; it is updated in place here.
    Tvor['distributed_weight'] = tl2g.groupby('tower').weight.sum()
    # assign back instead of a chained `fillna(inplace=True)`, which may act
    # on a temporary copy
    Tvor['distributed_weight'] = Tvor['distributed_weight'].fillna(0)
    visualize_selected_towers(rkind, grid_side)
    visualize_selected_grids(rkind, grid_side, tl2g)
    return tl2g
def tower2loc(loc_buffer):
    """Split every tower's voronoi cell among localidads by population.

    The result is cached as ``data/mex_tower/tower2loc-{loc_buffer}.geojson.gz``;
    when that file exists it is read and returned instead of recomputed.

    :param loc_buffer: buffer passed to ``mex.localidad`` and used in the
        cache file name
    :return: GeoDataFrame of tower-x-localidad intersections. When freshly
        computed it carries ``iPop`` (population of the intersection),
        ``tower_pop`` (population covered by the tower) and
        ``weight = iPop / tower_pop``.
    """
    cache_path = f'data/mex_tower/tower2loc-{loc_buffer}.geojson.gz'

    if os.path.exists(cache_path):
        print('reading existing t2loc file')
        cached = gp.read_file(f'gzip://{cache_path}').set_index('id')
        cached.index = cached.index.astype(int)
        cached.index.name = None
        gis.assign_crs(cached, 4326)
        return cached

    # =============
    # distribute tower's users count to intersections with localidad by population
    # =============
    print('load tower vor')
    tvor = mex.tower_vor()

    print('load localidads')
    all_locs = mex.localidad(loc_buffer, to_crs=4326)
    populated = all_locs[all_locs.Pop > 0]

    print('intersect tower and loc')
    # intersection between every tower cell and every populated localidad
    pieces = gis.polys2polys(tvor,
                             populated,
                             pname1='tower',
                             pname2='localidad',
                             cur_crs=4326,
                             area_crs=mex.AREA_CRS,
                             intersection_only=False)
    pieces = pieces.merge(populated[['Pop']],
                          left_on='localidad',
                          right_index=True)

    print('compute weight')
    # The area of a localidad is taken as the total area covered by towers:
    # localidad polygons are not exactly the same as the official map, and
    # buffered points add artificial area.
    covered_area = pieces.groupby('localidad').iarea.sum().rename(
        'loclidad_area')
    pieces = pieces.drop(['localidad_area', 'weight'], axis=1)
    pieces = pieces.merge(covered_area.to_frame(),
                          left_on='localidad',
                          right_index=True)

    # iPop: population of one tower-x-localidad intersection, assuming the
    # localidad's population is spread evenly over its (covered) area
    pieces['iPop'] = pieces.Pop * pieces.iarea / pieces.loclidad_area

    # population covered by a tower = sum of iPop over its intersections
    pop_per_tower = pieces.groupby('tower').iPop.sum().rename('tower_pop')
    pieces = pieces.merge(pop_per_tower.to_frame(),
                          left_on='tower',
                          right_index=True)

    # weight used to distribute a tower's users count
    pieces['weight'] = pieces.iPop / pieces.tower_pop

    print('saving result')
    with gzip.open(cache_path, 'wt') as fout:
        fout.write(pieces.to_json())
    return pieces
def to_mpa_vors(by='area', per_mun=False, urb_only=False, zms_vors=None):
    """Weights for distributing tower counts to metropolitan voronoi pieces.

    The table is cached as a CSV under ``DIR_INTPL``; when the file exists it
    is loaded and returned directly.

    :param by: ``'area'`` uses the weights already carried by ``zms_vors``;
        ``'pop'`` chains the tower-to-ageb population weight with the
        ageb-piece-to-vor area weight
    :param per_mun: variant flag, used in the file name and to load vors
    :param urb_only: variant flag, used in the file name and to load vors
    :param zms_vors: optional pre-loaded result of ``region.mpa_vors`` so it
        is not loaded again
    :return: DataFrame with columns ``tower``, ``vor``, ``weight``
    """
    assert by in ('area',
                  'pop'), f'by={by}, it should be either "area" or "pop"'
    path = f'{DIR_INTPL}/tower_to_mpa_vors_{PER_MUN_STR(per_mun)}_{URB_ONLY_STR(urb_only)}_{by}.csv'

    if os.path.exists(path):
        print('to_mpa_vors loading existing file', path)
        return pd.read_csv(path, index_col=0)

    print('computing to_map_vors', by, per_mun, urb_only)
    if zms_vors is None:
        zms_vors = region.mpa_vors(per_mun=per_mun,
                                   urb_only=urb_only,
                                   to_4326=False)

    if by != 'area':
        t2a = to_mpa_agebs('pop', return_geom=True)
        t2a_renamed = t2a.rename(columns={
            'iPop': 'txa_pop',
            'Pop': 'ageb_pop',
            'weight': 'w_t2a_bP'
        })
        # intersect the tower-x-ageb pieces with the vors
        crossed = gis.polys2polys(t2a,
                                  zms_vors,
                                  pname1='txa',
                                  pname2='vor',
                                  area_crs=mex.crs,
                                  intersection_only=False)
        crossed = crossed.rename(columns={
            'iarea': 'txa2v_area',
            'weight': 'w_txa2v_bA'
        })
        crossed = crossed.merge(t2a_renamed.drop(['geometry', 'iarea'],
                                                 axis=1),
                                left_on='txa',
                                right_index=True)
        # population weight (tower -> ageb piece) times
        # area weight (ageb piece -> vor)
        crossed['weight'] = crossed.w_t2a_bP * crossed.w_txa2v_bA
        crossed = crossed[[
            'weight', 'txa', 'ageb', 'tower', 'w_t2a_bP', 'vor', 'w_txa2v_bA',
            'txa_pop', 'tower_pop', 'txa2v_area', 'txa_area', 'geometry',
            'pobtot', 'tower_area', 'vor_area', 'ageb_area'
        ]]
        weights = crossed.groupby(['tower', 'vor']).weight.sum().reset_index()
    else:
        weights = zms_vors.reset_index()

    # discard numerically negligible weights
    weights = weights[weights.weight > 1e-12]
    result_cols = ['tower', 'vor', 'weight']
    weights[result_cols].to_csv(path)
    return weights[result_cols]
def tower_vor(rkind=None, intersection_only=False, in_country=True):
    """
    Load the tower voronoi polygons, building and saving them on first use.

    :param rkind: region kind; when given, the voronoi polygons are
        intersected with ``regions(rkind)`` and that result is returned
    :param intersection_only: forwarded to ``polys2polys``; only used
        together with rkind
    :param in_country: whether to clip the polygons by the country border;
        also selects which GeoJSON file is used
    :return: tower voronoi polygons indexed by ``gtid``, or their
        intersection with the regions when rkind is given
    """
    suffix = 'in_country' if in_country else 'raw_vor'
    path = f'data/mex_tower/mex_tvor_{suffix}.geojson'

    if not os.path.exists(path):
        towers = tower()
        # voronoi polygons across mexico
        tvor = lonlats2vor_gp(towers.lonlat.tolist(), dataframe=True)
        tvor['gtid'] = towers.gtid
        if in_country:
            print('clipping tvor outside mexico country boarder')
            border = country().geometry.values[0]
            tvor['geometry'] = tvor.geometry.apply(
                lambda poly: clip_if_not_within(poly, border))
        assign_crs(tvor, 4326)
        print(f'saving tvor file: {path}')
        # saved with gtid as a regular column; it becomes the index afterwards
        tvor.to_file(path, driver='GeoJSON')
        tvor.set_index('gtid', inplace=True)
    else:
        tvor = gp.read_file(path)
        tvor.set_index('gtid', inplace=True)
        assign_crs(tvor, 4326, ignore_gpdf_crs=True)
        print(f'loading existing tvor file: {path}')

    if rkind is None:
        return tvor

    rgns = regions(rkind)
    return polys2polys(tvor,
                       rgns,
                       tvor.index.name,
                       rgns.index.name,
                       cur_crs=4326,
                       area_crs=AREA_CRS,
                       intersection_only=intersection_only)
def interpolate_uni(tvors, su, tw_footfall, cache_path=None, verbose=0):
    """
    Interpolate tower footfall to spatial units using the ``weight`` column
    returned by ``gis.polys2polys``.

    :param tvors: voronoi polygons
    :param su: target spatial units; its index name labels the unit column
    :param tw_footfall: 24 hour footfall, shape: [n_tower, 24 hours]
    :param cache_path: currently unused (caching of the tower-to-unit
        weights is still a TODO)
    :param verbose: forwarded to ``gis.polys2polys``
    :return: su_footfall, shape: [n_su, 24 hours]
    """
    su_name = su.index.name
    overlaps = gis.polys2polys(tvors,
                               su,
                               pname1='tower',
                               pname2=su_name,
                               verbose=verbose)
    weights = overlaps[['tower', su_name, 'weight']].set_index(su_name)
    # TODO: cache t2su if cache_path not None
    n_bins = tw_footfall.shape[1]
    return interpolate_stats(tw_footfall, weights, n_bins=n_bins)
def mpa_vors(per_mun=False, urb_only=False, to_4326=False):
    """Intersect tower voronoi polygons with a metropolitan-area variant.

    Each resulting piece gets a ``centroid`` column (the tower location when
    the tower point was matched to the same variant polygon, otherwise the
    geometric centroid of the piece) and is indexed by
    ``vor = "<variant id>|<tower>"``.

    :param per_mun: variant flag passed to ``mpa_all_variants`` and
        ``tower.pts_x_region``
    :param urb_only: variant flag passed to the same helpers
    :param to_4326: passed to ``tower.pts``; when True the pieces are also
        reprojected to EPSG:4326
    :return: GeoDataFrame of voronoi pieces indexed by ``vor``
    """
    import src.mex.tower as tower

    tower_points = tower.pts(to_4326).set_index('gtid')
    vors = tower.voronoi()
    variant = mpa_all_variants(per_mun, urb_only)
    vname = variant.index.name

    # towers located inside the variant polygons, with their point geometry
    # kept under the name 'centroid'
    pts_in_variant = tower.pts_x_region('mpa', per_mun, urb_only)
    pts_in_variant = pts_in_variant.merge(tower_points[['geometry']],
                                          left_on='gtid',
                                          right_index=True)
    pts_in_variant = pts_in_variant.rename(columns={
        'geometry': 'centroid',
        'gtid': 'tower'
    })

    pieces = gis.polys2polys(vors,
                             variant,
                             'tower',
                             vname,
                             area_crs=mex.crs,
                             intersection_only=False)
    pieces = pieces.merge(pts_in_variant, on=['tower', vname], how='left')
    if to_4326:
        pieces = pieces.to_crs(epsg=4326)

    def _pick_centroid(row):
        # if tower is within zm, then column centroid is the tower location,
        # otherwise the geometric centroid
        if row.centroid is not None:
            return row.centroid
        return row.geometry.centroid

    pieces['centroid'] = pieces.apply(_pick_centroid, axis=1)

    if per_mun:
        # replace CVE_SUN with the value attached to each municipality
        mun_to_sun = variant.reset_index()[['mun_id', 'CVE_SUN']]
        pieces = pieces.drop('CVE_SUN', axis=1).merge(mun_to_sun)

    pieces['vor'] = pieces[vname].astype(str) + '|' + pieces['tower']
    return pieces.set_index('vor')
def loc2grid(rkind, grid_side, loc_buffer=500):
    """Compute localidad-to-grid area weights, save them and draw an example.

    Nothing is computed when the output CSV already exists.

    :param rkind: region kind, used to load grids and in the output names
    :param grid_side: grid side, used to load grids and in the output names
    :param loc_buffer: buffer passed to ``mex.localidad`` and used in the
        output names
    :return: None
    """
    path = f'data/mex_tower/Loc2GridByArea-{rkind}-GS{grid_side}-LBf{loc_buffer}.csv'
    if os.path.exists(path):
        print(path, 'exist, skipped')
        return

    grids = mex.grids(rkind, grid_side)
    localidad = mex.localidad(loc_buffer, to_crs=4326)

    print('running polys2polys', datetime.datetime.now())
    # local name differs from the function name so the function is not
    # shadowed inside its own body
    l2g = gis.polys2polys(localidad,
                          grids,
                          'localidad',
                          'grids',
                          cur_crs=4326,
                          area_crs=mex.AREA_CRS,
                          intersection_only=False)

    print('saving weights', datetime.datetime.now())
    l2g[['localidad', 'grids', 'weight', 'iarea']].to_csv(path)

    print('creating example html')
    # the example map only shows localidads whose CVE_ENT is 09, 12 or 14
    example_locs = localidad[localidad.CVE_ENT.isin(['09', '12', '14'])]
    example = l2g[l2g.localidad.isin(example_locs.index)]

    m = folium.Map(location=[19.381495, -99.139095], zoom_start=6)
    mv.geojson_per_row(grids[grids.grid.isin(example.grids)],
                       'Grids',
                       some_map=m)
    mv.geojson_per_row(example,
                       'G2loc',
                       some_map=m,
                       color='green',
                       tip_cols=[
                           'localidad', 'grids', 'weight', 'iarea',
                           'localidad_area', 'grids_area'
                       ])
    folium.LayerControl().add_to(m)
    # f-string: the original literal lacked the prefix, so every run wrote to
    # the same file named with the raw "{rkind}" placeholders
    m.save(f'Loc2Grid-{rkind}-GS{grid_side}-LBf{loc_buffer}-Example.html')
    print('done')
def voronoi_pop_by_ageb():
    """Population of every tower voronoi polygon, estimated from agebs.

    The result is cached in ``{DIR_INTPL}/voronoi_pop_by_ageb.csv``; when the
    file exists it is loaded instead of recomputed.

    :return: DataFrame with a ``Pop`` column indexed by ``gtid``; freshly
        computed results cover every tower in ``voronoi(load_pop=False)``,
        with 0 for towers that intersect no ageb
    """
    fn = f'{DIR_INTPL}/voronoi_pop_by_ageb.csv'
    if os.path.exists(fn):
        tvor_pop = pd.read_csv(fn).set_index('gtid')
        print(f'loading existing mexico tower voronoi population file: {fn}')
    else:
        towers_vor = voronoi(load_pop=False)
        agebs = region.agebs()
        t2a = gis.polys2polys(towers_vor,
                              agebs,
                              'gtid',
                              'ageb',
                              area_crs=mex.crs,
                              intersection_only=False)
        t2a = t2a.merge(agebs[['pobtot']], left_on='ageb', right_index=True)

        # ageb area is the sum area covered by towers,
        # in case the ageb polygons are not exactly the same as the official
        # map (happens for localidads); also, the points are buffered, which
        # adds fake areas.
        ageb_area = t2a.groupby('ageb').iarea.sum()
        ageb_area.name = 'ageb_area'
        t2a = t2a.drop(['ageb_area', 'weight'],
                       axis=1).merge(ageb_area.to_frame(),
                                     left_on='ageb',
                                     right_index=True)

        # Pop is the population of the intersected area between a tower and
        # an ageb. Within an ageb the population is assumed to be distributed
        # evenly over space, therefore:
        # area of intersection / area of ageb * pop of ageb
        t2a['Pop'] = t2a.iarea / t2a.ageb_area * t2a.pobtot

        # The tower column is named 'gtid' (pname1 above); grouping by
        # 'tower' would raise KeyError.
        tvor_pop = t2a.groupby('gtid').Pop.sum().to_frame().reindex(
            towers_vor.index, fill_value=0)
        # keep the index label in sync with the cache reader above, which
        # calls set_index('gtid')
        tvor_pop.index = tvor_pop.index.rename('gtid')
        tvor_pop.to_csv(fn)
    return tvor_pop
def interpolate_pop(tvors,
                    su,
                    pop_units,
                    tw_footfall,
                    cache_path=None,
                    verbose=0):
    """
    Interpolate tower footfall to spatial units, proportionally to the
    population of each tower-x-unit intersection.

    :param tvors: tower voronoi polygons; must carry a ``Pop`` column with
        the tower's population
    :param su: target spatial units; its index name labels the unit column
    :param pop_units: the units (ageb in Mex) with population.
        To avoid units outside admin_boundary but intersecting it,
        filter them out first: country_pu.loc[admin_pu]
    :param tw_footfall: tower footfall, shape: [n_tower, 24 hours]
    :param cache_path: not used in this function
    :param verbose: forwarded to ``gis.polys2polys`` and ``get_pop_s``
    :return: su_footfall, shape : [n_su, 24 hours]
    """
    su_name = su.index.name

    # intersect the tower polygons with the spatial units
    pieces = gis.polys2polys(tvors, su, 'tower', su_name, verbose=verbose)
    pieces = pieces[['tower', su_name, 'geometry']]
    pieces.crs = tvors.crs
    pieces.index.name = f'tower_{su_name}'

    # population of every intersection
    piece_pop = get_pop_s(pieces, pop_units, verbose)

    # weight = intersection population / tower population
    pieces = pieces.join(piece_pop)
    pieces = pieces.merge(tvors[['Pop']],
                          left_on='tower',
                          right_index=True,
                          suffixes=('_intxn', '_tower'))
    pieces['weight'] = pieces.Pop_intxn / pieces.Pop_tower

    weights = pieces[['tower', su_name, 'weight']].set_index(su_name)
    n_bins = tw_footfall.shape[1]
    return interpolate_stats(tw_footfall, weights, n_bins=n_bins)
def tower2grid(rkind, side, redo=False, t2r_intxn_only=False):
    """
    Build (or load) the tower-to-grid weight mapping for a region kind.

    :param rkind: region kind: cities only
    :param side: side of grids in meters
    :param redo: ignore existing t2g mapping
    :param t2r_intxn_only: keep only the intersection of the regions to
        compute the distribution weight.
    :return: DataFrame with the tower id column, ``grid`` and ``weight``
    """
    t2g_path = f'data/mex_tower/mex_t2g_{rkind}_{side}m.csv'
    use_cache = (not redo) and os.path.exists(t2g_path)
    if use_cache:
        print('reading existing t2g file:', t2g_path)
        return pd.read_csv(t2g_path, index_col=0)

    tvor = tower_vor()
    tname = tvor.index.name
    rs = regions(rkind)
    rname = rs.index.name

    print('keep tower voronoi within', rkind, 'intersection only:',
          t2r_intxn_only)
    towers_in_regions = polys2polys(tvor,
                                    rs,
                                    tname,
                                    rname,
                                    cur_crs=4326,
                                    area_crs=AREA_CRS,
                                    intersection_only=t2r_intxn_only)
    gs = grids(rkind, side)

    print('building tower to grid mapping')
    carried_cols = [tname, rname, f'{tname}_area', 'weight']
    renamed_cols = {
        'weight_x': 'w_Grid2towerInRegion',
        'weight_y': 'w_towerInRegion',
        'iarea': 'gridInTowerInRegion_area'
    }
    per_region = []
    for region_id in rs.index:
        # towers and grids are matched region by region
        tr = towers_in_regions[towers_in_regions[rname] == region_id]
        gr = gs[gs[rname] == region_id]
        tr2gr = polys2polys(tr,
                            gr,
                            pname1='towerInRegion',
                            pname2='grid',
                            cur_crs=4326,
                            area_crs=AREA_CRS,
                            intersection_only=True)
        # both sides carry a 'weight' column, so the merge suffixes them
        # with _x (piece -> grid) and _y (tower -> region)
        tr2gr = tr2gr.merge(tr[carried_cols],
                            left_on='towerInRegion',
                            right_index=True)
        tr2gr.rename(columns=renamed_cols, inplace=True)
        tr2gr['weight'] = (tr2gr.w_Grid2towerInRegion *
                           tr2gr.w_towerInRegion)
        tr2gr = tr2gr[[
            rname,
            tname,
            'towerInRegion',
            'grid',
            'weight',
            'w_towerInRegion',
            'w_Grid2towerInRegion',
            'gridInTowerInRegion_area',
            'towerInRegion_area',
            'gtid_area',
            'geometry',
        ]]
        per_region.append(tr2gr[[rname, tname, 'grid', 'weight']])

    t2g = pd.concat(per_region, ignore_index=True).drop(rname, axis=1)
    print('saving tower to grid mapping:', t2g_path)
    t2g.to_csv(t2g_path)
    return t2g
# Rural localidads: keep only those with population and drop the listed
# identifier columns.
LPR_pop = LPRBf4326_Ponly[LPRBf4326_Ponly.Pop > 0].drop(
    ['CVE_LOC', 'CVE_AGEB', 'CVE_MZA', 'PLANO', 'CVEGEO'], axis=1)
LPR_pop['AMBITO'] = 'Rural'
# merge two sources: LUR_pop and LPR_pop, the latter restricted to the
# columns of LUR_pop; the combined table is indexed by loc_id
L_pop = pd.concat([LUR_pop, LPR_pop[LUR_pop.columns]],
                  ignore_index=True).set_index('loc_id')
# =============
# distribute tower's users count by population
# =============
# compute the intersection area between tower and localidad
t2loc = gis.polys2polys(tvor,
                        L_pop,
                        pname1='tower',
                        pname2='localidad',
                        cur_crs=4326,
                        area_crs=mex.AREA_CRS,
                        intersection_only=False)
# attach the localidad population to every intersection
t2loc = t2loc.merge(L_pop[['Pop']], left_on='localidad', right_index=True)
# localidad area is the sum of the areas covered by towers; it replaces the
# 'localidad_area' column returned by polys2polys.
# NOTE: the new column is spelled 'loclidad_area' (sic).
loc_area = t2loc.groupby('localidad').iarea.sum()
loc_area.name = 'loclidad_area'
t2loc = t2loc.drop(['localidad_area', 'weight'],
                   axis=1).merge(loc_area.to_frame(),
                                 left_on='localidad',
                                 right_index=True)
# iPop is the population of the intersected area between a tower and a localidad
# within a localidad, the population is assumed to be distributed evenly over space
def to_mpa_grids(side, by='area', per_mun=False, urb_only=False, grids=None):
    """Weights for distributing tower counts to metropolitan grids.

    The table is cached as a CSV under ``DIR_INTPL``; when the file exists it
    is loaded and returned directly.

    :param side: grid side, used in the file name and to load the grids
    :param by: ``'area'`` intersects the metropolitan tower voronois with the
        grids; ``'pop'`` chains the tower-to-ageb population weight with the
        ageb-piece-to-grid area weight
    :param per_mun: variant flag, used in the file name and to load grids
    :param urb_only: variant flag, used in the file name and to load grids
    :param grids: optional pre-loaded result of ``region.mpa_grids`` so it is
        not loaded again
    :return: DataFrame with columns ``tower``, ``grid``, ``weight``
    """
    assert by in ('area',
                  'pop'), f'by={by}, it should be either "area" or "pop"'
    path = f'{DIR_INTPL}/tower_to_mpa_g{side}_{PER_MUN_STR(per_mun)}_{URB_ONLY_STR(urb_only)}_{by}.csv'

    if os.path.exists(path):
        print('to_map_grids loading existing file', path)
        return pd.read_csv(path, index_col=0)

    print('computing to_map_grids', by, per_mun, urb_only)
    if grids is None:
        grids = region.mpa_grids(side,
                                 per_mun=per_mun,
                                 urb_only=urb_only,
                                 to_4326=False)

    if by != 'area':
        t2a = to_mpa_agebs('pop', return_geom=True)
        t2a_renamed = t2a.rename(columns={
            'iPop': 'txa_pop',
            'Pop': 'ageb_pop',
            'weight': 'w_t2a_bP'
        })
        # intersect the tower-x-ageb pieces with the grids
        crossed = gis.polys2polys(t2a,
                                  grids,
                                  pname1='txa',
                                  pname2='grid',
                                  area_crs=mex.crs,
                                  intersection_only=False)
        crossed = crossed.rename(columns={
            'iarea': 'txa2g_area',
            'weight': 'w_txa2g_bA'
        })
        crossed = crossed.merge(t2a_renamed.drop(['geometry', 'iarea'],
                                                 axis=1),
                                left_on='txa',
                                right_index=True)
        # population weight (tower -> ageb piece) times
        # area weight (ageb piece -> grid)
        crossed['weight'] = crossed.w_t2a_bP * crossed.w_txa2g_bA
        crossed = crossed[[
            'weight', 'txa', 'ageb', 'tower', 'w_t2a_bP', 'grid',
            'w_txa2g_bA', 'txa_pop', 'tower_pop', 'txa2g_area', 'txa_area',
            'geometry', 'pobtot', 'tower_area', 'grid_area', 'ageb_area'
        ]]
        weights = crossed.groupby(['tower',
                                   'grid']).weight.sum().reset_index()
    else:
        # only the towers whose voronoi touches a metropolitan area
        tvor = tower.voronoi()
        tvor_x_zm = tower.voronoi_x_region('mpa')
        zms_tvors = tvor.loc[tvor_x_zm.gtid.unique()]
        weights = gis.polys2polys(zms_tvors,
                                  grids,
                                  'tower',
                                  'grid',
                                  area_crs=mex.crs,
                                  intersection_only=False)

    result_cols = ['tower', 'grid', 'weight']
    weights[result_cols].to_csv(path)
    return weights[result_cols]
def to_mpa_agebs(by='area', return_geom=False):
    """Weights for distributing tower counts to metropolitan agebs.

    The ``tower``/``ageb``/``weight`` table is cached as a CSV under
    ``DIR_INTPL``. The cache is only read when ``return_geom`` is False; it
    is rewritten whenever the weights are computed.

    :param by: ``'area'`` uses the area weights from ``gis.polys2polys``;
        ``'pop'`` weights each tower-x-ageb piece by its share of the
        population covered by the tower
    :param return_geom: when True, always recompute and return the full
        intersection table instead of only the three weight columns
    :return: DataFrame with ``tower``, ``ageb``, ``weight`` (plus every other
        column of the intersection table when ``return_geom`` is True)
    """
    assert by in ('area',
                  'pop'), f'by={by}, it should be either "area" or "pop"'
    if by == 'area':
        path = f'{DIR_INTPL}/tower_to_mpa_agebs_by_area.csv'
    else:
        path = f'{DIR_INTPL}/tower_to_mpa_agebs_by_pop.csv'

    if not return_geom and os.path.exists(path):
        print('to_map_agebs loading existing file', path)
        return pd.read_csv(path, index_col=0)

    print('computing t2a', by, 'return_geom =', return_geom)
    zms = region.mpa_all()
    # every zm stores its municipalities as one comma-separated string
    mun_ids = sorted(
        set(chain.from_iterable(ids.split(',') for ids in zms.mun_ids)))
    zms_agebs = region.agebs(mun_ids=mun_ids)

    tvor = tower.voronoi()
    tvor_x_zm = tower.voronoi_x_region('mpa')
    zms_tvors = tvor.loc[tvor_x_zm.gtid.unique()]

    if by == 'area':
        # by area only
        t2ageb = gis.polys2polys(zms_tvors,
                                 zms_agebs,
                                 'tower',
                                 'ageb',
                                 area_crs=mex.crs,
                                 intersection_only=False)
    else:
        # by pop: the agebs considered are the ones listed for these towers
        # in tower.voronoi_x_region('mga'), not only the metropolitan ones
        tvor_x_agebs = tower.voronoi_x_region('mga')
        covered = tvor_x_agebs[tvor_x_agebs.gtid.isin(zms_tvors.index)]
        covered_ageb_ids = covered.ageb_id.unique()
        # the first 9 / 5 characters of an ageb id are used as its
        # localidad / municipality id
        covered_loc_ids = sorted({aid[:9] for aid in covered_ageb_ids})
        covered_mun_ids = sorted({aid[:5] for aid in covered_ageb_ids})
        covered_agebs = region.agebs(mun_ids=covered_mun_ids,
                                     loc_ids=covered_loc_ids)

        pieces = gis.polys2polys(zms_tvors,
                                 covered_agebs,
                                 'tower',
                                 'ageb',
                                 area_crs=mex.crs,
                                 intersection_only=False)
        pieces = pieces.merge(covered_agebs[['pobtot']],
                              left_on='ageb',
                              right_index=True)

        # The area of an ageb is taken as the total area covered by towers,
        # in case the ageb polygons are not exactly the same as the official
        # map (happens for localidads); buffered points also add fake areas.
        used_area = pieces.groupby('ageb').iarea.sum().rename('ageb_area')
        pieces = pieces.rename(columns={
            'ageb_area': 'original_ageb_area',
            'weight': 'area_weight'
        })
        pieces = pieces.merge(used_area.to_frame(),
                              left_on='ageb',
                              right_index=True)

        # iPop: population of one tower-x-ageb intersection, assuming the
        # ageb's population is spread evenly over its (covered) area
        pieces['iPop'] = pieces.pobtot * pieces.iarea / pieces.ageb_area

        # population covered by a tower = sum of iPop over its intersections
        pop_per_tower = pieces.groupby('tower').iPop.sum().rename('tower_pop')
        pieces = pieces.merge(pop_per_tower.to_frame(),
                              left_on='tower',
                              right_index=True)

        # the weight to distribute tower's users count
        pieces['weight'] = pieces.iPop / pieces.tower_pop

        # keep only the pieces that belong to metropolitan agebs
        t2ageb = pieces[pieces.ageb.isin(zms_agebs.index)]

    result_cols = ['tower', 'ageb', 'weight']
    t2ageb[result_cols].to_csv(path)
    print('returning t2ageb')
    if return_geom:
        return t2ageb
    return t2ageb[result_cols]