def _post_info(self, shortcode_or_model: Union[str, InstagramPost]) -> InstagramPost:
    if isinstance(shortcode_or_model, InstagramPost):
        return shortcode_or_model
    shortcode = shortcode_or_model
    d = Addict(self._web_api_client.media_info2(shortcode))
    return InstagramPost(
        post_num_id=d.id or None,  # todo: this is actually a str
        shortcode=d.shortcode or None,
        img_height=_to_int(d.dimensions.height),
        img_width=_to_int(d.dimensions.width),
        display_url=d.display_url or None,
        is_video=_to_bool(d.is_video),
        caption_is_edited=_to_bool(d.caption_is_edited),
        created_at=_timestamp_to_datetime(d.taken_at_timestamp),
        like_count=_to_int(d.likes.count),
        comment_count=_to_int(d.comments.count),
        location_id=_to_int(d.location and d.location.id),
        location_name=(d.location and d.location.name) or None,
        location_address_json=(d.location and d.location.address_json) or None,
        owner_id=_to_int(d.owner.id),
        owner_username=d.owner.username or None,
        owner_full_name=d.owner.full_name or None,
        is_ad=_to_bool(d.is_ad),
        caption=d.caption.text or None,
        users_in_photo=[p.user for p in d.users_in_photo],
        hashtags=_get_hashtags(d.caption.text),
        mentions=_get_mentions(d.caption.text),
    )

def fit(self, df: pd.DataFrame) -> None:
    self.columns = list(df.columns)
    self.encoders = Addict()

    for category_type, columns in self.columns_map.items():
        for column_id, column in enumerate(columns):
            encoder_class, parameters = self.type2encoder[category_type]
            if category_type == "cyclical":
                parameters["amplitude"] = self.columns_map[category_type][column_id][1]
                column = self.columns_map[category_type][column_id][0]
            encoder = encoder_class(**parameters)
            x = df[column].values
            if category_type == "numerical":
                x = x.reshape(-1, 1)
            encoder.fit(x)
            self.encoders[category_type][column] = encoder
            self.column2type[column] = category_type

def main(config_file):
    conf = Addict(yaml.safe_load(open(config_file, 'r')))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")

    raw_file = conf.get("output").get("existing_supermarkets_raw")
    supermarkets = []
    with open(raw_file, encoding='utf-8') as f:
        for supermarket_raw in json.loads(f.read()):
            supermarket_obj = {}
            supermarket_obj["name"] = supermarket_raw.get("name")
            supermarket_obj["addr"] = supermarket_raw.get("formatted_address")
            geocode = supermarket_raw.get("geometry").get("location")
            supermarket_obj["lat"] = geocode.get("lat")
            supermarket_obj["lng"] = geocode.get("lng")
            supermarket_obj["type"] = supermarket_raw.get("place_type")
            supermarkets.append(supermarket_obj)

    supermarkets_df = pd.DataFrame(supermarkets)
    logging.info("%s supermarkets are located in the city", supermarkets_df.shape[0])
    supermarkets_df = supermarkets_df.drop_duplicates(subset=["lat", "lng"])
    logging.info("There are %s supermarkets left after duplicates removed",
                 supermarkets_df.shape[0])
    grocery_df = supermarkets_df.loc[supermarkets_df["type"] == "grocery"]
    logging.info("%s of the results are grocery", grocery_df.shape[0])
    supermarkets_df = supermarkets_df.reset_index()
    supermarkets_df = supermarkets_df.loc[supermarkets_df["type"] == "supermarket"]

    output_fp = conf.get("output").get("existing_supermarkets_data")
    supermarkets_df.to_csv(output_fp, index=False)
    logging.info("Information of existing supermarkets written to %s", output_fp)

def immutable(cls, d):
    '''If d is not an instance of Addict, turn it into Addict and lock it.

    This is useful for configs and other static declarations.
    '''
    if not isinstance(d, Addict):
        return LockedDict.lock(Addict(d))
    return LockedDict.lock(d)

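# Hedged usage sketch (assumed, not from the original source): `immutable` takes a
# plain dict, wraps it in Addict, and locks it, so it is presumably exposed as a
# classmethod on LockedDict and returns the locked mapping. The lock semantics
# (rejecting later mutation) are an assumption here.
# config = LockedDict.immutable({"db": {"host": "localhost", "port": 5432}})
# config.db.port         # attribute-style access still works -> 5432
# config.db.port = 5433  # expected to be rejected once the dict is locked
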
def all_pages(page_elem, iter_elem, func, *args, **kwargs):
    page = 1
    acc = []
    while True:
        paginated = Addict(func(*args, **kwargs, page=page))[page_elem]
        acc.extend(paginated[iter_elem])
        if int(paginated.page) >= int(paginated.pages):
            return acc
        page += 1

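# Hedged usage sketch (assumed, not from the original source): with a paginated
# response shaped like {"photosets": {"page": ..., "pages": ..., "photoset": [...]}}
# (the shape the Flickr snippets elsewhere in this collection rely on), all albums
# could be gathered across pages with:
# albums = all_pages("photosets", "photoset", flickr.photosets.getList)
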
def _get_album_page(page, acc):
    search_result = Addict(flickr.photosets.getList(page=page))
    albums = search_result.photosets
    for album in albums.photoset:
        date_create = int(album.date_create)
        dt_date_create = datetime.fromtimestamp(date_create, timezone.utc)
        acc.append((album.title._content, dt_date_create, album.id))
    if albums.page < albums.pages:
        return _get_album_page(page + 1, acc)
    else:
        return acc

def main(config_file):
    conf = Addict(yaml.safe_load(open(config_file, 'r')))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")

    supermarkets_file = conf.get("input").get("supermarkets_file")
    logging.info("Loading geocode of supermarkets from %s", supermarkets_file)
    supermarkets_file_reader = csv.reader(open(supermarkets_file))
    supermarkets_file_header = next(supermarkets_file_reader)
    supermarkets = []
    for row in supermarkets_file_reader:
        supermarket = dict(zip(supermarkets_file_header, row))
        supermarkets.append(supermarket)
    logging.info("%s supermarkets located in the city.", len(supermarkets))

    grid_geocode_file = conf.get("input").get("grid_geocode_file")
    logging.info("Loading geocode of city grids from %s", grid_geocode_file)
    grids_file_reader = csv.reader(open(grid_geocode_file))
    grids_file_header = next(grids_file_reader)
    grids = []
    for row in grids_file_reader:
        grid = dict(zip(grids_file_header, row))
        grids.append(grid)
    logging.info("The city is covered by %s 1km x 1km grids.", len(grids))

    api_key = conf.get("API").get("KEY")
    gmaps = googlemaps.Client(key=api_key)
    results = []
    counter = 0
    logging.info("Start querying driving time from city grid to supermarkets ...")
    start_time = time.time()
    for grid in grids:
        for supermarket in supermarkets:
            logging.debug("Processing grid: %s - supermarket: %s", grid, supermarket)
            dist_api_worker = DistAPIWorker(gmaps, grid, supermarket)
            response = dist_api_worker.run()
            results.append(response)
            counter += 1
            if counter % 1000 == 0:
                logging.info("%s grid-supermarket pairs processed ... Elapsed time %s seconds",
                             counter, round(time.time() - start_time, 4))

    # Export query responses to file
    if len(results) > 0:
        results_fp = conf.get("output").get("grid_to_supermarket_dist_raw")
        with open(results_fp, 'w') as output_file:
            json.dump(results, output_file, indent=4)
        logging.info("%s query responses dumped to %s", len(results), results_fp)

def wrapper(*args: Any, **kwargs: Any) -> Any:
    """Wrap the decorated function so its dict results come back as `addict.Dict`.

    Args:
        args: Positional arguments passed through to the wrapped function.
        kwargs: Keyword arguments passed through to the wrapped function.

    Returns:
        addict.Dict: The wrapped function's dict result as a `Dict` instance;
        lists are converted element-wise and other values pass through unchanged.
    """
    lists = []
    return_value = func(*args, **kwargs)
    if isinstance(return_value, list):
        for value in return_value:
            if isinstance(value, dict):
                lists.append(Addict(value))
            else:
                lists.append(value)
        return lists
    elif isinstance(return_value, dict):
        return Addict(return_value)
    else:
        return return_value

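# Hedged sketch (assumed, not from the original source) of the enclosing decorator
# such a wrapper presumably lives in; only the inner function is shown above, so the
# decorator name `to_addict` and the use of functools.wraps are assumptions.
import functools
from typing import Any, Callable

from addict import Dict as Addict


def to_addict(func: Callable[..., Any]) -> Callable[..., Any]:
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        result = func(*args, **kwargs)
        if isinstance(result, dict):
            return Addict(result)
        if isinstance(result, list):
            return [Addict(v) if isinstance(v, dict) else v for v in result]
        return result
    return wrapper
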
def _paginate_thumb_feed(
        self,
        feed_name: str,
        feed_kwargs: dict,
        media_path: Iterator[str]) -> Iterator[InstagramPostThumb]:
    has_next_page = True
    end_cursor = None
    while has_next_page:
        r = getattr(self._web_api_client, feed_name)(**feed_kwargs,
                                                     end_cursor=end_cursor)
        media = Addict(r)
        for p in media_path:
            media = media[p]
        has_next_page = media.page_info.has_next_page
        end_cursor = media.page_info.end_cursor
        for edge in media.edges:
            yield self._node_to_post_thumb(edge.node)

def _get_page_of_images_in_album(flickr, album_id, page, acc, output=False):
    album_info = Addict(
        flickr.photosets.getPhotos(
            photoset_id=album_id,
            page=page,
            extras="url_m,date_taken,geo",
        )).photoset
    if output:
        logger.info(
            f"Processing album '{album_info.title}' with {album_info.total} "
            "photos...")
    acc.extend(album_info.photo)
    # return the album so callers can also use its metadata
    return album_info

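# Hedged driver sketch (assumed, not from the original source): the photoset object
# returned above is assumed to carry `page` and `pages`, as in Flickr's paginated
# responses, so the helper can be looped until every page of the album is collected.
def _get_all_images_in_album(flickr, album_id):
    photos = []
    page = 1
    while True:
        album_info = _get_page_of_images_in_album(
            flickr, album_id, page, photos, output=(page == 1))
        if int(album_info.page) >= int(album_info.pages):
            return photos
        page += 1
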
def _node_to_post_thumb(cls, data: dict) -> InstagramPostThumb:
    data = Addict(data)
    caption = _get_caption(data)
    return InstagramPostThumb(
        post_num_id=data.id,
        owner_num_id=_to_int(data.owner.id),
        caption=caption,
        shortcode=data.shortcode or None,
        comment_count=_to_int(data.edge_media_to_comment.count),
        like_count=_to_int(data.edge_media_preview_like.count),
        created_at=_timestamp_to_datetime(data.taken_at_timestamp),
        img_height=_to_int(data.dimensions.height),
        img_width=_to_int(data.dimensions.width),
        img_url=data.display_url or None,
        is_video=_to_bool(data.is_video),
        hashtags=_get_hashtags(caption),
        mentions=_get_mentions(caption),
    )

def __init__(self, columns_map):
    self.columns_map = columns_map
    self.type2encoder = {
        "numerical": (MinMaxScaler, {"feature_range": (-1, 1)}),
        "categorical": (LabelEncoderUnseen, {}),
        "cyclical": (CyclicEncoder, {}),
    }

    if "joined_encoders" in self.columns_map:
        self.joined_encoders = self.columns_map["joined_encoders"]
        del columns_map["joined_encoders"]
    else:
        self.joined_encoders = {}

    if set(self.type2encoder.keys()).intersection(
            columns_map.keys()) != set(columns_map.keys()):
        raise ValueError(f"Wrong column names in columns_map {columns_map}.")

    self.category_types = set(self.columns_map.keys())
    self.column2type = self.get_columns2type()

    self.column_classes = Addict()
    for category_type in self.category_types:
        for column in columns_map[category_type]:
            self.column_classes[column] = Column(column, category_type=category_type)

    for column_name, column_names in self.joined_encoders.items():
        category_type = self.column2type[column_name]
        for subcolumn_name in column_names:
            self.column_classes[subcolumn_name] = Column(
                subcolumn_name, category_type=category_type)

    self.numerical_columns = self._get_columns("numerical")
    self.categorical_columns = self._get_columns("categorical")
    self.cyclical_columns = self._get_columns("cyclical")

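# Hedged example (assumed, not from the original source) of the columns_map shape this
# constructor appears to expect: plain column names for numerical and categorical
# features, (name, amplitude) pairs for cyclical features, and an optional
# "joined_encoders" mapping of a column to other columns that share its encoder.
example_columns_map = {
    "numerical": ["price", "age"],
    "categorical": ["city", "origin_airport", "destination_airport"],
    "cyclical": [("hour", 24), ("weekday", 7)],
    "joined_encoders": {"origin_airport": ["destination_airport"]},
}
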
def main(config_file):
    conf = Addict(yaml.safe_load(open(config_file, 'r')))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")

    API_KEY = conf.get("API").get("KEY")
    base_url = conf.get("API").get("URL")
    # place_types = ["supermarkets", "convenience_store", "department_store", "store", "grocery"]
    place_types = ["supermarkets", "grocery"]
    locations = []
    for place in place_types:
        logging.info("Searching for %s in Penang", place)
        query = "query=" + place + "+in+Penang"
        url = base_url + query + "&key=" + API_KEY
        logging.info("url of API query: %s", url)
        response = requests.get(url)
        results = json.loads(response.text).get("results")
        logging.info("%s results are found.", len(results))
        for result in results:
            location = {}
            location["name"] = result.get("name")
            location["addr"] = result.get("formatted_address")
            geocode = result.get("geometry").get("location")
            location["lat"] = geocode.get("lat")
            location["lng"] = geocode.get("lng")
            location["type"] = place
            locations.append(location)

    places_df = pd.DataFrame(locations)
    places_df = places_df.drop_duplicates(subset=["lat", "lng"])
    places_df = places_df.reset_index()
    output_fp = conf.get("output").get("filename")
    places_df.to_csv(output_fp, index=False)
    logging.info("%s supermarkets are located in the city", places_df.shape[0])
    logging.info("Information of existing supermarkets written to %s", output_fp)

def _user_info(self, u: Union[str, int]) -> InstagramUser:
    """
    :param u: Prefer username (fewer gets)
    :return:
    """
    # todo: username <-> user_id bidict cache
    if not isinstance(u, str) or u.isdigit():
        # input is a user_id, and we have to resolve the username from a post
        # todo: potential problem if user has no posts, how can we get username?
        user_id = _to_int(u)
        first_thumb = self.user_feed(user_id).limit(1).to_list()[0]
        first_post = self._post_info(first_thumb.shortcode)
        u = first_post.owner_username
    username = u
    d = Addict(self._web_api_client.user_info2(user_name=username))
    return InstagramUser(
        biography=d.biography or None,
        website=d.website or None,
        followed_by_count=_to_int(d.counts.followed_by),
        follows_count=_to_int(d.counts.follows),
        full_name=d.full_name or None,
        user_id=_to_int(d.id),
        is_business_account=_to_bool(d.is_business_account),
        is_joined_recently=_to_bool(d.is_joined_recently),
        is_private=_to_bool(d.is_private),
        is_verified=_to_bool(d.is_verified),
        profile_pic_url=d.profile_pic_url or None,
        username=d.username or None,
        connected_fb_page=d.connected_fb_page or None,
        media_count=_to_int(d.counts.media),
    )

flickr = auth_flickr()

# search_result = Addict(
#     flickr.photos.search(
#         user_id="me",
#         tags="andonnee",
#         tag_mode="all",
#         min_uploaded_date="2021-10-25 00:00:00",
#         per_page=500,
#         extras="tags,machine_tags",
#     )
# )
album = Addict(
    flickr.photosets.getPhotos(
        photoset_id="72157720209505213",
        per_page=500,
    ))

start_id = "51730055105"
end_id = "51730863735"
is_process = False

# TODO replace with walk https://stuvel.eu/flickrapi-doc/7-util.html
for photo in album.photoset.photo:
    if photo.id == end_id:
        break
    if photo.id == start_id:
        is_process = True

from addict import Dict as Addict

from api_auth import auth_flickr

flickr = auth_flickr()

taken_date = "2020-11-14"
search_result = Addict(
    flickr.photos.search(
        user_id="me",
        min_taken_date=f"{taken_date} 00:00:00",
        max_taken_date=f"{taken_date} 23:59:59",
        per_page=5,
    ))

print(f"Searching {taken_date}...")
# TODO replace with walk https://stuvel.eu/flickrapi-doc/7-util.html
for photo in search_result.photos.photo:
    photo = Addict(flickr.photos.getInfo(photo_id=photo.id)).photo
    title = photo.title._content
    owner = photo.owner.nsid
    id_ = photo.id
    url = f"https://flickr.com/photos/{owner}/{id_}"
    print(f"{url} {title}")

def gpx2flickr(
    ctx,
    gpx_filepath,
    flickr_album,
    delta,
    delta_tz,
    tolerance,
    is_clear,
    kml_output_path,
    kml_thumbnail_size,
    is_update_images,
    api_key,
    api_secret,
):
    """Add location information to Flickr images based on a GPX file"""
    try:
        logger.info("Parsing time shift...")
        delta = process_delta(delta)
        if delta_tz:
            delta_tz = process_delta([delta_tz])
            delta_total = delta + delta_tz
        else:
            delta_tz = None
            delta_total = delta
        # The tz delta is not treated differently from the main delta for Flickr
        # (updating the time is not supported), so do not print intermediate
        # time deltas as is done for images.
        print_delta(delta, "Time")

        tolerance = process_tolerance(tolerance)
        gpx_segments = process_gpx(gpx_filepath)

        token_cache_location = click.get_app_dir(DEFAULT_APP_DIR)
        logger.info("Logging in to Flickr...")
        flickr = create_flickr_api(api_key, api_secret,
                                   token_cache_location=token_cache_location)
        user = Addict(flickr.urls.lookupUser(url=flickr_album.url)).user

        logger.info("Syncing Flickr geo tags to GPX...")
        if not is_update_images:
            logger.warning("The images will not be updated with the positions!")
        positions = synch_gps_flickr(
            flickr,
            user,
            flickr_album,
            gpx_segments,
            delta_total,
            tolerance,
            is_clear,
            is_update_images,
            ctx.obj["DEBUG"],
        )

        def image_src(x):
            return x.url_m

        def image_name(x):
            return create_photopage_url(x, user)

        process_kml(positions, kml_output_path, kml_thumbnail_size,
                    image_src, image_name)
    except Exception as ex:
        logger.error("*** An unrecoverable error occurred ***")
        lf = logger.error if not ctx.obj["DEBUG"] else logger.exception
        lf(str(ex))
        sys.exit(1)

def main(config_file):
    conf = Addict(yaml.safe_load(open(config_file, 'r')))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")
    start_time = time.time()

    logging.info("Part I Load city grid layer")
    grid_fp = conf.get("input").get("grid_file")
    grid_df = pd.read_csv(grid_fp)
    grid_df["id"] = grid_df["id"].apply(lambda grid_id: str(grid_id))
    grid_df = grid_df.set_index("id")
    grid_df = grid_df.dropna()

    logging.info("Converting UTM coordinate system to geocode ...")
    inProj = pyproj.Proj(init='epsg:3857')
    outProj = pyproj.Proj(init='epsg:4326')
    grid_df[["left_lng", "top_lat"]] = grid_df.apply(
        lambda row: convert_utm_coords(row[["left", "top"]], inProj, outProj), axis=1)
    grid_df[["right_lng", "bottom_lat"]] = grid_df.apply(
        lambda row: convert_utm_coords(row[["right", "bottom"]], inProj, outProj), axis=1)
    grid_df["center_lng"] = (grid_df["left_lng"] + grid_df["right_lng"]) / 2
    grid_df["center_lat"] = (grid_df["top_lat"] + grid_df["bottom_lat"]) / 2

    logging.info("Write grid center geocode to file")
    grid_geocode_df = grid_df[["center_lng", "center_lat"]]
    grid_geocode_file = conf.get("output").get("grid_geocode_file")
    grid_geocode_df.to_csv(grid_geocode_file, index=True)
    logging.info("Elapsed time %s seconds ...", round(time.time() - start_time, 4))

    logging.info("Part II Assign residential buildings to grids")
    grid_dict = grid_df.to_dict("index")
    buildings_fp = conf.get("input").get("residential_buildings_file")
    buildings_df = pd.read_csv(buildings_fp)
    logging.info("Range of longitude: %s - %s",
                 buildings_df["center_lng"].min(), buildings_df["center_lng"].max())
    logging.info("Range of latitude: %s - %s",
                 buildings_df["center_lat"].min(), buildings_df["center_lat"].max())
    buildings_df["grid"] = buildings_df.apply(
        lambda row: assign_grid(row[["center_lng", "center_lat"]], grid_dict), axis=1)
    buildings_df = buildings_df.set_index("id")
    logging.info("Elapsed time: %s seconds ...", round(time.time() - start_time, 4))

    logging.info("Part III Compute gridwise total floor area")
    logging.info("Residential building types: %s", buildings_df["type"].unique())
    buildings_df[["area", "area_bungalow"]] = buildings_df.apply(
        lambda row: check_bungalow(row["type"], row["area"]), axis=1)
    area_df = buildings_df.groupby(['grid'])[['area', 'area_bungalow']].agg('sum')
    area_df = pd.merge(area_df, grid_df, left_index=True, right_index=True)
    area_df = area_df.drop(["left", "right", "top", "bottom",
                            "left_lng", "top_lat", "right_lng", "bottom_lat",
                            "center_lng", "center_lat"], axis=1)
    area_df = area_df.reset_index()
    logging.info("Shape of area_df: %s", area_df.shape)
    logging.info(area_df.head())
    logging.info("Elapsed time: %s seconds ...", round(time.time() - start_time, 4))

    logging.info("Part IV Distribute city population into grids")
    district_df = area_df.groupby(["district"])[['area', 'area_bungalow']].agg('sum')
    district_df["total_population"] = conf.get("district_population")
    district_df["bungalow_population"] = district_df["total_population"] / 100 * 5
    district_df["apartment_population"] = (district_df["total_population"]
                                           - district_df["bungalow_population"])
    district_df = district_df.reset_index()
    logging.info(district_df)
    population_df = pd.merge(area_df,
                             district_df[["district", "area", "apartment_population"]],
                             on='district')
    population_df = population_df.rename(columns={
        "index": "grid_id",
        "area_x": "area",
        "area_bungalow_x": "area_bungalow",
        "area_y": "area_apartment_district",
        "apartment_population": "apartment_population_district"
    })
    population_df["population"] = population_df["apartment_population_district"] / \
        population_df["area_apartment_district"] * population_df["area"] + \
        population_df["area_bungalow"] / 100 * 5
    population_df["grid_id"] = population_df["grid_id"].apply(lambda grid_id: int(grid_id))
    logging.info("Shape of population_df: %s", population_df.shape)
    logging.info(population_df.head())

    logging.info("Part V Incorporate grid population with shape file")
    grid_shape = conf.get("input").get("grid_shape_file")
    sf = shp.Reader(grid_shape)
    shp_df = read_shapefile(sf)
    logging.info("Shape of shp_df: %s", shp_df.shape)
    logging.info(shp_df.head())
    population_shp_df = pd.merge(shp_df, population_df[["grid_id", "population"]],
                                 left_on='id', right_on='grid_id', how='outer')
    population_shp_df["population"].fillna(0, inplace=True)
    population_shp_df = population_shp_df.drop(["grid_id", "coords"], axis=1)

    logging.info("Export grid population to text file")
    grid_population_file = conf.get("output").get("grid_population_file")
    population_shp_df.to_csv(grid_population_file, index=False)

    gdf = gpd.read_file(grid_shape)
    gdf = gdf.to_crs({'init': 'epsg:3857'})
    gdf["population"] = population_shp_df["population"]
    grid_population_shape_file = conf.get("output").get("grid_population_shape_file")
    gdf.to_file(grid_population_shape_file)
    logging.info("Population info added to the shape file of city grid layer")

import json
from pathlib import Path
import webbrowser

from addict import Dict as Addict
import flickrapi

with open("api_key.json") as f:
    flickr_key = Addict(json.load(f))
api_key = flickr_key.key
api_secret = flickr_key.secret


def auth_flickr():
    flickr = flickrapi.FlickrAPI(
        api_key,
        api_secret,
        format="parsed-json",
        token_cache_location=Path("./.flickr").resolve(),
    )
    if not flickr.token_valid(perms="write"):
        flickr.get_request_token(oauth_callback="oob")
        authorize_url = flickr.auth_url(perms="write")
        webbrowser.open_new_tab(authorize_url)
        verifier = input("Verifier code: ")
        flickr.get_access_token(verifier)
    return flickr

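# Assumed shape of api_key.json, inferred from the attribute access above
# (placeholder values, not from the original source):
# {
#     "key": "your-flickr-api-key",
#     "secret": "your-flickr-api-secret"
# }
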
from addict import Dict as Addict

from api_auth import auth_flickr
from flickr_utils import get_photos


def make_flickr_photo_url(photo, user_id):
    if photo.pathalias:
        user_path = photo.pathalias
    else:
        user_path = user_id
    return f"https://www.flickr.com/photos/{user_path}/{photo.id}"


flickr = auth_flickr()
user = Addict(flickr.auth.oauth.checkToken())

album_id = "72157719125798869"
folder_path = (
    "/Users/guilhem/Pictures/camera/___uploaded/20210508_foretlanfon_ok/autocorrect2"
)
file_pattern = "DSCF*.JPG"

photos = list(
    get_photos(flickr, album_id, extras="date_taken,url_o,path_alias"))

flickr_time_index = {}
for photo in photos:
    date_taken = photo.datetaken
    flickr_time_index[date_taken] = photo

from addict import Dict as Addict
import dateutil.parser
from dateutil.relativedelta import relativedelta

from api_auth import auth_flickr

flickr = auth_flickr()

search_result = Addict(
    flickr.photos.search(
        user_id="me",
        tags="summer,ete,2020",
        tag_mode="all",
        min_taken_date="2019-01-16 00:00:00",
        max_taken_date="2019-01-17 00:00:00",
        extras="date_taken",
        per_page=500,
    ))

for photo in search_result.photos.photo:
    taken = photo.datetaken
    print(f"Original: {taken}")
    taken_dt = dateutil.parser.isoparse(taken)
    delta = relativedelta(years=1, months=8, hours=-8)
    taken_dt_corrected = taken_dt + delta
    taken_corrected = taken_dt_corrected.strftime("%Y-%m-%d %H:%M:%S")
    print(f"Corrected: {taken_corrected}")
    resp = flickr.photos.setDates(photo_id=photo.id, date_taken=taken_corrected)

def fit(self, df: pd.DataFrame) -> None:
    for column in self.categorical_columns:
        if df[column].dtype != "object":
            raise TypeError(
                f"We can process only string columns as categorical. "
                f"We got {df[column].dtype} for {column}. "
                f"Please cast the column to 'str'.")

    self.columns = [x for x in df.columns if x in self.column2type]
    self.encoders = Addict()

    for column_name, subcolumn_names in self.joined_encoders.items():
        if column_name in self.encoders:
            raise ValueError(
                f"We should not have the same column in two joined columns! "
                f"But we got it for {column_name}")
        category_type = self.column2type[column_name]
        encoder_class, parameters = self.type2encoder[category_type]
        encoder = encoder_class(**parameters)
        x = [df[column_name].values]
        if category_type == "categorical":
            for subcolumn_name in subcolumn_names:
                x += [df[subcolumn_name].values]
        x = np.concatenate(x)
        encoder.fit(x)
        self.encoders[column_name] = encoder
        for subcolumn_name in subcolumn_names:
            if subcolumn_name in self.encoders:
                raise ValueError(
                    f"We should not have the same subcolumn in two joined columns! "
                    f"But we got it for {subcolumn_name}")
            self.encoders[subcolumn_name] = encoder

    for category_type, column_names in self.columns_map.items():
        encoder_class, parameters = self.type2encoder[category_type]
        for column_name in column_names:
            if column_name in self.encoders:
                continue
            if category_type == "cyclical":
                parameters["amplitude"] = column_name[1]
                column = column_name[0]
            else:
                column = column_name
            encoder = encoder_class(**parameters)
            x = df[column].values
            encoder.fit(x)
            self.encoders[column] = encoder

def main(config_file):
    conf = Addict(yaml.safe_load(open(config_file, 'r')))
    if conf.get("logging") is not None:
        logging.config.dictConfig(conf["logging"])
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s - %(levelname)s - %(message)s")

    raw_file = conf.get("output").get("grid_to_supermarket_dist_raw")
    dist_data = []
    with open(raw_file, encoding='utf-8') as f:
        for dist_raw in json.loads(f.read()):
            dist_obj = {}
            dist_obj["grid_id"] = dist_raw["grid_id"]
            dist_obj["supermarket_id"] = dist_raw["supermarket_id"]
            dist_obj["status"] = dist_raw["status"]
            if dist_raw.get("rows"):
                row = dist_raw.get("rows")[0]
                if row.get("elements"):
                    element = row.get("elements")[0]
                    if element.get("distance"):
                        dist_obj["distance"] = element.get("distance").get("value")
                    else:
                        dist_obj["distance"] = None
                    if element.get("duration"):
                        dist_obj["driving_time"] = element.get("duration").get("value")
                    else:
                        dist_obj["driving_time"] = None
            else:
                dist_obj["distance"] = None
                dist_obj["driving_time"] = None
            dist_data.append(dist_obj)

    dist_df = pd.DataFrame(dist_data)
    output_file = conf.get("output").get("grid_to_supermarket_dist_data")
    dist_df.to_csv(output_file, index=False)
    logging.info("%s distance query results written to %s", len(dist_data), output_file)

    supermarket_counts = {}
    max_driving_time = int(conf.get("max_driving_time"))
    for grid_id in dist_df["grid_id"].unique():
        supermarket_counts[grid_id] = catch_supermarkets(grid_id, dist_df, max_driving_time)

    population_file = conf.get("input").get("grid_population_file")
    logging.info("Loading simulated population of city grids from %s", population_file)
    population_df = pd.read_csv(population_file)
    population_df["density"] = population_df.apply(
        lambda pop: compute_density(pop["id"], pop["population"], supermarket_counts),
        axis=1)
    density_df = population_df[["id", "density"]]

    grid_shape = conf.get("input").get("grid_shape_file")
    sf = shp.Reader(grid_shape)
    shp_df = read_shapefile(sf)
    logging.info("Shape of shp_df: %s", shp_df.shape)
    logging.info(shp_df.head())
    density_shp_df = pd.merge(shp_df, density_df, left_on='id', right_on='id', how='outer')
    density_shp_df = density_shp_df.drop("coords", axis=1)

    logging.info("Export supermarket density to text file")
    supermarket_density_file = conf.get("output").get("supermarket_density_file")
    density_shp_df.to_csv(supermarket_density_file, index=False)
    logging.info(density_shp_df.head())

    gdf = gpd.read_file(grid_shape)
    gdf = gdf.to_crs({'init': 'epsg:3857'})
    gdf["density"] = density_shp_df["density"]
    supermarket_density_shape_file = conf.get("output").get("supermarket_density_shape_file")
    gdf.to_file(supermarket_density_shape_file)
    logging.info("Supermarket density added to the shape file of city grid layer")