def scrape_reports_and_decisions(since=None, page=None, everything=False,
                                 coder_type=settings.GEOCODER):
    """
    Task that scrapes the reports and decisions page and saves the
    proposals that could be built from the scraped data.

    :param since: passed as `dt` to scrape.get_proposals_since(). When
        falsy, the value returned by last_run() is used instead. Ignored
        when `page` is given.
    :param page: when not None, only this page is scraped, using
        scrape.get_proposals_for_page().
    :param everything: not used by this function; kept so that existing
        callers passing it continue to work.
    :param coder_type: "google" selects the Google geocoder; any other
        value selects the ArcGIS geocoder.

    :returns: None. Proposals are saved as a side effect.
    """
    logger = scrape_reports_and_decisions.get_logger()

    if coder_type == "google":
        geocoder = gmaps.GoogleGeocoder(settings.GOOGLE_API_KEY)
        geocoder.bounds = settings.GEO_BOUNDS
        geocoder.region = settings.GEO_REGION
    else:
        geocoder = arcgis.ArcGISCoder(settings.ARCGIS_CLIENT_ID,
                                      settings.ARCGIS_CLIENT_SECRET)

    if page is not None:
        proposals_json = scrape.get_proposals_for_page(page, geocoder)
    else:
        if not since:
            # If there was no last run, the scraper will fetch all
            # proposals.
            since = last_run()
        proposals_json = scrape.get_proposals_since(dt=since,
                                                    geocoder=geocoder)

    proposals = []
    for p_dict in proposals_json:
        p = create_proposal_from_json(p_dict)
        if p:
            p.save()
            proposals.append(p)
        else:
            # The message needs a %s placeholder: without one, the logging
            # module fails to format the record and the dictionary that
            # caused the problem is never written to the log.
            logger.error("Could not create proposal from dictionary: %s",
                         p_dict)
def fetch_proposals(since=None, coder_type=settings.GEOCODER,
                    importers=Importers):
    """
    Task that asks each importer for recently updated proposals and
    creates or updates the corresponding Proposal records.

    :param since: a timestamp accepted by datetime.fromtimestamp(). When
        falsy, the `updated` value of the latest Proposal is used; if that
        is unavailable too, the Monday of the previous week (at midnight)
        is used.
    :param coder_type: "google" selects the Google geocoder; any other
        value selects the ArcGIS geocoder.
    :param importers: iterable of importer objects, each providing
        updated_since(since, geocoder).

    :returns: a list with the ids of the proposals that were saved.

    An importer that raises is logged and skipped; a proposal dictionary
    that cannot be saved is logged and skipped. Neither aborts the task.
    """
    if coder_type == "google":
        geocoder = gmaps.GoogleGeocoder(settings.GOOGLE_API_KEY)
        geocoder.bounds = settings.GEO_BOUNDS
    else:
        geocoder = arcgis.ArcGISCoder(settings.ARCGIS_CLIENT_ID,
                                      settings.ARCGIS_CLIENT_SECRET)

    # TODO: If `since` is not provided explicitly, we should probably determine
    # the appropriate date on a per-importer basis.
    if since:
        since = datetime.fromtimestamp(since)
    else:
        # latest() raises DoesNotExist on an empty table rather than
        # returning None, so it has to be caught for the fallback below to
        # be reachable on a fresh database.
        try:
            latest_proposal = Proposal.objects.latest()
        except Proposal.DoesNotExist:
            latest_proposal = None

        if latest_proposal:
            since = latest_proposal.updated

        if not since:
            # If there is no record of a previous run, fetch
            # proposals posted since the previous Monday.
            now = datetime.now().replace(hour=0, minute=0,
                                         second=0, microsecond=0)
            since = now - timedelta(days=7 + now.weekday())

    proposals_json = []
    for importer in importers:
        importer_name = type(importer).__name__
        try:
            # Materialize inside the try so that errors raised lazily
            # while iterating are attributed to this importer.
            found = list(importer.updated_since(since, geocoder))
        except Exception as err:
            task_logger.warning("Error in importer: %s - %s",
                                importer_name, err)
            continue

        task_logger.info("Fetched %i proposals from %s",
                         len(found), importer_name)
        proposals_json += found

    proposals = []
    for p_dict in proposals_json:
        try:
            (_is_new, p) = Proposal.create_or_update_proposal_from_dict(p_dict)
            p.save()
            proposals.append(p)
        except Exception as exc:
            task_logger.error("Could not create proposal from dictionary: %s",
                              p_dict)
            # exception() logs at ERROR level and attaches the traceback.
            task_logger.exception("%s", exc)

    return [p.id for p in proposals]
def scrape_reports_and_decisions(since=None, page=None,
                                 coder_type=settings.GEOCODER):
    """
    Task that scrapes the reports and decisions page

    :param since: passed as `dt` to scrape.get_proposals_since(). When
        falsy, the following are tried in order: the value returned by
        last_run(), the `updated` value of the latest Proposal, and
        finally the Monday of the previous week (at midnight). Ignored
        when `page` is given.
    :param page: when not None, only this page is scraped, using
        scrape.get_proposals_for_page().
    :param coder_type: "google" selects the Google geocoder; any other
        value selects the ArcGIS geocoder.

    :returns: the list of proposals that were created and saved.
    """
    # NOTE(review): `logger` is not bound inside this function; it is
    # assumed to be defined at module level -- confirm.
    if coder_type == "google":
        geocoder = gmaps.GoogleGeocoder(settings.GOOGLE_API_KEY)
        geocoder.bounds = settings.GEO_BOUNDS
        geocoder.region = settings.GEO_REGION
    else:
        geocoder = arcgis.ArcGISCoder(settings.ARCGIS_CLIENT_ID,
                                      settings.ARCGIS_CLIENT_SECRET)

    if page is not None:
        proposals_json = scrape.get_proposals_for_page(page, geocoder)
    else:
        if not since:
            # First choice: whatever last_run() reports.
            since = last_run()

        if not since:
            # latest() raises DoesNotExist on an empty table rather than
            # returning None, so it has to be caught for the fallback
            # below to be reachable on a fresh database.
            try:
                latest_proposal = Proposal.objects.latest()
            except Proposal.DoesNotExist:
                latest_proposal = None

            if latest_proposal:
                since = latest_proposal.updated

        if not since:
            # If there is no record of a previous run, fetch
            # proposals posted since the previous Monday.
            now = datetime.now().replace(hour=0, minute=0,
                                         second=0, microsecond=0)
            since = now - timedelta(days=7 + now.weekday())

        proposals_json = scrape.get_proposals_since(dt=since,
                                                    geocoder=geocoder)

    proposals = []
    for p_dict in proposals_json:
        p = create_proposal_from_json(p_dict)
        if p:
            p.save()
            proposals.append(p)
        else:
            # The message needs a %s placeholder: without one, the logging
            # module fails to format the record and the dictionary that
            # caused the problem is never written to the log.
            logger.error("Could not create proposal from dictionary: %s",
                         p_dict)

    return proposals
from django.conf import settings
from django.contrib.gis.geos import Point

from scripts import gmaps, arcgis


# The geocoder is chosen once, at import time, from settings.GEOCODER.
# Any value other than "google" or "arcgis" leaves Geocoder set to None.
if settings.GEOCODER == "google":
    Geocoder = gmaps.GoogleGeocoder(settings.GOOGLE_API_KEY)
    Geocoder.bounds = settings.GEO_BOUNDS
elif settings.GEOCODER == "arcgis":
    Geocoder = arcgis.ArcGISCoder(settings.ARCGIS_CLIENT_ID,
                                  settings.ARCGIS_CLIENT_SECRET)
else:
    Geocoder = None


def as_point(geo_response):
    """
    Build a Point (SRID 4326) from a geocoder response.

    :param geo_response: mapping with a "location" entry that holds
        "lng" and "lat" values.
    """
    location = geo_response["location"]
    return Point(x=location["lng"], y=location["lat"], srid=4326)


def geocode_tuples(addrs, **kwargs):
    """
    Geocode `addrs` with the module-level Geocoder.

    :param addrs: the addresses to geocode.
    :param kwargs: forwarded unchanged to Geocoder.geocode().

    :returns: a list with one entry per (address, response) pair. The
        entry is an (address, Point, formatted name) tuple when the
        response is truthy, and the bare address otherwise.
    """
    responses = Geocoder.geocode(addrs, **kwargs)

    results = []
    for address, response in zip(addrs, responses):
        if response:
            entry = (address, as_point(response), response["formatted_name"])
        else:
            # No usable response: hand the address back untouched.
            entry = address
        results.append(entry)
    return results