Ejemplo n.º 1
0
def scrape_reports_and_decisions(since=None, page=None, everything=False,
                                 coder_type=settings.GEOCODER):
    """
    Scrape proposals, geocode them, and save every one that can be built.

    :param since: passed as ``dt`` to ``scrape.get_proposals_since``; when
        falsy, the result of ``last_run()`` is used instead
    :param page: when not None, only that page is fetched through
        ``scrape.get_proposals_for_page`` and ``since`` is ignored
    :param everything: not used by this function; kept so existing callers
        that pass it keep working
    :param coder_type: ``"google"`` selects the Google geocoder; any other
        value selects the ArcGIS geocoder
    """
    logger = scrape_reports_and_decisions.get_logger()

    if coder_type == "google":
        geocoder = gmaps.GoogleGeocoder(settings.GOOGLE_API_KEY)
        geocoder.bounds = settings.GEO_BOUNDS
        geocoder.region = settings.GEO_REGION
    else:
        geocoder = arcgis.ArcGISCoder(settings.ARCGIS_CLIENT_ID,
                                      settings.ARCGIS_CLIENT_SECRET)

    if page is not None:
        proposals_json = scrape.get_proposals_for_page(page, geocoder)
    else:
        if not since:
            # Fall back to the time of the last run. NOTE(review): if
            # last_run() returns nothing, the scraper presumably fetches
            # all proposals -- confirm against scrape.get_proposals_since.
            since = last_run()
        proposals_json = scrape.get_proposals_since(dt=since, geocoder=geocoder)

    proposals = []

    for p_dict in proposals_json:
        p = create_proposal_from_json(p_dict)

        if p:
            p.save()
            proposals.append(p)
        else:
            # The %s placeholder is required: without it the logging module
            # cannot merge p_dict into the message and reports a formatting
            # error instead of the intended record.
            logger.error("Could not create proposal from dictionary: %s",
                         p_dict)
Ejemplo n.º 2
0
 def setUp(self):
     """Set up the region, sample addresses, geocoder and importer URL."""
     self.region = "Somerville, MA"
     self.addresses = [
         "240 Elm Street",
         "100 Broadway",
         "115 Medford Street",
     ]
     api_key = settings.GOOGLE_API_KEY
     self.geocoder = gmaps.GoogleGeocoder(api_key)
     self.proposal_importer_url = "https://58kr1azj04.execute-api.us-east-1.amazonaws.com/prod/somervillema"
Ejemplo n.º 3
0
def fetch_proposals(since=None,
                    coder_type=settings.GEOCODER,
                    importers=Importers):
    """
    Task that fetches updated proposals from every importer and saves them.

    :param since: POSIX timestamp; only proposals updated after it are
        requested. When falsy, the ``updated`` value of the latest stored
        proposal is used, and if there is none, midnight of the Monday of
        the previous week.
    :param coder_type: ``"google"`` selects the Google geocoder; any other
        value selects the ArcGIS geocoder
    :param importers: iterable of importer objects exposing
        ``updated_since(since, geocoder)``
    :returns: list with the ids of the proposals that were saved
    """
    if coder_type == "google":
        geocoder = gmaps.GoogleGeocoder(settings.GOOGLE_API_KEY)
        geocoder.bounds = settings.GEO_BOUNDS
    else:
        geocoder = arcgis.ArcGISCoder(settings.ARCGIS_CLIENT_ID,
                                      settings.ARCGIS_CLIENT_SECRET)

    # TODO: If `since` is not provided explicitly, we should probably determine
    # the appropriate date on a per-importer basis.
    if since:
        since = datetime.fromtimestamp(since)
    else:
        # latest() raises DoesNotExist on an empty table instead of
        # returning None, so it must be caught for the fallback below to
        # be reachable.
        try:
            latest_proposal = Proposal.objects.latest()
        except Proposal.DoesNotExist:
            latest_proposal = None

        if latest_proposal:
            since = latest_proposal.updated

        if not since:
            # If there is no record of a previous run, fetch
            # proposals posted since the Monday of the previous week.
            now = datetime.now().replace(hour=0,
                                         minute=0,
                                         second=0,
                                         microsecond=0)
            since = now - timedelta(days=7 + now.weekday())

    proposals_json = []
    for importer in importers:
        importer_name = type(importer).__name__
        try:
            found = list(importer.updated_since(since, geocoder))
        except Exception as err:
            # One failing importer must not prevent the others from running.
            task_logger.warning("Error in importer: %s - %s", importer_name,
                                err)
            continue

        task_logger.info("Fetched %i proposals from %s", len(found),
                         importer_name)
        proposals_json += found

    proposals = []

    for p_dict in proposals_json:
        try:
            _, p = Proposal.create_or_update_proposal_from_dict(p_dict)
            p.save()
            proposals.append(p)
        except Exception:
            # Best effort: log the failure with its traceback and keep
            # processing the remaining dictionaries.
            task_logger.exception(
                "Could not create proposal from dictionary: %s", p_dict)

    return [p.id for p in proposals]
Ejemplo n.º 4
0
def scrape_reports_and_decisions(since=None,
                                 page=None,
                                 coder_type=settings.GEOCODER):
    """
    Task that scrapes the reports and decisions page.

    :param since: passed as ``dt`` to ``scrape.get_proposals_since``. When
        falsy, it falls back in order to ``last_run()``, the ``updated``
        value of the latest stored proposal, and finally midnight of the
        Monday of the previous week.
    :param page: when not None, only that page is fetched through
        ``scrape.get_proposals_for_page`` and ``since`` is ignored
    :param coder_type: ``"google"`` selects the Google geocoder; any other
        value selects the ArcGIS geocoder
    :returns: list of the proposals that were created and saved
    """
    if coder_type == "google":
        geocoder = gmaps.GoogleGeocoder(settings.GOOGLE_API_KEY)
        geocoder.bounds = settings.GEO_BOUNDS
        geocoder.region = settings.GEO_REGION
    else:
        geocoder = arcgis.ArcGISCoder(settings.ARCGIS_CLIENT_ID,
                                      settings.ARCGIS_CLIENT_SECRET)

    if page is not None:
        proposals_json = scrape.get_proposals_for_page(page, geocoder)
    else:
        if not since:
            # Prefer the recorded time of the last run.
            since = last_run()

            if not since:
                # latest() raises DoesNotExist on an empty table instead of
                # returning None, so it must be caught for the fallback
                # below to be reachable.
                try:
                    latest_proposal = Proposal.objects.latest()
                except Proposal.DoesNotExist:
                    latest_proposal = None

                if latest_proposal:
                    since = latest_proposal.updated

            if not since:
                # If there is no record of a previous run, fetch
                # proposals posted since the Monday of the previous week.
                now = datetime.now().replace(hour=0,
                                             minute=0,
                                             second=0,
                                             microsecond=0)
                since = now - timedelta(days=7 + now.weekday())

        proposals_json = scrape.get_proposals_since(dt=since,
                                                    geocoder=geocoder)

    proposals = []

    for p_dict in proposals_json:
        p = create_proposal_from_json(p_dict)

        if p:
            p.save()
            proposals.append(p)
        else:
            # The %s placeholder is required: without it the logging module
            # cannot merge p_dict into the message and reports a formatting
            # error instead of the intended record.
            logger.error("Could not create proposal from dictionary: %s",
                         p_dict)

    return proposals
Ejemplo n.º 5
0
from django.conf import settings

from django.contrib.gis.geos import Point

from scripts import gmaps, arcgis

# Module-wide geocoder chosen by settings.GEOCODER; stays None when the
# setting names neither supported backend.
Geocoder = None
if settings.GEOCODER == "google":
    Geocoder = gmaps.GoogleGeocoder(settings.GOOGLE_API_KEY)
    Geocoder.bounds = settings.GEO_BOUNDS
elif settings.GEOCODER == "arcgis":
    Geocoder = arcgis.ArcGISCoder(
        settings.ARCGIS_CLIENT_ID,
        settings.ARCGIS_CLIENT_SECRET,
    )


def as_point(geo_response):
    """
    Build a Point (SRID 4326) from the ``location`` entry of a geocoder
    response, using its ``lng`` as x and its ``lat`` as y.
    """
    location = geo_response["location"]
    return Point(x=location["lng"], y=location["lat"], srid=4326)


def geocode_tuples(addrs, **kwargs):
    """
    Geocode ``addrs`` with the module-level Geocoder.

    Returns a list with one entry per address/response pair: the tuple
    ``(address, point, formatted_name)`` when the response is truthy,
    otherwise the bare address. Extra keyword arguments are forwarded to
    ``Geocoder.geocode``.
    """
    responses = Geocoder.geocode(addrs, **kwargs)
    results = []
    for address, response in zip(addrs, responses):
        if response:
            results.append(
                (address, as_point(response), response["formatted_name"]))
        else:
            results.append(address)
    return results