def augment_trip_plan(raw_trip_plan):
    location_latlng = raw_trip_plan.location_latlng.to_json_obj() if raw_trip_plan.location_latlng else None
    entities = utils.parallelize(
        utils.retryable(augment_entity, retries=3),
        [(e, location_latlng) for e in raw_trip_plan.entities])
    trip_plan = raw_trip_plan.copy()
    for i, entity in enumerate(entities):
        # If there's an RPC error, some of these may come back as None.
        # So as a fallback make sure we at least save the incoming entity.
        # TODO: Return an error message here so the user can be notified
        # that not all entities were saved.
        if not entity:
            entities[i] = raw_trip_plan.entities[i]
    trip_plan.entities = entities
    return trip_plan
def build_scrapers(url, client_page_source=None, force_fetch_page=False,
        allow_expansion=True, for_guide=False):
    page_source_tree = html_parsing.parse_tree_from_string(client_page_source) if client_page_source else None
    if not page_source_tree and (url_requires_server_page_source(url) or force_fetch_page):
        page_source_tree = html_parsing.parse_tree(url)
    scraped_pages = []
    for scraper_class in ALL_SCRAPERS:
        handleable_urls = scraper_class.handleable_urls(url, page_source_tree, allow_expansion)
        if handleable_urls:
            reqs = [html_parsing.make_request(u) for u in handleable_urls]
            resps = utils.parallelize(utils.retryable(urllib2.urlopen, 3), [(req,) for req in reqs])
            # Use a distinct loop variable so the incoming url argument isn't shadowed.
            for handleable_url, resp in zip(handleable_urls, resps):
                if not resp:
                    print "Failed to fetch url: %s" % handleable_url
                    continue
                tree = etree.parse(resp, html_parsing.htmlparser())
                scraper = scraper_class(handleable_url, tree, for_guide)
                scraped_pages.append(scraper)
            # Only the first scraper class that can handle this url is used.
            break
    return scraped_pages
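
# ---------------------------------------------------------------------------
# Illustrative sketch only: both functions above rely on utils.retryable and
# utils.parallelize, which are defined elsewhere in this codebase. The
# definitions below are NOT the project's actual implementations; they are a
# minimal reconstruction inferred from the call sites above, assuming that
# retryable(fn, retries) wraps fn so that exceptions trigger retries (returning
# None once the retries are exhausted), and that parallelize(fn, arg_tuples)
# applies fn to each argument tuple concurrently and returns results in input
# order, with None standing in for any call that ultimately failed.
# ---------------------------------------------------------------------------

import threading  # would normally live with the other imports at the top of the file


def _retryable_sketch(fn, retries=3):
    # Returns a wrapper that retries fn up to `retries` times, then gives up
    # and returns None so callers can detect the failure (assumed behavior).
    def wrapper(*args, **kwargs):
        for attempt in range(retries):
            try:
                return fn(*args, **kwargs)
            except Exception:
                if attempt == retries - 1:
                    return None
    return wrapper


def _parallelize_sketch(fn, arg_tuples):
    # Runs fn once per argument tuple on a worker thread and returns the
    # results in input order (assumed behavior of utils.parallelize).
    results = [None] * len(arg_tuples)

    def run(index, args):
        results[index] = fn(*args)

    threads = [threading.Thread(target=run, args=(i, args))
               for i, args in enumerate(arg_tuples)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return results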