def fill_loop_by_room(config, survey_id):
    """
    Master routine for looping over rooms (after a search)
    to fill in the properties.

    Makes at most ``config.FILL_MAX_ROOM_COUNT`` passes. Each pass fetches one
    room with ``db_get_room_to_fill`` and asks it to download its details via
    ``ws_get_room_info``; a room for which that call returns a falsy value is
    saved as deleted.

    Args:
        config: configuration object; this routine reads
            ``FILL_MAX_ROOM_COUNT``, ``HTTP_PROXY_LIST``,
            ``RE_INIT_SLEEP_TIME`` and ``FLAGS_ADD`` from it.
        survey_id: survey identifier passed to ``db_get_room_to_fill``.

    Returns:
        None, either when ``db_get_room_to_fill`` has no room left to hand
        out or when the maximum number of passes has been made.

    Raises:
        Exception: anything other than ``AttributeError`` is logged and
            re-raised to the caller.
    """
    room_count = 0
    while room_count < config.FILL_MAX_ROOM_COUNT:
        # Reset on every pass: the AttributeError handler below must only
        # ever act on the room fetched during *this* pass, never on one left
        # over from an earlier pass (or on an unbound name on the first pass).
        listing = None
        try:
            if not config.HTTP_PROXY_LIST:
                logging.info(
                    "No proxies left: re-initialize after %s seconds",
                    config.RE_INIT_SLEEP_TIME)
                time.sleep(config.RE_INIT_SLEEP_TIME)  # be nice
                config = ABConfig()
            room_count += 1
            listing = db_get_room_to_fill(config, survey_id)
            if listing is None:
                return None
            if not listing.ws_get_room_info(config.FLAGS_ADD):
                # Airbnb now seems to return nothing if a room has gone
                listing.save_as_deleted()
        except AttributeError:
            if listing is None:
                # The failure happened before a room was fetched, so there is
                # nothing that can be marked as deleted.
                logging.error(
                    "Attribute error before a room was fetched: skipping.")
            else:
                logging.error("Attribute error: marking room as deleted.")
                listing.save_as_deleted()
        except Exception as e:
            logging.error("Error in fill_loop_by_room: %s", str(type(e)))
            raise
    return None
def main():
    """
    Command-line entry point: export the surveys of one city.

    Reads ``--city``, ``--project``, ``--format`` and ``--summary`` from the
    command line. When a city is given, calls ``export_city_summary`` (if
    ``--summary`` is set) or ``export_city_data``; otherwise prints the
    usage help.
    """
    config = ABConfig()
    cli = argparse.ArgumentParser(
        description="Create a spreadsheet of surveys from a city")

    # (flags, keyword settings) for every supported option, in display order.
    option_specs = (
        (('-c', '--city'),
         dict(metavar='city', action='store',
              help="""set the city""")),
        (('-p', '--project'),
         dict(metavar='project', action='store', default="public",
              help="""the project determines the table or view: public for room, gis for listing_city, default public""")),
        (('-f', '--format'),
         dict(metavar='format', action='store', default="xlsx",
              help="""output format (xlsx or csv), default xlsx""")),
        (('-s', '--summary'),
         dict(action='store_true', default=False,
              help="create a summary spreadsheet instead of raw data")),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    options = cli.parse_args()

    # Nothing can be exported without a city: show the usage text instead.
    if not options.city:
        cli.print_help()
        return

    project = options.project.lower()
    if options.summary:
        export_city_summary(config, options.city, project)
    else:
        export_city_data(config, options.city, project, options.format)
def main():
    """
    Command-line entry point: run the single action selected by the arguments.

    Parses the command line with ``parse_args``, configures logging, builds
    an ``ABConfig`` from the parsed arguments, and then runs the first action
    whose argument is set (search, fill, add, display, list or print). When
    no action is selected the usage help is printed.

    Exit status:
        * a deliberate ``SystemExit`` raised by an action propagates with its
          own status;
        * Ctrl-C exits quietly;
        * any other exception is logged with its traceback and the process
          exits with status 1, so callers can detect the failure.
    """
    (parser, args) = parse_args()
    logging.basicConfig(format='%(levelname)-8s%(message)s')
    ab_config = ABConfig(args)
    try:
        # --- searches that store their results -----------------------------
        if args.search:
            survey = ABSurveyByNeighborhood(ab_config, args.search)
            survey.search(ab_config.FLAGS_ADD)
        elif args.search_by_neighborhood:
            survey = ABSurveyByNeighborhood(ab_config,
                                            args.search_by_neighborhood)
            survey.search(ab_config.FLAGS_ADD)
        elif args.search_by_zipcode:
            survey = ABSurveyByZipcode(ab_config, args.search_by_zipcode)
            survey.search(ab_config.FLAGS_ADD)
        elif args.search_by_bounding_box:
            survey = ABSurveyByBoundingBox(ab_config,
                                           args.search_by_bounding_box)
            survey.search(ab_config.FLAGS_ADD)
        # --- filling in and adding data ------------------------------------
        elif args.fill is not None:
            # Compared against None (not truthiness) so that a falsy value
            # such as 0 still selects this action.
            fill_loop_by_room(ab_config, args.fill)
        elif args.addsearcharea:
            ws_get_city_info(ab_config, args.addsearcharea,
                             ab_config.FLAGS_ADD)
        elif args.addsurvey:
            db_add_survey(ab_config, args.addsurvey)
        # --- database checks, display and listing --------------------------
        elif args.dbping:
            db_ping(ab_config)
        elif args.displayhost:
            display_host(ab_config, args.displayhost)
        elif args.displayroom:
            display_room(ab_config, args.displayroom)
        elif args.listsearcharea:
            list_search_area_info(ab_config, args.listsearcharea)
        elif args.listroom:
            listing = ABListing(ab_config, args.listroom, None)
            listing.print_from_db()
        elif args.listsurveys:
            list_surveys(ab_config)
        # --- print-only variants (FLAGS_PRINT instead of FLAGS_ADD) --------
        elif args.printsearcharea:
            ws_get_city_info(ab_config, args.printsearcharea,
                             ab_config.FLAGS_PRINT)
        elif args.printroom:
            listing = ABListing(ab_config, args.printroom, None)
            listing.ws_get_room_info(ab_config.FLAGS_PRINT)
        elif args.printsearch:
            survey = ABSurveyByNeighborhood(ab_config, args.printsearch)
            survey.search(ab_config.FLAGS_PRINT)
        elif args.printsearch_by_neighborhood:
            survey = ABSurveyByNeighborhood(ab_config,
                                            args.printsearch_by_neighborhood)
            survey.search(ab_config.FLAGS_PRINT)
        elif args.printsearch_by_bounding_box:
            survey = ABSurveyByBoundingBox(ab_config,
                                           args.printsearch_by_bounding_box)
            survey.search(ab_config.FLAGS_PRINT)
        elif args.printsearch_by_zipcode:
            survey = ABSurveyByZipcode(ab_config,
                                       args.printsearch_by_zipcode)
            survey.search(ab_config.FLAGS_PRINT)
        else:
            parser.print_help()
    except SystemExit:
        # A deliberate exit: re-raise unchanged so its status is preserved
        # instead of being replaced by a plain sys.exit().
        raise
    except KeyboardInterrupt:
        # Interrupted by the user: leave quietly, without a traceback.
        sys.exit()
    except Exception:
        logging.exception("Top level exception handler: quitting.")
        # Non-zero status: the run failed, and callers must be able to tell.
        sys.exit(1)
def main():
    """
    Run the publishing steps in order.

    Collects the surveys and the city views, writes CSV files into the
    ``s3_files`` directory, zips and uploads them, and finally writes the
    HTML file from the surveys, the city views and the counts returned by
    ``write_csv_files``.
    """
    config = ABConfig()

    # Gather what has to be published.
    all_surveys = surveys(config)
    views = cities(config, all_surveys)
    logging.debug(views)

    # Produce the files, then package and publish them.
    output_dir = "s3_files"
    counts = write_csv_files(config, all_surveys, views, output_dir)
    zip_csv_files(views, output_dir)
    upload_files(views, all_surveys, output_dir)
    write_html_file(all_surveys, views, counts)
def main():
    """
    Command-line entry point: export the surveys of one city.

    Reads the options below from the command line, builds an ``ABConfig``
    from the parsed arguments, and then calls ``export_city_summary`` (when
    ``--summary`` is set) or ``export_city_data`` for the requested city.
    Without a city the usage help is printed.

    Options:
        -cfg/--config_file  explicit configuration file
        -c/--city           city to export
        -p/--project        "public" or "gis" (lower-cased before use)
        -f/--format         output format, default xlsx
        -s/--summary        summary spreadsheet instead of raw data
        -sd/--start_date    start date handed to the export routines
    """
    parser = argparse.ArgumentParser(
        description="Create a spreadsheet of surveys from a city")
    parser.add_argument(
        "-cfg", "--config_file",
        metavar="config_file", action="store", default=None,
        help="""explicitly set configuration file, instead of using the default <username>.config""")
    parser.add_argument(
        '-c', '--city',
        metavar='city', action='store',
        help="""set the city""")
    parser.add_argument(
        '-p', '--project',
        metavar='project', action='store', default="public",
        help="""the project determines the table or view: public for room, gis for listing_city, default public""")
    parser.add_argument(
        '-f', '--format',
        metavar='format', action='store', default="xlsx",
        help="""output format (xlsx or csv), default xlsx""")
    parser.add_argument(
        '-s', '--summary',
        action='store_true', default=False,
        help="create a summary spreadsheet instead of raw data")
    # The help text used to be a copy of the --summary help; it now describes
    # this option. NOTE(review): the exact filtering done with start_date
    # lives in the export routines - refine the wording if needed.
    parser.add_argument(
        '-sd', '--start_date',
        metavar="start_date", action='store', default=DEFAULT_START_DATE,
        help="start date for the export (default: %(default)s)")
    args = parser.parse_args()

    # The configuration is built from the parsed arguments (e.g. the
    # optional --config_file), so it can only be created after parsing.
    ab_config = ABConfig(args)

    if not args.city:
        parser.print_help()
        return

    project = args.project.lower()
    if args.summary:
        export_city_summary(ab_config, args.city, project, args.start_date)
    else:
        export_city_data(ab_config, args.city, project,
                         args.format, args.start_date)
def main():
    """
    Controlling routine that calls the others

    Parses the command line, determines a bounding box (from ``--sa`` via
    ``BoundingBox.from_google`` and/or from the ``--bb_*`` arguments via
    ``BoundingBox.from_args``, the latter taking precedence), and then
    performs up to ``--count`` lookups (default 1000). Each lookup selects a
    location with ``select_lat_lng``, reverse geocodes it, and stores the
    result with ``update_location``.

    Exits with status 0 when ``--count`` is 0 or when ``select_lat_lng``
    reports no more locations. Exits through ``parser.error`` when no
    bounding box can be determined from the arguments.
    """
    config = ABConfig()
    parser = argparse.ArgumentParser(description='reverse geocode')
    # usage='%(prog)s [options]')
    # These arguments should be more carefully constructed. Right now there is
    # no defining what is required, and what is optional, and what contradicts
    # what.
    parser.add_argument("--sa", metavar="search_area", type=str,
                        help="""search_area""")
    parser.add_argument("--lat", metavar="lat", type=float,
                        help="""lat""")
    parser.add_argument("--lng", metavar="lng", type=float,
                        help="""lng""")
    parser.add_argument("--bb_n_lat", metavar="bb_n_lat", type=float,
                        help="""bb_n_lat""")
    parser.add_argument("--bb_s_lat", metavar="bb_s_lat", type=float,
                        help="""bb_s_lat""")
    parser.add_argument("--bb_e_lng", metavar="bb_e_lng", type=float,
                        help="""bb_e_lng""")
    parser.add_argument("--bb_w_lng", metavar="bb_w_lng", type=float,
                        help="""bb_w_lng""")
    parser.add_argument("--count", metavar="count", type=int,
                        help="""number_of_lookups""")
    args = parser.parse_args()
    search_area = args.sa

    # Only fall back to the default when --count was not given at all, so an
    # explicit "--count 0" reaches the early exit below instead of silently
    # becoming 1000.
    count = 1000 if args.count is None else args.count

    bounding_box = None
    if search_area:
        bounding_box = BoundingBox.from_google(config, search_area)
        logger.info("Bounding box for %s = (%s, %s, %s, %s)",
                    search_area,
                    bounding_box.bb_s_lat, bounding_box.bb_n_lat,
                    bounding_box.bb_w_lng, bounding_box.bb_e_lng)
    # Compared against None: a northern latitude of 0.0 is a valid value and
    # must not be mistaken for "argument not supplied".
    if args.bb_n_lat is not None:
        bounding_box = BoundingBox.from_args(config, args)
    if not count:
        sys.exit(0)
    if bounding_box is None:
        # Without this check the loop below would fail on an unbound name.
        parser.error("no bounding box: supply --sa or the --bb_* arguments")

    # 1-based and inclusive, so exactly `count` lookups are attempted and the
    # "x of count" progress messages finish at "count of count".
    for lookup in range(1, count + 1):
        location = select_lat_lng(config, bounding_box)
        if location is None:
            logger.info("No more locations")
            sys.exit(0)
        location = reverse_geocode(config, location)
        logger.debug(
            "nbhd=%s, subloc=%s, loc=%s, l2=%s, l1=%s, country=%s.",
            location.neighborhood, location.sublocality, location.locality,
            location.level2, location.level1, location.country)
        success = update_location(config, location)
        if success:
            logger.info("Update succeeded: %s, %s: %s of %s",
                        location.lat_round, location.lng_round,
                        lookup, count)
        else:
            # Logger.warn is a deprecated alias; use Logger.warning.
            logger.warning("Update failed: %s, %s: %s of %s",
                           location.lat_round, location.lng_round,
                           lookup, count)
def ws_request(config, url, params=None):
    """
    Individual web request: returns a response object

    Sleeps for a random fraction of ``config.REQUEST_SLEEP`` seconds, picks a
    random user agent and (when ``config.HTTP_PROXY_LIST`` is not empty) a
    random proxy, then issues a GET request with ``requests.get``.

    Args:
        config: configuration object; this routine reads ``REQUEST_SLEEP``,
            ``HTTP_TIMEOUT``, ``USER_AGENT_LIST``, ``HTTP_PROXY_LIST`` and
            ``RE_INIT_SLEEP_TIME``, and may remove entries from
            ``HTTP_PROXY_LIST`` in place.
        url: address to request.
        params: optional query parameters passed on to ``requests.get``.

    Returns:
        The response object (including a 503 response received through a
        proxy), or None when the request raised a ``requests`` exception or
        any other ``Exception``.

    Raises:
        SystemExit: on a 503 response when no proxy was used.
    """
    # Bound before the try block so the exception handlers can always read it.
    http_proxy = None
    try:
        # wait
        sleep_time = config.REQUEST_SLEEP * random.random()
        logger.debug("sleeping %s seconds...", str(sleep_time)[:7])
        time.sleep(sleep_time)  # be nice

        timeout = config.HTTP_TIMEOUT

        # If a list of user agent strings is supplied, use it
        if config.USER_AGENT_LIST:
            user_agent = random.choice(config.USER_AGENT_LIST)
            headers = {"User-Agent": user_agent}
        else:
            headers = {'User-Agent': 'Mozilla/5.0'}

        # If there is a list of proxies supplied, use it
        logger.debug("Using %s proxies.", len(config.HTTP_PROXY_LIST))
        if len(config.HTTP_PROXY_LIST) > 0:
            http_proxy = random.choice(config.HTTP_PROXY_LIST)
            proxies = {
                'http': http_proxy,
                'https': http_proxy,
            }
            logger.debug("Requesting page through proxy %s", http_proxy)
        else:
            proxies = None

        # Now make the request
        response = requests.get(url, params, timeout=timeout,
                                headers=headers, proxies=proxies)
        if response.status_code == 503:
            if http_proxy:
                logger.warning("503 error for proxy %s", http_proxy)
                # Drop the proxy only half of the time, so a single 503 does
                # not always cost a proxy.
                if random.choice([True, False]):
                    # Remove first, then report: the count in the message is
                    # the number of proxies actually left.
                    config.HTTP_PROXY_LIST.remove(http_proxy)
                    logger.warning(
                        "Removing %s from proxy list. %s proxies remaining",
                        http_proxy, len(config.HTTP_PROXY_LIST))
                    if len(config.HTTP_PROXY_LIST) == 0:
                        # fill proxy list again, wait a long time, then restart
                        logger.error("No proxies in list. Re-initializing.")
                        time.sleep(config.RE_INIT_SLEEP_TIME)
                        # NOTE(review): this rebinds only the local name; the
                        # caller's config object is not replaced and the new
                        # object is not used again in this function.
                        config = ABConfig()
            else:
                logger.error(
                    "Quitting on 503 error (no proxies left or not using proxies)"
                )
                sys.exit()
        return response
    except (SystemExit, KeyboardInterrupt):
        raise
    except requests.exceptions.ConnectionError:
        # For requests error and exceptions, see
        # http://docs.python-requests.org/en/latest/user/quickstart/
        # errors-and-exceptions
        logger.error("Network problem: ConnectionError")
        if random.choice([True, False]):
            if http_proxy is None or len(config.HTTP_PROXY_LIST) < 1:
                # fill the proxy list again, and wait a long time, then restart
                logger.error("No proxies left in the list. Re-initializing.")
                time.sleep(config.RE_INIT_SLEEP_TIME)  # be nice
            else:
                # remove the proxy from the proxy list
                logger.warning("Removing %s from proxy list.", http_proxy)
                config.HTTP_PROXY_LIST.remove(http_proxy)
        return None
    except requests.exceptions.HTTPError:
        logger.error("Invalid HTTP response: HTTPError")
        return None
    except requests.exceptions.Timeout:
        logger.error("Request timed out: Timeout")
        return None
    except requests.exceptions.TooManyRedirects:
        logger.error("Too many redirects: TooManyRedirects")
        return None
    except requests.exceptions.RequestException:
        logger.error("Unidentified Requests error: RequestException")
        return None
    except Exception as e:
        logger.exception("Exception type: %s", type(e).__name__)
        return None