def main(roster_row):
    """Validate the roster row and run the shared OMSWeb crawler for it.

    Args:
        roster_row: Mapping describing the roster. Its 'State' and 'County'
            entries are read here; the whole row is handed to get_logger,
            crawlers.omsweb_crawler and record_error.

    Raises:
        SystemExit: With status 1, after any Exception raised while
            validating or crawling has been recorded.
    """
    # Bound before the try so the handler can tell whether get_logger()
    # itself was the call that failed (otherwise `logger` would be unbound).
    logger = None
    try:
        logger = get_logger(roster_row)  # Get a standard logger

        # Browser, URL and page index are not initialized here since, in the
        # save_single_page case, they can be done in the called function.

        ##########
        # Begin core specific scraping code
        # The row values are lower-cased before comparison, so THIS_STATE and
        # THIS_COUNTY are presumably lower-case constants (TODO confirm).
        if (roster_row['State'].lower() != THIS_STATE
                or roster_row['County'].lower() != THIS_COUNTY):
            raise Exception(
                "Expected county definition info from _%s, %s_, but found info: _%s_"
                % (THIS_COUNTY, THIS_STATE, roster_row))
        crawlers.omsweb_crawler(roster_row)  # try to call a known crawler if possible
        # End core specific scraping code
        ##########

        logger.info('complete!')
    except Exception as errorMessage:
        try:
            # `browser` is not created in this function; if no such name
            # exists at module level this raises NameError and we fall back
            # to recording the error without a browser.
            browser.close()
            record_error(message=str(errorMessage), roster_row=roster_row, browser=browser)
        except Exception:
            # Record error in S3 for a general error
            record_error(message=str(errorMessage), roster_row=roster_row)
        if logger is not None:
            logger.error('Error: %s', errorMessage)  # Log error
        sys.exit(1)
def main(roster_row):
    """Validate the roster row and run the shared OMSWeb crawler for it.

    OLD URL: https://omsweb.public-safety-cloud.com/jtclientweb/(S(3zu0pgqfrm1j0s501pktqn15))/jailtracker/index/Burleigh_County_ND
    UPDATED URL: https://omsweb.public-safety-cloud.com/jtclientweb/jailtracker/index/Burleigh_County_ND

    Args:
        roster_row: Mapping describing the roster. Its 'State' and 'County'
            entries are read here; the whole row is handed to get_logger,
            crawlers.omsweb_crawler and record_error.

    Raises:
        SystemExit: With status 1, after any Exception raised while
            validating or crawling has been recorded.
    """
    # Bound before the try so the handler can tell whether get_logger()
    # itself was the call that failed (otherwise `logger` would be unbound).
    logger = None
    try:
        logger = get_logger(roster_row)  # Get a standard logger

        # The standard browser / working-link URL / page_index setup is not
        # done here since, in the save_single_page case, it can be done in
        # the called function. Saving pages is likewise left to the crawler.

        ##########
        # Begin core specific scraping code
        # The row values are lower-cased before comparison, so THIS_STATE and
        # THIS_COUNTY are presumably lower-case constants (TODO confirm).
        if (roster_row['State'].lower() != THIS_STATE
                or roster_row['County'].lower() != THIS_COUNTY):
            raise Exception(
                "Expected county definition info from _%s, %s_, but found info: _%s_"
                % (THIS_COUNTY, THIS_STATE, roster_row))
        crawlers.omsweb_crawler(roster_row)  # try to call a known crawler if possible
        # End core specific scraping code
        ##########

        logger.info('complete!')
    except Exception as errorMessage:
        try:
            # `browser` is not created in this function; if no such name
            # exists at module level this raises NameError and we fall back
            # to recording the error without a browser.
            browser.close()
            record_error(message=str(errorMessage), roster_row=roster_row, browser=browser)
        except Exception:
            # Record error in S3 for a general error
            record_error(message=str(errorMessage), roster_row=roster_row)
        if logger is not None:
            logger.error('Error: %s', errorMessage)  # Log error
        sys.exit(1)
def main(roster_row):
    """Validate the roster row and run the shared OMSWeb crawler for it.

    Args:
        roster_row: Mapping describing the roster. Its 'State' and 'County'
            entries are read here; the whole row is handed to get_logger,
            crawlers.omsweb_crawler and record_error.

    Raises:
        SystemExit: With status 1, after any Exception raised while
            validating or crawling has been recorded.
    """
    # Bound before the try so the handler can tell whether get_logger()
    # itself was the call that failed (otherwise `logger` would be unbound).
    logger = None
    try:
        logger = get_logger(roster_row)  # Get a standard logger

        ##########
        # Begin core specific scraping code
        # The row values are lower-cased before comparison, so THIS_STATE and
        # THIS_COUNTY are presumably lower-case constants (TODO confirm).
        if (roster_row['State'].lower() != THIS_STATE
                or roster_row['County'].lower() != THIS_COUNTY):
            raise Exception(
                "Expected county definition info from _%s, %s_, but found info: _%s_"
                % (THIS_COUNTY, THIS_STATE, roster_row))
        crawlers.omsweb_crawler(roster_row)
        # End core specific scraping code
        ##########

        logger.info('complete!')
    except Exception as errorMessage:
        try:
            # `browser` is never bound in this function. Left unguarded, a
            # missing module-level `browser` would raise NameError right
            # here, masking the original error and skipping both
            # record_error and sys.exit(1).
            browser.close()
            record_error(message=str(errorMessage), roster_row=roster_row, browser=browser)
        except Exception:
            # Record error in S3 for a general error
            record_error(message=str(errorMessage), roster_row=roster_row)
        if logger is not None:
            logger.error('Error: %s', errorMessage)  # Log error
        sys.exit(1)