def initialize(clear=True):
    """
    Populates the static sources, records which of them succeeded,
    and then hands off to the continuous scraper.

    clear: when truthy, clear_all_sources() is called before anything
           is populated.

    Every static source that reports a truthy result is stamped with the
    same timestamp (taken before population starts); a source that
    reports a falsy result keeps None in the status dict.
    """
    if clear:
        clear_all_sources()

    started_at = datetime.datetime.now()

    # Populate in a fixed order: menus, directory, film series.
    menus_ok = populate_static_menus()
    directory_ok = populate_static_directory()
    film_series_ok = populate_static_filmseries()

    # Original author's note: only the directory really needs updating
    # here, since the menus status is refreshed by the scraper started
    # below. These static items are not expected to fail.
    status = {
        "menus": started_at if menus_ok else None,
        "directory": started_at if directory_ok else None,
        "film_series": started_at if film_series_ok else None,
    }
    print(status)
    db.update_status(status)

    scrape_all_sources(continuous=True)
def scrape_all_sources(continuous=True):
    """
    Calls all of the scraping methods
    from all of the scraping sources imported above.

    Each pass scrapes Wesleying, Wesleyan Events and the Usdan menus and
    then reports the outcome through db.update_status():
      - "events" is timestamped only if both event scrapers returned a
        truthy result;
      - "menus" is timestamped only if the menu scraper did;
      - anything that failed is left as None.

    continuous: when truthy, sleep SLEEP_TIME after a successful pass and
                scrape again, forever. When falsy, return after the first
                fully successful pass.

    If any scraper fails, the pass is retried after SLEEP_TIME seconds,
    whatever the value of `continuous`.

    TODO: Multi-threading
    """
    while True:
        print("SCRAPER: Scraping all sources")

        print("SCRAPER: Scraping Wesleying")
        wesleying_ok = scrape_wesleying()
        print("SCRAPER: Scraping Wesleyan Events")
        wesleyan_events_ok = scrape_wesleyan_events()
        events_time = datetime.datetime.now()

        print("SCRAPER: Scraping Usdan Menus")
        menus_ok = scrape_usdan_menus()
        menus_time = datetime.datetime.now()

        # status: if None, failed to update, will be
        # noted as an offline API until it works. Otherwise,
        # last updated time will update to time given as value.
        status = {
            "events": events_time if (wesleying_ok and wesleyan_events_ok) else None,
            "menus": menus_time if menus_ok else None,
        }
        print(status)
        db.update_status(status)

        # `not` binds tighter than `and`, so the whole conjunction must be
        # parenthesised: a failure of ANY scraper counts as an error.
        if not (wesleying_ok and wesleyan_events_ok and menus_ok):
            print("SCRAPER: ERROR, UNABLE TO SCRAPE ALL SOURCES")
            # Back off before retrying so a broken source is not hit in a
            # tight loop.
            time.sleep(SLEEP_TIME)
            continue

        # TODO: Update status db
        # Single-argument print keeps the output identical to the
        # original two-item print statement.
        print("SCRAPER: Successfully scraped all sources at: %s"
              % datetime.datetime.today())

        if not continuous:
            return

        print("SCRAPER: Waiting...")
        time.sleep(SLEEP_TIME)