Beispiel #1
0
def initialize(clear=True):
    """
    Load the static data sets, record their status, then start scraping.

    clear: when truthy, clear_all_sources() is called before anything
           is populated.

    Each populate_static_* helper is treated as returning a truthy value
    on success. A source that succeeded is stamped with the time taken
    just before population began; one that did not is reported as None.
    This call does not return until scrape_all_sources(continuous=True)
    does.
    """
    if clear:
        clear_all_sources()

    started_at = datetime.datetime.now()
    menus_ok = populate_static_menus()
    directory_ok = populate_static_directory()
    film_series_ok = populate_static_filmseries()

    # Only the directory status really matters at this point, because the
    # menus get refreshed by the scraper immediately afterwards. These
    # static items are not expected to fail.
    status = {
        "menus": started_at if menus_ok else None,
        "directory": started_at if directory_ok else None,
        "film_series": started_at if film_series_ok else None,
    }
    print(status)
    db.update_status(status)

    scrape_all_sources(continuous=True)
Beispiel #2
0
def scrape_all_sources(continuous=True):
    """
    Calls all of the scraping methods from all
    of the scraping sources imported above.

    continuous: when truthy, keep scraping forever, sleeping SLEEP_TIME
                seconds between passes. When falsy, return after the
                first pass in which every source was scraped.

    After each pass the result is written with db.update_status():
    a key set to None marks that source as failed for this pass,
    otherwise the value is the time the source finished scraping.
    "events" is only stamped when both Wesleying and Wesleyan Events
    succeeded.

    A pass in which any source failed is retried (even when
    continuous is falsy) after waiting SLEEP_TIME seconds.

    TODO: Multi-threading
    """
    while True:
        print("SCRAPER: Scraping all sources")
        print("SCRAPER: Scraping Wesleying")
        wesleying_ok = scrape_wesleying()
        print("SCRAPER: Scraping Wesleyan Events")
        events_ok = scrape_wesleyan_events()
        events_time = datetime.datetime.now()
        print("SCRAPER: Scraping Usdan Menus")
        menus_ok = scrape_usdan_menus()
        menus_time = datetime.datetime.now()

        # status: if None, failed to update, will be
        # noted as an offline API until it works. Otherwise,
        # last updated time will update to time given as value.
        status = {
            "events": events_time if (wesleying_ok and events_ok) else None,
            "menus": menus_time if menus_ok else None,
        }
        print(status)
        db.update_status(status)

        # The parentheses matter: "not a and b and c" only negates a, which
        # would report success whenever events or menus failed.
        if not (wesleying_ok and events_ok and menus_ok):
            print("SCRAPER: ERROR, UNABLE TO SCRAPE ALL SOURCES")
            # Back off before retrying so a persistent failure does not
            # turn into a tight loop of requests against the sources.
            time.sleep(SLEEP_TIME)
            continue

        print("SCRAPER: Successfully scraped all sources at: %s"
              % datetime.datetime.today())
        if not continuous:
            break
        print("SCRAPER: Waiting...")
        time.sleep(SLEEP_TIME)