def run_scrape(year=None, order_number=None, end_year=2016):
    """Scrape every order ID from a starting point up to ``end_year``.

    Order IDs are built as ``str(year) + appendzeros(order_number)``. For
    each year the order numbers run from the starting number through
    ``limits[year]`` inclusive, and each ID is passed to ``scrape_order``.
    A failure on one order is appended to ``scrape.log`` and the run moves
    on to the next order.

    Args:
        year: First year to scrape; 1990 is used when ``None``.
        order_number: Order number to start from within the first year
            only; every later year starts at 1. 1 is used when ``None``.
        end_year: Year at which to stop, exclusive. Defaults to 2016.
    """
    first_year = 1990 if year is None else year
    first_number = 1 if order_number is None else order_number

    for scrape_year in range(first_year, end_year):
        last_number = limits[scrape_year]
        for number in range(first_number, last_number + 1):
            order_id = str(scrape_year) + appendzeros(number)
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print(order_id)
            try:
                scrape_order(order_id)
            except Exception as exc:
                # Best effort: record the failure and keep scraping.
                _log_scrape_failure(scrape_year, number, exc)
        # The resume offset applies to the first year only.
        first_number = 1


def _log_scrape_failure(year, order_number, exc):
    """Append one line describing a failed order scrape to ``scrape.log``."""
    # repr() is used instead of ``exc.message``: that attribute is empty for
    # exceptions raised with more than one argument (e.g. IOError), is not
    # guaranteed to be a string, and does not exist on Python 3.
    entry = "{0}-{1} : {2!r} {3}\n".format(
        year, order_number, exc, datetime.datetime.now()
    )
    with open('scrape.log', 'a') as scrapelog:
        scrapelog.write(entry)
if max_id is not None: max_id = max_id['max'] return max_id SCRAPE_RANGE = True YEAR_INT = datetime.datetime.utcnow().year old_max = get_max_id_this_year() ### NEW YEAR CASE - case where old_max is of last years if old_max == None: # try to get 0001 for this year; min_id = str(YEAR_INT) + "0001" scrape_order(min_id) new_max = get_max_id_this_year() if new_max == None: SCRAPE_RANGE = False else: old_max = new_max if SCRAPE_RANGE == True: # get 9999 for year max_id = str(YEAR_INT) + "9999" scrape_order(max_id) new_max = get_max_id_this_year() target_list = range( old_max+1, new_max ) # hit all in the middle