コード例 #1
0
ファイル: scrape_all.py プロジェクト: structur/open-cabinet
def run_scrape(year=None, order_number=None):
    """Scrape every order from a starting point up to (not including) 2016.

    Args:
        year: First year to scrape. Defaults to 1990 when None.
        order_number: First order number to scrape within the first year.
            Defaults to 1 when None. Every later year starts again at 1.

    Any exception raised by ``scrape_order`` is appended to ``scrape.log``
    and the loop moves on to the next order, so one bad order never aborts
    the whole run.
    """
    END_YEAR = 2016  # exclusive upper bound of the year loop
    START_YEAR = 1990 if year is None else year
    START_NUM = 1 if order_number is None else order_number

    for year in range(START_YEAR, END_YEAR):
        # NOTE(review): limits[year] is presumably the highest order number
        # known for that year (hence the +1 for range) -- confirm.
        END_NUM = limits[year] + 1

        for ORDER_NUMBER in range(START_NUM, END_NUM):
            ORDER_ID = str(year) + appendzeros(ORDER_NUMBER)
            print(ORDER_ID)

            try:
                scrape_order(ORDER_ID)
            except Exception as e:
                # str(e) instead of e.message: the attribute is deprecated,
                # empty for multi-argument exceptions and absent on
                # Python 3, where reading it would raise inside this handler.
                # A space now separates the message from the timestamp.
                err_str = (
                    str(year) + "-" + str(ORDER_NUMBER) + " : "
                    + str(e) + " " + str(datetime.datetime.now())
                )
                with open('scrape.log', 'a') as scrapelog:
                    # Plain newline: mixing in a unicode literal would force
                    # an implicit ASCII decode of the message on Python 2.
                    scrapelog.write(err_str + "\n")

        # Only the first year honours the caller-supplied starting number.
        START_NUM = 1
コード例 #2
0
ファイル: dailyscrape.py プロジェクト: sfprime/sfpopenorders
    if max_id is not None:
        max_id = max_id['max']
    return max_id



# Whether the range scrape at the bottom of this script should run.
SCRAPE_RANGE = True
YEAR_INT = datetime.datetime.utcnow().year

old_max = get_max_id_this_year()

### NEW YEAR CASE - no max id is stored for the current year yet
if old_max is None:
    # try to get 0001 for this year
    min_id = str(YEAR_INT) + "0001"
    scrape_order(min_id)
    new_max = get_max_id_this_year()
    if new_max is None:
        # Still nothing for this year, so there is no range to walk.
        SCRAPE_RANGE = False
    else:
        old_max = new_max



if SCRAPE_RANGE:
    # get 9999 for year
    # NOTE(review): presumably requesting 9999 makes the newest order show
    # up as the stored max -- confirm against scrape_order.
    max_id = str(YEAR_INT) + "9999"
    scrape_order(max_id)
    new_max = get_max_id_this_year()
    # Ids strictly between the previous max and the new max.
    target_list = range(old_max + 1, new_max)
    # hit all in the middle
コード例 #3
0
    if max_id is not None:
        max_id = max_id['max']
    return max_id



# Whether the range scrape at the bottom of this script should run.
SCRAPE_RANGE = True
YEAR_INT = datetime.datetime.utcnow().year

old_max = get_max_id_this_year()

### NEW YEAR CASE - no max id is stored for the current year yet
if old_max is None:
    # try to get 0001 for this year
    min_id = str(YEAR_INT) + "0001"
    scrape_order(min_id)
    new_max = get_max_id_this_year()
    if new_max is None:
        # Still nothing for this year, so there is no range to walk.
        SCRAPE_RANGE = False
    else:
        old_max = new_max



if SCRAPE_RANGE:
    # get 9999 for year
    # NOTE(review): presumably requesting 9999 makes the newest order show
    # up as the stored max -- confirm against scrape_order.
    max_id = str(YEAR_INT) + "9999"
    scrape_order(max_id)
    new_max = get_max_id_this_year()
    # Ids strictly between the previous max and the new max.
    target_list = range(old_max + 1, new_max)
    # hit all in the middle