Ejemplo n.º 1
0
def _crawl_dig():
    """Fetch the dig list for a wide date window and store every record.

    Posts one query to the InfoAllList.asp endpoint whose date range runs
    from Unix timestamp 946684800 (2000-01-01 00:00:00 UTC) to 366 days
    past the value returned by ``util.get_timestamp()``.  The entry returned
    for that URL is handed to ``_parse_dig``; every parsed record is passed
    to ``_put_to_db`` and the latest timestamp reported by the parser is
    saved under the ``cron_new_taipei_city_latest_dig`` cache key.
    """
    the_url = 'http://61.60.124.185/tpctempdig/InfoAllList.asp'
    start_timestamp = 946684800  # 2000-01-01 00:00:00 UTC
    # 86400 seconds per day: extend the window 366 days past "now".
    # NOTE(review): assumes util.get_timestamp() is in seconds -- confirm.
    end_timestamp = util.get_timestamp() + 86400 * 366

    start_datetime = util.timestamp_to_datetime(start_timestamp)
    end_datetime = util.timestamp_to_datetime(end_timestamp)

    params = {
        'sortflag': '',
        'sorttype': '',
        'TargetLB': '',
        'qry2': 1,
        'startyear': start_datetime.year,
        'startmonth': start_datetime.month,
        'startday': start_datetime.day,
        'endyear': end_datetime.year,
        'endmonth': end_datetime.month,
        'endday': end_datetime.day,
    }

    http_data = util.http_multipost({the_url: params})
    (latest_timestamp, dig_data) = _parse_dig(http_data[the_url])

    # Plain loop: the calls are made for their side effect only, so there is
    # no reason to build a list of their return values.
    for each_data in dig_data:
        _put_to_db(each_data)

    util.save_cache('cron_new_taipei_city_latest_dig', {'latest_timestamp': latest_timestamp})
Ejemplo n.º 2
0
def _crawl_dig(last_dig):
    """Fetch dig records around the last known timestamp and store them.

    Queries the InfoAllList.asp endpoint for the window running from one
    day before to one day after ``last_dig['latest_timestamp']``, hands the
    entry returned for that URL to ``_parse_dig``, passes each parsed record
    to ``_put_to_db`` and saves the latest timestamp reported by the parser
    under the ``cron_new_taipei_city_latest_dig`` cache key.

    Args:
        last_dig: mapping that may carry a ``'latest_timestamp'`` entry;
            10000 is used when the key is absent.
    """
    the_url = 'http://61.60.124.185/tpctempdig/InfoAllList.asp'
    last_timestamp = last_dig.get('latest_timestamp', 10000)
    # 86400 seconds = one day on either side of the last known timestamp.
    start_timestamp = last_timestamp - 86400
    end_timestamp = last_timestamp + 86400

    # NOTE(review): assumes util.timestamp_to_datetime returns a
    # (year, month, day) triple here -- confirm against util.
    (start_year, start_month, start_day) = util.timestamp_to_datetime(start_timestamp)
    (end_year, end_month, end_day) = util.timestamp_to_datetime(end_timestamp)

    params = {
        'sortflag': '',
        'sorttype': '',
        'TargetLB': '',
        'qry2': 1,
        'startyear': start_year,
        'startmonth': start_month,
        # Previously omitted: start_day was computed but never sent, so the
        # query had an end day but no start day.
        'startday': start_day,
        'endyear': end_year,
        'endmonth': end_month,
        'endday': end_day
    }

    http_data = util.http_multipost({the_url: params})
    (latest_timestamp, dig_data) = _parse_dig(http_data[the_url])

    # Plain loop: the calls are made for their side effect only.
    for each_data in dig_data:
        _put_to_db(each_data)

    util.save_cache('cron_new_taipei_city_latest_dig', {'latest_timestamp': latest_timestamp})
Ejemplo n.º 3
0
def _crawl_dig_point(first_dig_point):
    """Crawl dig-point ids in batches of N_DIG_POINT from first_dig_point.

    Each batch is fetched through ``_get_http_results`` and handed to
    ``_process_http_results``, which returns an error code and the updated
    latest dig point.  Every batch whose error code is not ``S_OK`` bumps a
    failure counter; once that counter reaches ``N_COUNT_FAIL_DIG_POINT``
    the function returns.  Otherwise the latest dig point is cached and the
    crawler sleeps 30 seconds before the next batch.

    Returns:
        The latest dig point reported by ``_process_http_results``.
    """
    url_template = 'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%08d'
    failures = 0
    latest_dig_point = first_dig_point
    batch_start = first_dig_point

    while True:
        # Current batch covers [batch_start, batch_end); advance right away.
        batch_end = batch_start + N_DIG_POINT
        dig_points = range(batch_start, batch_end)
        batch_start = batch_end

        the_urls, results = _get_http_results(dig_points, url_template)
        error_code, latest_dig_point = _process_http_results(
            the_urls,
            results,
            latest_dig_point,
            'taipei_city_dig_point',
            'CB_DATEpro',
        )
        cfg.logger.debug(
            'dig_points: %s error_code: %s latest_dig_point: %s',
            dig_points, error_code, latest_dig_point)

        if error_code != S_OK:
            failures += 1

        # Failure budget exhausted: stop without caching or sleeping.
        if failures >= N_COUNT_FAIL_DIG_POINT:
            return latest_dig_point

        util.save_cache('cron_taipei_city_latest_dig_point', latest_dig_point)

        cfg.logger.debug('to sleep 30')
        time.sleep(30)
Ejemplo n.º 4
0
def _crawl_dig():
    """Query the dig list over a wide date range and persist the results.

    The request window starts at Unix timestamp 946684800
    (2000-01-01 00:00:00 UTC) and ends 366 days after the value returned by
    ``util.get_timestamp()``.  The entry returned for the endpoint URL is
    parsed by ``_parse_dig``; each parsed record goes to ``_put_to_db`` and
    the parser's latest timestamp is stored under the
    ``cron_new_taipei_city_latest_dig`` cache key.
    """
    the_url = 'http://61.60.124.185/tpctempdig/InfoAllList.asp'
    start_timestamp = 946684800  # 2000-01-01 00:00:00 UTC
    # 86400 seconds per day: look 366 days ahead of the current timestamp.
    # NOTE(review): assumes util.get_timestamp() is in seconds -- confirm.
    end_timestamp = util.get_timestamp() + 86400 * 366

    start_datetime = util.timestamp_to_datetime(start_timestamp)
    end_datetime = util.timestamp_to_datetime(end_timestamp)

    params = {
        'sortflag': '',
        'sorttype': '',
        'TargetLB': '',
        'qry2': 1,
        'startyear': start_datetime.year,
        'startmonth': start_datetime.month,
        'startday': start_datetime.day,
        'endyear': end_datetime.year,
        'endmonth': end_datetime.month,
        'endday': end_datetime.day,
    }

    http_data = util.http_multipost({the_url: params})
    (latest_timestamp, dig_data) = _parse_dig(http_data[the_url])

    # A for-loop instead of a list comprehension: _put_to_db is called only
    # for its side effect, so collecting its return values was wasted work.
    for each_data in dig_data:
        _put_to_db(each_data)

    util.save_cache('cron_new_taipei_city_latest_dig',
                    {'latest_timestamp': latest_timestamp})
Ejemplo n.º 5
0
def _crawl_road_case(first_road_case):
    """Crawl road-case ids in batches of N_ROAD_CASE from first_road_case.

    Each batch is fetched through ``_get_http_results`` and handed to
    ``_process_http_results``, which returns an error code and the updated
    latest road case.  Every batch whose error code is not ``S_OK`` bumps a
    failure counter; once that counter reaches ``N_COUNT_FAIL_ROAD_CASE``
    the function returns.  Otherwise the latest road case is cached and the
    crawler sleeps 30 seconds before the next batch.

    Returns:
        The latest road case reported by ``_process_http_results``.
    """
    url_template = 'http://www.road.tcg.gov.tw/ROADRCIS/GetCaseGeo.ashx?CASE_ID=%04d'
    failures = 0
    latest_road_case = first_road_case
    batch_start = first_road_case

    while True:
        # Current batch covers [batch_start, batch_end); advance right away.
        batch_end = batch_start + N_ROAD_CASE
        road_cases = range(batch_start, batch_end)
        batch_start = batch_end

        the_urls, results = _get_http_results(road_cases, url_template)
        error_code, latest_road_case = _process_http_results(
            the_urls,
            results,
            latest_road_case,
            'taipei_city_road_case',
            'WORK_DATEpro',
        )

        cfg.logger.debug(
            'road_cases: %s error_code: %s latest_road_case: %s',
            road_cases, error_code, latest_road_case)

        if error_code != S_OK:
            failures += 1

        # Failure budget exhausted: stop without caching or sleeping.
        if failures >= N_COUNT_FAIL_ROAD_CASE:
            return latest_road_case

        util.save_cache('cron_taipei_city_latest_road_case', latest_road_case)

        cfg.logger.debug('to sleep 30')
        time.sleep(30)
Ejemplo n.º 6
0
def _crawl_dig_point(first_dig_point):
    """Walk dig-point ids upward in N_DIG_POINT-sized windows.

    For each window the ids are fetched via ``_get_http_results`` and the
    responses are passed to ``_process_http_results``.  A window whose
    error code differs from ``S_OK`` counts as one failure; the walk ends
    when the failure count reaches ``N_COUNT_FAIL_DIG_POINT``.  After every
    window that does not end the walk, the latest dig point is cached and
    the function sleeps for 30 seconds.

    Returns:
        The most recent value of latest_dig_point.
    """
    dig_point_url = 'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%08d'
    n_failed = 0
    latest_dig_point = first_dig_point
    window_lo = first_dig_point

    while True:
        window_hi = window_lo + N_DIG_POINT
        dig_points = range(window_lo, window_hi)
        window_lo = window_hi  # the next window starts where this one ends

        fetched = _get_http_results(dig_points, dig_point_url)
        (the_urls, results) = fetched

        processed = _process_http_results(
            the_urls, results, latest_dig_point,
            'taipei_city_dig_point', 'CB_DATEpro')
        (error_code, latest_dig_point) = processed
        cfg.logger.debug(
            'dig_points: %s error_code: %s latest_dig_point: %s',
            dig_points,
            error_code,
            latest_dig_point,
        )

        if error_code != S_OK:
            n_failed += 1

        if n_failed >= N_COUNT_FAIL_DIG_POINT:
            return latest_dig_point

        util.save_cache('cron_taipei_city_latest_dig_point', latest_dig_point)

        cfg.logger.debug('to sleep 30')
        time.sleep(30)
Ejemplo n.º 7
0
def _crawl_road_case(first_road_case):
    """Walk road-case ids upward in N_ROAD_CASE-sized windows.

    For each window the ids are fetched via ``_get_http_results`` and the
    responses are passed to ``_process_http_results``.  A window whose
    error code differs from ``S_OK`` counts as one failure; the walk ends
    when the failure count reaches ``N_COUNT_FAIL_ROAD_CASE``.  After every
    window that does not end the walk, the latest road case is cached and
    the function sleeps for 30 seconds.

    Returns:
        The most recent value of latest_road_case.
    """
    road_case_url = 'http://www.road.tcg.gov.tw/ROADRCIS/GetCaseGeo.ashx?CASE_ID=%04d'
    n_failed = 0
    latest_road_case = first_road_case
    window_lo = first_road_case

    while True:
        window_hi = window_lo + N_ROAD_CASE
        road_cases = range(window_lo, window_hi)
        window_lo = window_hi  # the next window starts where this one ends

        fetched = _get_http_results(road_cases, road_case_url)
        (the_urls, results) = fetched

        processed = _process_http_results(
            the_urls, results, latest_road_case,
            'taipei_city_road_case', 'WORK_DATEpro')
        (error_code, latest_road_case) = processed

        cfg.logger.debug(
            'road_cases: %s error_code: %s latest_road_case: %s',
            road_cases,
            error_code,
            latest_road_case,
        )

        if error_code != S_OK:
            n_failed += 1

        if n_failed >= N_COUNT_FAIL_ROAD_CASE:
            return latest_road_case

        util.save_cache('cron_taipei_city_latest_road_case', latest_road_case)

        cfg.logger.debug('to sleep 30')
        time.sleep(30)
Ejemplo n.º 8
0
def _crawl_road_case(first_road_case):
    """Crawl road-case ids in batches of N_ROAD_CASE from first_road_case.

    Every id in a batch is turned into a GetCaseGeo.ashx URL and the whole
    batch is fetched with ``util.http_multiget``.  A response that is empty
    or equal to ``'-1'`` is skipped; any other response is decoded with
    ``util.json_loads`` and passed to ``_process_data``.  A batch with no
    usable response counts as one failure, and the crawl stops once the
    failure count reaches ``N_COUNT_FAIL_ROAD_CASE``.  After each batch that
    does not stop the crawl, the latest road case is saved under the
    ``cron_taipei_city_latest_road_case`` cache key.

    Returns:
        The highest id for which a usable response was processed, or
        ``first_road_case`` when none was.
    """
    count_fail = 0
    latest_road_case = first_road_case
    offset_road_case = first_road_case
    while True:
        end_road_case = offset_road_case + N_ROAD_CASE
        cfg.logger.debug('offset_road_case: %s end_road_case: %s', offset_road_case, end_road_case)
        road_cases = range(offset_road_case, end_road_case)
        offset_road_case = end_road_case

        the_urls = {idx: 'http://www.road.tcg.gov.tw/ROADRCIS/GetCaseGeo.ashx?CASE_ID=%04d' % (idx) for idx in road_cases}
        results = util.http_multiget(the_urls.values())
        cfg.logger.debug('road_case: after http_multiget: results: %s', results)
        if not results:
            results = {}

        is_success = False
        for idx in road_cases:
            the_url = the_urls[idx]
            # .get() rather than []: when the fetch returned nothing (results
            # fell back to {}) or left a URL out, indexing raised KeyError
            # and aborted the crawl instead of counting a failed batch.
            the_val = results.get(the_url)

            # '-1' compares equal to u'-1', so one check covers both.
            # NOTE(review): '-1' presumably is the server's "no data"
            # marker -- confirm.
            if not the_val or the_val == '-1':
                continue

            is_success = True

            the_val = util.json_loads(the_val)
            cfg.logger.debug('with_data: the_url: %s the_val: %s', the_url, the_val)
            latest_road_case = idx
            _process_data(the_val, 'taipei_city_road_case', idx)

        if not is_success:
            count_fail += 1

        if count_fail >= N_COUNT_FAIL_ROAD_CASE:
            break

        util.save_cache('cron_taipei_city_latest_road_case',  latest_road_case)

    return latest_road_case
Ejemplo n.º 9
0
def _crawl_dig_point(first_dig_point):
    """Crawl dig-point ids in batches of N_DIG_POINT from first_dig_point.

    Every id in a batch is turned into a GetDigPoint.ashx URL and the whole
    batch is fetched with ``util.http_multiget``.  A response that is empty
    or equal to ``'-1'`` is skipped; any other response is decoded with
    ``util.json_loads`` and passed to ``_process_data``.  A batch with no
    usable response counts as one failure, and the crawl stops once the
    failure count reaches ``N_COUNT_FAIL_DIG_POINT``.  After each batch that
    does not stop the crawl, the latest dig point is saved under the
    ``cron_taipei_city_latest_dig_point`` cache key.

    Returns:
        The highest id for which a usable response was processed, or
        ``first_dig_point`` when none was.
    """
    count_fail = 0
    latest_dig_point = first_dig_point
    offset_dig_point = first_dig_point
    while True:
        end_dig_point = offset_dig_point + N_DIG_POINT
        cfg.logger.debug('offset_dig_point: %s end_dig_point: %s', offset_dig_point, end_dig_point)
        dig_points = range(offset_dig_point, end_dig_point)
        offset_dig_point = end_dig_point

        the_urls = {idx: 'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%08d' % (idx) for idx in dig_points}
        results = util.http_multiget(the_urls.values())
        cfg.logger.debug('dig_point: after http_multiget: results: %s', results)
        if not results:
            results = {}

        is_success = False
        for idx in dig_points:
            the_url = the_urls[idx]
            # .get() rather than []: when the fetch returned nothing (results
            # fell back to {}) or left a URL out, indexing raised KeyError
            # and aborted the crawl instead of counting a failed batch.
            the_val = results.get(the_url)

            # '-1' compares equal to u'-1', so one check covers both.
            # NOTE(review): '-1' presumably is the server's "no data"
            # marker -- confirm.
            if not the_val or the_val == '-1':
                continue

            is_success = True

            the_val = util.json_loads(the_val)
            cfg.logger.debug('with_data: the_url: %s the_val: %s', the_url, the_val)
            latest_dig_point = idx
            _process_data(the_val, 'taipei_city_dig_point', idx)

        if not is_success:
            count_fail += 1

        if count_fail >= N_COUNT_FAIL_DIG_POINT:
            break

        util.save_cache('cron_taipei_city_latest_dig_point',  latest_dig_point)

    return latest_dig_point