def _crawl_dig():
    """Crawl the full temporary-dig list and store every record to the db.

    Queries InfoAllList.asp over the whole known time range (2000-01-01
    through roughly one year from now, so in-progress digs are included),
    parses the response, persists each record, and caches the latest
    timestamp seen for the next incremental crawl.
    """
    the_url = 'http://61.60.124.185/tpctempdig/InfoAllList.asp'

    # 946684800 == 2000-01-01T00:00:00Z
    start_timestamp = 946684800
    end_timestamp = util.get_timestamp() + 86400 * 366
    start_datetime = util.timestamp_to_datetime(start_timestamp)
    end_datetime = util.timestamp_to_datetime(end_timestamp)

    params = {
        'sortflag': '',
        'sorttype': '',
        'TargetLB': '',
        'qry2': 1,
        'startyear': start_datetime.year,
        'startmonth': start_datetime.month,
        'startday': start_datetime.day,
        'endyear': end_datetime.year,
        'endmonth': end_datetime.month,
        'endday': end_datetime.day,
    }

    http_data = util.http_multipost({the_url: params})
    #cfg.logger.debug('http_data: %s', http_data)

    (latest_timestamp, dig_data) = _parse_dig(http_data[the_url])

    # plain loop: _put_to_db is called for its side effect only, so a
    # list comprehension that discards its result is the wrong idiom here
    for each_data in dig_data:
        _put_to_db(each_data)

    util.save_cache('cron_new_taipei_city_latest_dig', {'latest_timestamp': latest_timestamp})
def _crawl_dig(last_dig):
    """Incrementally crawl the temporary-dig list around the last crawl point.

    Args:
        last_dig: cached dict from a previous run; 'latest_timestamp' is the
            newest record timestamp already crawled (falls back to a tiny
            epoch value when absent, effectively restarting from the top).

    Queries a +/- 1-day window around that timestamp, stores each parsed
    record, and re-caches the newest timestamp seen.
    """
    the_url = 'http://61.60.124.185/tpctempdig/InfoAllList.asp'

    last_timestamp = last_dig.get('latest_timestamp', 10000)
    start_timestamp = last_timestamp - 86400
    end_timestamp = last_timestamp + 86400
    # BUG FIX: util.timestamp_to_datetime returns a datetime (the sibling
    # full-crawl function reads .year/.month/.day off it), so the old
    # 3-tuple unpacking raised TypeError.  Use attribute access instead.
    start_datetime = util.timestamp_to_datetime(start_timestamp)
    end_datetime = util.timestamp_to_datetime(end_timestamp)

    params = {
        'sortflag': '',
        'sorttype': '',
        'TargetLB': '',
        'qry2': 1,
        'startyear': start_datetime.year,
        'startmonth': start_datetime.month,
        # BUG FIX: 'startday' was missing from the query, unlike the
        # identical request built by the full-crawl variant.
        'startday': start_datetime.day,
        'endyear': end_datetime.year,
        'endmonth': end_datetime.month,
        'endday': end_datetime.day,
    }

    http_data = util.http_multipost({the_url: params})
    #cfg.logger.debug('http_data: %s', http_data)

    (latest_timestamp, dig_data) = _parse_dig(http_data[the_url])

    # plain loop: _put_to_db is called for its side effect only
    for each_data in dig_data:
        _put_to_db(each_data)

    util.save_cache('cron_new_taipei_city_latest_dig', {'latest_timestamp': latest_timestamp})
def _crawl_dig_point(first_dig_point):
    """Scan dig-point ids upward from first_dig_point in fixed-size batches.

    Each batch of N_DIG_POINT ids is fetched and processed; the newest
    successfully processed id is cached after every batch.  Scanning stops
    once N_COUNT_FAIL_DIG_POINT batches have failed.  Returns the latest
    dig-point id processed.
    """
    n_fail = 0
    latest_dig_point = first_dig_point
    next_start = first_dig_point

    while True:
        batch = range(next_start, next_start + N_DIG_POINT)
        next_start += N_DIG_POINT

        (the_urls, results) = _get_http_results(
            batch,
            'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%08d')
        (error_code, latest_dig_point) = _process_http_results(
            the_urls, results, latest_dig_point,
            'taipei_city_dig_point', 'CB_DATEpro')
        cfg.logger.debug(
            'dig_points: %s error_code: %s latest_dig_point: %s',
            batch, error_code, latest_dig_point)

        if error_code != S_OK:
            n_fail += 1
            if n_fail >= N_COUNT_FAIL_DIG_POINT:
                break

        util.save_cache('cron_taipei_city_latest_dig_point', latest_dig_point)

        cfg.logger.debug('to sleep 30')
        time.sleep(30)

    return latest_dig_point
def _crawl_road_case(first_road_case):
    """Scan road-case ids upward from first_road_case in fixed-size batches.

    Each batch of N_ROAD_CASE ids is fetched and processed; the newest
    successfully processed id is cached after every batch.  Scanning stops
    once N_COUNT_FAIL_ROAD_CASE batches have failed.  Returns the latest
    road-case id processed.
    """
    n_fail = 0
    latest_road_case = first_road_case
    next_start = first_road_case

    while True:
        batch = range(next_start, next_start + N_ROAD_CASE)
        next_start += N_ROAD_CASE

        (the_urls, results) = _get_http_results(
            batch,
            'http://www.road.tcg.gov.tw/ROADRCIS/GetCaseGeo.ashx?CASE_ID=%04d')
        (error_code, latest_road_case) = _process_http_results(
            the_urls, results, latest_road_case,
            'taipei_city_road_case', 'WORK_DATEpro')
        cfg.logger.debug(
            'road_cases: %s error_code: %s latest_road_case: %s',
            batch, error_code, latest_road_case)

        if error_code != S_OK:
            n_fail += 1
            if n_fail >= N_COUNT_FAIL_ROAD_CASE:
                break

        util.save_cache('cron_taipei_city_latest_road_case', latest_road_case)

        cfg.logger.debug('to sleep 30')
        time.sleep(30)

    return latest_road_case
def _crawl_dig_point(first_dig_point):
    """Walk dig-point ids from first_dig_point upward, batch by batch.

    Fetches N_DIG_POINT ids per iteration, processes the results, caches the
    newest id handled, and gives up after N_COUNT_FAIL_DIG_POINT failed
    iterations.  Returns the latest dig-point id processed.
    """
    url_tmpl = 'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%08d'
    latest_dig_point = first_dig_point
    cursor = first_dig_point
    failures = 0

    while True:
        ids = range(cursor, cursor + N_DIG_POINT)
        cursor += N_DIG_POINT

        (the_urls, results) = _get_http_results(ids, url_tmpl)
        (error_code, latest_dig_point) = _process_http_results(
            the_urls, results, latest_dig_point,
            'taipei_city_dig_point', 'CB_DATEpro')
        cfg.logger.debug(
            'dig_points: %s error_code: %s latest_dig_point: %s',
            ids, error_code, latest_dig_point)

        if error_code != S_OK:
            failures += 1
            if failures >= N_COUNT_FAIL_DIG_POINT:
                break

        util.save_cache('cron_taipei_city_latest_dig_point', latest_dig_point)

        cfg.logger.debug('to sleep 30')
        time.sleep(30)

    return latest_dig_point
def _crawl_road_case(first_road_case):
    """Walk road-case ids from first_road_case upward, batch by batch.

    Fetches N_ROAD_CASE ids per iteration, processes the results, caches the
    newest id handled, and gives up after N_COUNT_FAIL_ROAD_CASE failed
    iterations.  Returns the latest road-case id processed.
    """
    url_tmpl = 'http://www.road.tcg.gov.tw/ROADRCIS/GetCaseGeo.ashx?CASE_ID=%04d'
    latest_road_case = first_road_case
    cursor = first_road_case
    failures = 0

    while True:
        ids = range(cursor, cursor + N_ROAD_CASE)
        cursor += N_ROAD_CASE

        (the_urls, results) = _get_http_results(ids, url_tmpl)
        (error_code, latest_road_case) = _process_http_results(
            the_urls, results, latest_road_case,
            'taipei_city_road_case', 'WORK_DATEpro')
        cfg.logger.debug(
            'road_cases: %s error_code: %s latest_road_case: %s',
            ids, error_code, latest_road_case)

        if error_code != S_OK:
            failures += 1
            if failures >= N_COUNT_FAIL_ROAD_CASE:
                break

        util.save_cache('cron_taipei_city_latest_road_case', latest_road_case)

        cfg.logger.debug('to sleep 30')
        time.sleep(30)

    return latest_road_case
def _crawl_road_case(first_road_case):
    """Scan road-case ids upward from first_road_case, fetching inline.

    For each batch of N_ROAD_CASE ids, fetch GetCaseGeo.ashx, store every
    non-empty, non-'-1' JSON payload via _process_data, cache the newest
    successful id, and stop after N_COUNT_FAIL_ROAD_CASE batches in a row
    with no successful fetch.  Returns the latest road-case id processed.
    """
    count_fail = 0
    latest_road_case = first_road_case
    offset_road_case = first_road_case
    while True:
        end_road_case = offset_road_case + N_ROAD_CASE
        cfg.logger.debug('offset_road_case: %s end_road_case: %s', offset_road_case, end_road_case)
        road_cases = range(offset_road_case, end_road_case)
        offset_road_case += N_ROAD_CASE

        the_urls = {idx: 'http://www.road.tcg.gov.tw/ROADRCIS/GetCaseGeo.ashx?CASE_ID=%04d' % (idx) for idx in road_cases}
        results = util.http_multiget(the_urls.values())
        cfg.logger.debug('road_case: after http_multiget: results: %s', results)
        if not results:
            results = {}

        is_success = False
        for idx in road_cases:
            the_url = the_urls[idx]
            # BUG FIX: results can be empty (defaulted to {} above) or be
            # missing this url; plain results[the_url] raised KeyError and
            # aborted the whole crawl.  Treat a missing entry as no data.
            the_val = results.get(the_url)
            # skip empty responses and the server's '-1' "no record" sentinel
            # (a single equality check covers both str and unicode forms)
            if not the_val or the_val == '-1':
                continue

            is_success = True
            the_val = util.json_loads(the_val)
            cfg.logger.debug('with_data: the_url: %s the_val: %s', the_url, the_val)
            latest_road_case = idx
            _process_data(the_val, 'taipei_city_road_case', idx)

        if not is_success:
            count_fail += 1
            if count_fail >= N_COUNT_FAIL_ROAD_CASE:
                break

        util.save_cache('cron_taipei_city_latest_road_case', latest_road_case)

    return latest_road_case
def _crawl_dig_point(first_dig_point):
    """Scan dig-point ids upward from first_dig_point, fetching inline.

    For each batch of N_DIG_POINT ids, fetch GetDigPoint.ashx, store every
    non-empty, non-'-1' JSON payload via _process_data, cache the newest
    successful id, and stop after N_COUNT_FAIL_DIG_POINT batches in a row
    with no successful fetch.  Returns the latest dig-point id processed.
    """
    count_fail = 0
    latest_dig_point = first_dig_point
    offset_dig_point = first_dig_point
    while True:
        end_dig_point = offset_dig_point + N_DIG_POINT
        cfg.logger.debug('offset_dig_point: %s end_dig_point: %s', offset_dig_point, end_dig_point)
        dig_points = range(offset_dig_point, end_dig_point)
        offset_dig_point += N_DIG_POINT

        the_urls = {idx: 'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%08d' % (idx) for idx in dig_points}
        results = util.http_multiget(the_urls.values())
        cfg.logger.debug('dig_point: after http_multiget: results: %s', results)
        if not results:
            results = {}

        is_success = False
        for idx in dig_points:
            the_url = the_urls[idx]
            # BUG FIX: results can be empty (defaulted to {} above) or be
            # missing this url; plain results[the_url] raised KeyError and
            # aborted the whole crawl.  Treat a missing entry as no data.
            the_val = results.get(the_url)
            # skip empty responses and the server's '-1' "no record" sentinel
            # (a single equality check covers both str and unicode forms)
            if not the_val or the_val == '-1':
                continue

            is_success = True
            the_val = util.json_loads(the_val)
            cfg.logger.debug('with_data: the_url: %s the_val: %s', the_url, the_val)
            latest_dig_point = idx
            _process_data(the_val, 'taipei_city_dig_point', idx)

        if not is_success:
            count_fail += 1
            if count_fail >= N_COUNT_FAIL_DIG_POINT:
                break

        util.save_cache('cron_taipei_city_latest_dig_point', latest_dig_point)

    return latest_dig_point