def _get_params():
    """Ask the web server which Taipei City road case should be crawled next.

    The server base URL comes from ``cfg.config['web_server']`` (with a
    hard-coded fallback) and is queried at
    ``/get/taipei_city_road_case_next_road_case``.

    Returns:
        dict: ``{'next_road_case': value}`` where ``value`` is produced by
        ``util._int`` with ``START_TAIPEI_CITY_ROAD_CASE`` passed as its
        second argument (presumably the fallback when the reply cannot be
        converted -- confirm against ``util._int``).
    """
    server = cfg.config.get('web_server', 'http://106.187.101.193:5346')
    the_url = server + '/get/taipei_city_road_case_next_road_case'

    http_result = util.http_multiget([the_url])

    # http_multiget may hand back a falsy value (presumably None/empty on
    # failure); treat that like a missing entry instead of raising on .get().
    content = (http_result or {}).get(the_url, '')
    next_road_case = util._int(
        util.json_loads(content, ''), START_TAIPEI_CITY_ROAD_CASE)

    cfg.logger.debug(
        'after http_multiget: http_result: %s next_road_case: %s',
        http_result, next_road_case)

    return {'next_road_case': next_road_case}
def _get_http_results(dig_points):
    """Fetch the GetDigPoint page for every id in ``dig_points``.

    Returns:
        tuple: ``(S_ERR, {})`` when ``util.http_multiget`` returns a falsy
        value; otherwise ``(S_OK, results)`` where ``results`` maps each dig
        point to the value ``http_multiget`` returned for its URL, or ``''``
        when that URL has no entry.
    """
    url_tmpl = 'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%d'

    url_by_point = {}
    for point in dig_points:
        url_by_point[point] = url_tmpl % point

    fetched = util.http_multiget(url_by_point.values())
    if not fetched:
        return (S_ERR, {})

    results = {}
    for point in url_by_point:
        results[point] = fetched.get(url_by_point[point], '')
    return (S_OK, results)
def _get_params():
    """Return the crawl parameters for Taipei City road cases.

    Queries ``<web_server>/get/taipei_city_road_case_next_road_case``, where
    ``web_server`` is read from ``cfg.config`` (a hard-coded address is used
    when the setting is absent).

    Returns:
        dict: ``{'next_road_case': value}``. ``value`` is the result of
        ``util._int`` applied to the decoded reply, with
        ``START_TAIPEI_CITY_ROAD_CASE`` passed as its second argument
        (presumably the default for unparsable input -- confirm).
    """
    server = cfg.config.get('web_server', 'http://106.187.101.193:5346')
    the_url = server + '/get/taipei_city_road_case_next_road_case'

    http_result = util.http_multiget([the_url])

    # A falsy http_multiget result (presumably a failed fetch) is handled as
    # "no reply for this URL" rather than crashing on .get().
    if http_result:
        raw_reply = http_result.get(the_url, '')
    else:
        raw_reply = ''

    next_road_case = util._int(
        util.json_loads(raw_reply, ''), START_TAIPEI_CITY_ROAD_CASE)

    cfg.logger.debug(
        'after http_multiget: http_result: %s next_road_case: %s',
        http_result, next_road_case)

    return {'next_road_case': next_road_case}
def _crawl_road_case(first_road_case):
    """Crawl Taipei City road cases in batches, starting at ``first_road_case``.

    Each pass requests ``N_ROAD_CASE`` consecutive case ids from the
    GetCaseGeo endpoint and passes every non-empty, non-``'-1'`` reply to
    ``_process_data``. Crawling stops once ``N_COUNT_FAIL_ROAD_CASE`` batches
    have produced no data at all.

    Args:
        first_road_case: first case id to request.

    Returns:
        The highest case id for which data was processed, or
        ``first_road_case`` if none was. The same value is stored through
        ``util.save_cache`` under ``'cron_taipei_city_latest_road_case'``.
    """
    count_fail = 0
    latest_road_case = first_road_case
    offset_road_case = first_road_case

    while True:
        end_road_case = offset_road_case + N_ROAD_CASE
        cfg.logger.debug('offset_road_case: %s end_road_case: %s',
                         offset_road_case, end_road_case)
        road_cases = range(offset_road_case, end_road_case)
        offset_road_case += N_ROAD_CASE

        the_urls = {
            idx: 'http://www.road.tcg.gov.tw/ROADRCIS/GetCaseGeo.ashx?CASE_ID=%04d' % (idx)
            for idx in road_cases
        }
        results = util.http_multiget(the_urls.values())
        cfg.logger.debug('road_case: after http_multiget: results: %s', results)
        if not results:
            results = {}

        is_success = False
        for idx in road_cases:
            the_url = the_urls[idx]
            # .get() so that an empty or partial result counts as "no data"
            # for this id instead of raising KeyError.
            the_val = results.get(the_url, '')
            if not the_val:
                continue
            # These replies are skipped, like empty ones (presumably '-1'
            # marks a case with no data -- confirm against the endpoint).
            if the_val in ('-1', u'-1'):
                continue

            is_success = True
            the_val = util.json_loads(the_val)
            cfg.logger.debug('with_data: the_url: %s the_val: %s',
                             the_url, the_val)
            latest_road_case = idx
            _process_data(the_val, 'taipei_city_road_case', idx)

        # count_fail is cumulative: it is never reset after a successful batch.
        if not is_success:
            count_fail += 1

        if count_fail >= N_COUNT_FAIL_ROAD_CASE:
            break

    util.save_cache('cron_taipei_city_latest_road_case', latest_road_case)

    return latest_road_case
def _crawl_dig_point(first_dig_point):
    """Crawl Taipei City dig points in batches, starting at ``first_dig_point``.

    Each pass requests ``N_DIG_POINT`` consecutive ids from the GetDigPoint
    endpoint and passes every non-empty, non-``'-1'`` reply to
    ``_process_data``. Crawling stops once ``N_COUNT_FAIL_DIG_POINT`` batches
    have produced no data at all.

    Args:
        first_dig_point: first dig-point id to request.

    Returns:
        The highest dig-point id for which data was processed, or
        ``first_dig_point`` if none was. The same value is stored through
        ``util.save_cache`` under ``'cron_taipei_city_latest_dig_point'``.
    """
    count_fail = 0
    latest_dig_point = first_dig_point
    offset_dig_point = first_dig_point

    while True:
        end_dig_point = offset_dig_point + N_DIG_POINT
        cfg.logger.debug('offset_dig_point: %s end_dig_point: %s',
                         offset_dig_point, end_dig_point)
        dig_points = range(offset_dig_point, end_dig_point)
        offset_dig_point += N_DIG_POINT

        the_urls = {
            idx: 'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%08d' % (idx)
            for idx in dig_points
        }
        results = util.http_multiget(the_urls.values())
        cfg.logger.debug('dig_point: after http_multiget: results: %s', results)
        if not results:
            results = {}

        is_success = False
        for idx in dig_points:
            the_url = the_urls[idx]
            # .get() so that an empty or partial result counts as "no data"
            # for this id instead of raising KeyError.
            the_val = results.get(the_url, '')
            if not the_val:
                continue
            # These replies are skipped, like empty ones (presumably '-1'
            # marks an id with no data -- confirm against the endpoint).
            if the_val in ('-1', u'-1'):
                continue

            is_success = True
            the_val = util.json_loads(the_val)
            cfg.logger.debug('with_data: the_url: %s the_val: %s',
                             the_url, the_val)
            latest_dig_point = idx
            _process_data(the_val, 'taipei_city_dig_point', idx)

        # count_fail is cumulative: it is never reset after a successful batch.
        if not is_success:
            count_fail += 1

        if count_fail >= N_COUNT_FAIL_DIG_POINT:
            break

    util.save_cache('cron_taipei_city_latest_dig_point', latest_dig_point)

    return latest_dig_point
def _get_http_results(road_cases):
    """Fetch the GetCaseGeo page for every id in ``road_cases``.

    Returns:
        tuple: ``(S_OK, results)`` with ``results`` mapping each road case to
        the value ``util.http_multiget`` returned for its URL (``''`` when the
        URL has no entry), or ``(S_ERR, {})`` when ``http_multiget`` returned
        a falsy value.
    """
    url_tmpl = 'http://www.road.tcg.gov.tw/ROADRCIS/GetCaseGeo.ashx?CASE_ID=%04d'
    url_by_case = dict((case_id, url_tmpl % case_id) for case_id in road_cases)

    fetched = util.http_multiget(url_by_case.values())
    if fetched:
        results = dict(
            (case_id, fetched.get(url_by_case[case_id], ''))
            for case_id in url_by_case
        )
        return (S_OK, results)

    return (S_ERR, {})
def _get_http_results(dig_points):
    """Request the GetDigPoint URL of each entry in ``dig_points``.

    Returns:
        tuple: ``(S_OK, results)`` with ``results`` mapping each dig point to
        the value ``util.http_multiget`` returned for its URL (``''`` when the
        URL has no entry), or ``(S_ERR, {})`` when ``http_multiget`` returned
        a falsy value.
    """
    url_tmpl = 'http://www.road.tcg.gov.tw/ROADRCIS/GetDigPoint.ashx?AP_NO=%d'
    url_by_point = dict((point, url_tmpl % point) for point in dig_points)

    fetched = util.http_multiget(url_by_point.values())
    if fetched:
        results = dict(
            (point, fetched.get(url_by_point[point], ''))
            for point in url_by_point
        )
        return (S_OK, results)

    return (S_ERR, {})
def _get_params(params):
    """Compute the parameters for the next Taipei City dig-point crawl.

    Args:
        params: parameters of the previous run, or a falsy value for the
            first run.

    Returns:
        tuple: ``(status, params)``.

        * ``params`` falsy: the web server is asked for the next dig point at
          ``/get/taipei_city_dig_point_next_dig_point`` and
          ``(S_OK, {'next_dig_point': value})`` is returned;
          ``START_TAIPEI_CITY_DIG_POINT`` is handed to ``util._int`` as its
          second argument (presumably the fallback -- confirm).
        * otherwise: ``year`` is ``next_dig_point //
          MAX_TAIPEI_CITY_DIG_POINTS_BY_YEAR``. If the value returned by
          ``util.timestamp_to_tw_year`` for the current timestamp is not
          greater than ``year``, ``(S_ERR, None)`` is returned; else the
          result is ``(S_OK, {'next_dig_point': (year + 1) *
          MAX_TAIPEI_CITY_DIG_POINTS_BY_YEAR})``.
    """
    if not params:
        server = cfg.config.get('web_server', 'http://106.187.101.193:5346')
        the_url = server + '/get/taipei_city_dig_point_next_dig_point'

        http_result = util.http_multiget([the_url])

        # http_multiget may hand back a falsy value (presumably None/empty on
        # failure); treat that like a missing entry instead of raising.
        content = (http_result or {}).get(the_url, '')
        next_dig_point = util._int(
            util.json_loads(content, ''), START_TAIPEI_CITY_DIG_POINT)

        cfg.logger.debug(
            'after http_multiget: http_result: %s next_dig_point: %s',
            http_result, next_dig_point)

        return (S_OK, {'next_dig_point': next_dig_point})

    next_dig_point = params.get('next_dig_point', START_TAIPEI_CITY_DIG_POINT)
    year = next_dig_point // MAX_TAIPEI_CITY_DIG_POINTS_BY_YEAR

    the_timestamp = util.get_timestamp()
    tw_year = util.timestamp_to_tw_year(the_timestamp)

    # Nothing to advance to once the id's year has reached the current one.
    if tw_year <= year:
        return (S_ERR, None)

    # Jump to the first id of the following year.
    next_dig_point = (year + 1) * MAX_TAIPEI_CITY_DIG_POINTS_BY_YEAR

    return (S_OK, {'next_dig_point': next_dig_point})
def _get_geo(county_name, each_road):
    """Query the Google Maps geocoding endpoint for ``county_name`` + ``each_road``.

    Both arguments are UTF-8 encoded, joined with ``'+'`` and URL-quoted into
    the ``address`` query parameter.

    Returns:
        tuple: ``(S_ERR, [])`` when the decoded reply's ``'status'`` is not
        ``'OK'``.

    NOTE(review): no return statement for the ``'OK'`` case is visible in this
    block, so as written it falls through and returns ``None``; confirm
    whether the success path is missing.
    """
    address = '+'.join([county_name.encode('utf-8'), each_road.encode('utf-8')])
    endpoint = 'http://maps.googleapis.com/maps/api/geocode/json'
    the_url = endpoint + '?sensor=false&address=' + urllib.quote(address)

    fetched = util.http_multiget([the_url])
    cfg.logger.debug('county_name: %s each_road: %s results: %s',
                     county_name, each_road, fetched)

    decoded = util.json_loads(fetched.get(the_url, ''))
    cfg.logger.debug('county_name: %s each_road: %s result: %s',
                     county_name, each_road, decoded)

    if decoded.get('status', '') != 'OK':
        cfg.logger.error('unable to retrieve geo info now')
        return (S_ERR, [])
def _get_params(params):
    """Compute the parameters for the next New Taipei City dig-point crawl.

    Args:
        params: parameters of the previous run, or a falsy value for the
            first run.

    Returns:
        tuple: ``(status, params)``.

        * ``params`` falsy: the web server is asked for the next year at
          ``/get/new_taipei_city_dig_point_next_year``; the value is capped
          at ``_get_this_year()`` and returned as
          ``(S_OK, {'next_year': value})``.
          ``START_NEW_TAIPEI_CITY_DIG_POINT_YEAR`` is handed to ``util._int``
          as its second argument (presumably the fallback -- confirm).
        * otherwise: if ``next_year`` equals ``_get_stop_year()`` the result
          is ``(S_ERR, {'next_year': next_year})``; else ``next_year`` is
          incremented by one and returned with ``S_OK``.
    """
    if not params:
        server = cfg.config.get('web_server', 'http://106.187.101.193:5346')
        the_url = server + '/get/new_taipei_city_dig_point_next_year'

        http_result = util.http_multiget([the_url])

        # http_multiget may hand back a falsy value (presumably None/empty on
        # failure); treat that like a missing entry instead of raising.
        content = (http_result or {}).get(the_url, '')
        next_year = util._int(
            util.json_loads(content, ''), START_NEW_TAIPEI_CITY_DIG_POINT_YEAR)

        # Never start beyond the current year.
        this_year = _get_this_year()
        next_year = min(next_year, this_year)

        cfg.logger.debug(
            'after http_multiget: http_result: %s next_year: %s',
            http_result, next_year)

        return (S_OK, {'next_year': next_year})

    next_year = params.get('next_year', START_NEW_TAIPEI_CITY_DIG_POINT_YEAR)

    stop_year = _get_stop_year()
    if next_year == stop_year:
        return (S_ERR, {'next_year': next_year})

    next_year += 1

    return (S_OK, {'next_year': next_year})
def _get_http_results(idx_list, url_tmpl):
    """Build one URL per index from ``url_tmpl`` and fetch them all.

    Args:
        idx_list: iterable of indexes to substitute into ``url_tmpl``.
        url_tmpl: ``%``-style format string taking a single index.

    Returns:
        tuple: ``(the_urls, results)`` where ``the_urls`` maps each index to
        its URL and ``results`` is whatever ``util.http_multiget`` returned
        for those URLs.
    """
    the_urls = {}
    for idx in idx_list:
        the_urls[idx] = url_tmpl % (idx)

    fetched = util.http_multiget(the_urls.values())
    return (the_urls, fetched)
def _get_http_results(idx_list, url_tmpl):
    """Format ``url_tmpl`` with every index and request the resulting URLs.

    Args:
        idx_list: iterable of indexes to substitute into ``url_tmpl``.
        url_tmpl: ``%``-style format string taking a single index.

    Returns:
        tuple: ``(the_urls, results)``; ``the_urls`` maps index -> URL and
        ``results`` is the unmodified return value of
        ``util.http_multiget``.
    """
    the_urls = dict((idx, url_tmpl % (idx)) for idx in idx_list)
    return (the_urls, util.http_multiget(the_urls.values()))