def _load(det_name, year, month, day, traffic_type, missing_data):
    # faverolles 1/18/2020: Reworked the downloading operation
    #   - No longer saves all of the "fail" files
    #   - Checks if the global option to download data files is "TRUE"
    """ Return raw data of the detector as a list """
    if det_name is None:
        raise Exception("Detector number must be passed")

    dirname = str(year) + str(month).zfill(2) + str(day).zfill(2)
    remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
        year) + '/' + dirname + '/' + det_name + traffic_type.extension

    date = datetime.date(year, month, day)
    data = _read_cached_data_file(det_name, date, traffic_type)
    if data is not None:
        return _convert_to_list(data, traffic_type)

    if global_settings.DOWNLOAD_TRAFFIC_DATA_FILES:
        print(f"Downloading traffic data file [{remote_file}]")
        try:
            with http.get_url_opener(remote_file, timeout=30) as res:
                bin_data = res.read()
            data = _convert_to_list(bin_data, traffic_type)
            if not data:
                return missing_data
            _save_file_to_cache(det_name, date, bin_data, traffic_type)
            return data
        except Exception as e:
            print(f"Exception downloading traffic data (file=[{remote_file}], reason=[{str(e)}])")
            return missing_data

    return missing_data
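
# A minimal usage sketch for the reworked loader above (not called anywhere).
# The detector name and the year/month/day are hypothetical placeholders;
# cfg.MISSING_VALUE and cfg.SAMPLES_PER_DAY come from the project config as
# used elsewhere in this module, and traffic_type is whatever TrafficType
# instance the real callers pass in.
def _example_load_one_day(det_name, traffic_type):
    # Build the caller-supplied "missing" sentinel, then load one detector-day.
    missing_day = [cfg.MISSING_VALUE] * cfg.SAMPLES_PER_DAY
    data = _load(det_name, 2018, 8, 1, traffic_type, missing_day)
    # The loader returns the sentinel itself when nothing could be fetched.
    return None if data == missing_day else data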
def _load(device_name, year, month, day, opt, only_download=False, n_try=1):
    """ Return raw data of the device as a list """
    date = datetime.date(year, month, day)
    data = _cached_data(device_name, date, opt)
    if data is not None:
        return _convert_to_list(data, opt) if not only_download else None

    # not cached, check if this is a known-failed device
    if _is_failed(device_name, date, opt):
        logging.debug("Device " + str(device_name) + " is a failed device")
        return [-1] * cfg.SAMPLES_PER_DAY if not only_download else None

    dirname = str(year) + str(month).zfill(2) + str(day).zfill(2)
    remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
        year) + '/' + dirname + '/' + str(device_name) + opt['ext']
    try:
        with http.get_url_opener(remote_file) as res:
            bin_data = res.read()
    except url_error.HTTPError as e:
        logging.debug(
            'Could not get the weather sensor data (sensor={}, reason={}, http_code={})'
            .format(device_name, str(e.reason), e.code))
        if e.code == 404:
            # mark this device/date as failed so it is not requested again
            open(_cache_path(_CACHE_FAIL, device_name, date, opt), 'w').close()
        return [-1] * cfg.SAMPLES_PER_DAY if not only_download else None
    except url_error.URLError as e:
        logging.critical(
            'Could not connect to weather sensor {} (reason={})'.format(
                device_name, str(e.reason)))
        return [-1] * cfg.SAMPLES_PER_DAY if not only_download else None
    except ConnectionResetError as e:
        logging.critical(
            'HTTP connection has been reset (file={}, reason={})'.format(
                remote_file, e.errno))
        if n_try <= MAX_TRY_NUM:
            logging.critical('Retrying...')
            time.sleep(1)
            return _load(device_name, year, month, day, opt,
                         only_download=only_download, n_try=n_try + 1)
        return [-1] * cfg.SAMPLES_PER_DAY if not only_download else None

    _cache(device_name, date, bin_data, opt)
    if only_download:
        return None
    return _convert_to_list(bin_data, opt)
def _load_testing(det_name, year, month, day, traffic_type,
                  only_download=False, n_try=1):

    def _cached_data_local(detector_name, date_val, traffic_type_val):
        """ :rtype: None or bytes """
        cache_path = _cache_path(CACHE_TYPE_DET, detector_name, date_val,
                                 traffic_type_val)
        if not os.path.exists(cache_path):
            return None
        with open(cache_path, 'rb') as cache_file:
            return cache_file.read()

    if det_name is None:
        raise Exception("Detector number must be passed")

    missing_data = [cfg.MISSING_VALUE] * cfg.SAMPLES_PER_DAY
    date = datetime.date(year, month, day)
    cached_data = _cached_data_local(det_name, date, traffic_type)
    if cached_data is not None:
        return _convert_to_list(cached_data,
                                traffic_type) if not only_download else None

    # not cached, check if this is a known-failed detector
    if _is_failed(det_name, date, traffic_type):
        # logging.debug("Detector " + det_name + " is a failed detector")
        return missing_data if not only_download else None

    dir_name = str(year) + str(month).zfill(2) + str(day).zfill(2)
    remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
        year) + '/' + dir_name + '/' + det_name + traffic_type.extension
    try:
        if n_try > 1:
            logging.critical('Retrying... (n_try=%d)' % n_try)
        with http.get_url_opener(remote_file, timeout=30) as res:
            bin_data = res.read()
        data = _convert_to_list(bin_data, traffic_type)
        if not data:
            return missing_data
        _cache(det_name, date, bin_data, traffic_type)
        return data
    except url_error.HTTPError as e:
        logging.debug(
            'Could not get the remote file (file={}, reason={}, http_code={})'
            .format(remote_file, str(e.reason), e.code))
        if e.code == 404:
            # mark this detector/date as failed so it is not requested again
            open(_cache_path(CACHE_TYPE_FAIL, det_name, date, traffic_type),
                 'w').close()
        return missing_data if not only_download else None
    except url_error.URLError as e:
        logging.critical(
            'Could not connect to the server (file={}, reason={})'.format(
                remote_file, str(e.reason)))
        return missing_data if not only_download else None
    except ConnectionResetError as e:
        logging.critical(
            'HTTP connection has been reset (file={}, reason={})'.format(
                remote_file, e.errno))
        if n_try <= MAX_TRY_NUM:
            time.sleep(1)
            # propagate only_download on retry instead of resetting it to False
            return _load_testing(det_name, year, month, day, traffic_type,
                                 only_download=only_download, n_try=n_try + 1)
        logging.critical(' - failed to get data')
        return missing_data if not only_download else None
    except Exception as e:
        logging.critical(
            'Exception occurred while downloading traffic data (file={}, reason={})'
            .format(remote_file, str(e)))
        return missing_data if not only_download else None
def _load(det_name, year, month, day, traffic_type, only_download=False,
          n_try=1):
    """ Return raw data of the detector as a list """
    if det_name is None:
        raise Exception("Detector number must be passed")

    missing_data = [cfg.MISSING_VALUE] * cfg.SAMPLES_PER_DAY
    date = datetime.date(year, month, day)
    data = _cached_data(det_name, date, traffic_type)
    if data is not None:
        return _convert_to_list(data,
                                traffic_type) if not only_download else None

    # not cached, check if this is a known-failed detector
    if _is_failed(det_name, date, traffic_type):
        # logging.debug("Detector " + det_name + " is a failed detector")
        return missing_data if not only_download else None

    dirname = str(year) + str(month).zfill(2) + str(day).zfill(2)
    remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
        year) + '/' + dirname + '/' + det_name + traffic_type.extension

    # faverolles 1/13/2020 NOTE: DATA DOWNLOAD TEMPORARY FIX
    # Temporary fix to stop the server from trying to download traffic data
    # files past the year specified by dataloader.py. The server config while
    # developing is Aug 1~31 2018, but the server continually tries to
    # download 2019 traffic data (a hundred+ GB), so the travel time
    # calculation never finishes.
    if year > 2018:
        return missing_data

    try:
        if n_try > 1:
            logging.critical('Retrying... (n_try=%d)' % n_try)
        with http.get_url_opener(remote_file, timeout=30) as res:
            bin_data = res.read()
        data = _convert_to_list(bin_data, traffic_type)
        if not data:
            return missing_data
        _cache(det_name, date, bin_data, traffic_type)
        return data
    except url_error.HTTPError as e:
        logging.debug(
            'Could not get the remote file (file={}, reason={}, http_code={})'
            .format(remote_file, str(e.reason), e.code))
        if e.code == 404:
            # mark this detector/date as failed so it is not requested again
            open(_cache_path(CACHE_TYPE_FAIL, det_name, date, traffic_type),
                 'w').close()
        return missing_data if not only_download else None
    except url_error.URLError as e:
        logging.critical(
            'Could not connect to the server (file={}, reason={})'.format(
                remote_file, str(e.reason)))
        return missing_data if not only_download else None
    except ConnectionResetError as e:
        logging.critical(
            'HTTP connection has been reset (file={}, reason={})'.format(
                remote_file, e.errno))
        if n_try <= MAX_TRY_NUM:
            time.sleep(1)
            # propagate only_download on retry instead of resetting it to False
            return _load(det_name, year, month, day, traffic_type,
                         only_download=only_download, n_try=n_try + 1)
        logging.critical(' - failed to get data')
        return missing_data if not only_download else None
    except Exception as e:
        logging.critical(
            'Exception occurred while downloading traffic data (file={}, reason={})'
            .format(remote_file, str(e)))
        return missing_data if not only_download else None
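
# The loaders above all hand-roll the same retry-on-ConnectionResetError loop.
# A minimal sketch of that pattern as a standalone helper, assuming the same
# MAX_TRY_NUM bound used above; the function name is hypothetical and this is
# not the module's actual API.
def _fetch_with_retry(remote_file, timeout=30, max_tries=MAX_TRY_NUM):
    for n_try in range(1, max_tries + 1):
        try:
            with http.get_url_opener(remote_file, timeout=timeout) as res:
                return res.read()
        except ConnectionResetError:
            # same backoff as the inline loops above: log, wait 1s, try again
            logging.critical('Retrying... (n_try=%d)' % n_try)
            time.sleep(1)
    return None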
def _read_from_web(page_name, group_id, site_id, sen_id, date, e_date,
                   n_try=1):
    """ thread worker to get data from a web page

    :param page_name: web page name of the ScanWeb site
    :type page_name: str
    :type group_id: str
    :type site_id: str
    :param sen_id: sensor id, required parameter for the URI request
    :type sen_id: int
    :param date: target date
    :type date: datetime.datetime
    :param e_date: end date
    :type e_date: datetime.datetime
    """
    remote_file = '{0}{1}?Units=English&Siteid={2}&Senid={3}&HEndDate={4}'.format(
        cfg.SCANWEB_URL, page_name, site_id, sen_id,
        '{0}%2F{1}%2F{2}'.format(e_date.month, e_date.day, e_date.year))
    logging.debug('RWIS(Export) URL : %s' % remote_file)
    try:
        with http.get_url_opener(remote_file) as res:
            html = res.read()
    except url_error.HTTPError as e:
        logging.debug(
            'Could not access HTML of the ScanWeb site (reason={}, http_code={})'
            .format(str(e.reason), e.code))
        logging.debug('URL : {}'.format(remote_file))
        if e.code == 404:
            rwis_cache.fail(group_id, site_id, date, src_type=SRC_TYPE)
        return None
    except url_error.URLError as e:
        logging.critical(
            'Could not connect to ScanWeb site (reason={})'.format(
                str(e.reason)))
        logging.debug('URL : {}'.format(remote_file))
        return None
    except ConnectionResetError as e:
        logging.critical(
            'HTTP connection has been reset (file={}, reason={})'.format(
                remote_file, e.errno))
        if n_try <= MAX_TRY_NUM:
            logging.critical('Retrying...')
            time.sleep(1)
            return _read_from_web(page_name, group_id, site_id, sen_id, date,
                                  e_date, n_try=n_try + 1)
        return None

    try:
        return _parse(html.decode())
    except TypeError as ex:
        logging.critical('Could not parse the exported data from ScanWeb site')
        tb.traceback(ex)
        return None
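
# Hedged sketch (not part of the module's API): reproduces the URL
# construction in _read_from_web with made-up inputs so the request shape is
# easy to see. The page name 'ExportPage', site id, and sensor id below are
# purely illustrative; only cfg.SCANWEB_URL and the HEndDate encoding
# (M%2FD%2FYYYY) come from the code above.
def _example_scanweb_url():
    import datetime
    e_date = datetime.datetime(2017, 12, 31)
    return '{0}{1}?Units=English&Siteid={2}&Senid={3}&HEndDate={4}'.format(
        cfg.SCANWEB_URL, 'ExportPage', 330045, 12,
        '{0}%2F{1}%2F{2}'.format(e_date.month, e_date.day, e_date.year))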
def __init__(self, extension):
    self.extension = extension
    ticas.initialize(global_settings.DATA_PATH)
    infra = Infra.get_infra()
    sdt_str = input('# Enter start date to load data (e.g. 2015-01-01) : ')
    edt_str = input('# Enter end date to load data (e.g. 2017-12-31) : ')
    days = gen_days(start_date_string=sdt_str, end_date_string=edt_str)
    formats = ("v30", "c30")
    success_count = 0
    fail_count = 0
    for day in days:
        dirname = str(day.year) + str(day.month).zfill(2) + str(
            day.day).zfill(2)
        print("Downloading traffic data for {}".format(dirname))
        # one file per (detector, format) pair; counts are per file, not per day
        for detector in infra.detectors:
            for fmt in formats:
                remote_file = cfg.TRAFFIC_DATA_URL + '/' + str(
                    day.year) + '/' + dirname + '/' + detector + "." + fmt
                try:
                    with http.get_url_opener(remote_file, timeout=30) as res:
                        bin_data = res.read()
                    if bin_data:
                        _save_file_to_cache(detector, day, bin_data,
                                            TrafficType("." + fmt))
                        success_count += 1
                except Exception:
                    fail_count += 1
    print("Success: {}".format(success_count))
    print("Fail: {}".format(fail_count))
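
# Usage sketch for the interactive bulk downloader above, assuming this
# __init__ belongs to a class named, hypothetically, BulkDownloader.
# Instantiating it prompts for a start and end date, then downloads and
# caches every detector's .v30/.c30 file for each day in the range:
#
#   BulkDownloader(".v30")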