def import_file(filepath: Path, namespace: Optional[str] = None) -> None:
    """Parse an AEMO MMS CSV file and store each of its tables.

    Args:
        filepath: Path of the MMS CSV file to read.
        namespace: If given, restrict parsing and storage to tables in
            this namespace.

    Raises:
        Exception: Re-raised from store_mms_table when a table cannot
            be stored (logged first).
    """
    # Log before the (potentially slow) read, not after it.
    logger.info("Reading file: %s", filepath)

    with open(filepath, mode="r") as fh:
        content = fh.read()

    if namespace:
        ts = parse_aemo_mms_csv(content, namespace_filter=[namespace])
    else:
        ts = parse_aemo_mms_csv(content)

    logger.debug("Loaded %d tables", len(ts.table_names))

    for table in ts.tables:
        # Defensive: skip any table outside the requested namespace even
        # though namespace_filter should already have narrowed the set.
        if namespace and table.namespace != namespace:
            continue

        logger.debug("Storing table: %s %s", table.namespace, table.full_name)

        try:
            store_mms_table(table)
        except Exception as e:
            logger.error("Could not store for table: %s: %s", table.full_name, e)
            # Bare raise preserves the original traceback.
            raise
def process_item(self, item, spider):
    """Attach downloaded content and file extension to a crawled item.

    Items without a "link" field pass through unchanged. The project's
    fallback download handler is tried first; if it yields no content we
    fall back to open() on the url.

    Args:
        item: Crawled item dict; mutated in place with "content" and
            "extension" keys before being returned.
        spider: The spider that produced the item (unused here).

    Returns:
        The (possibly augmented) item.
    """
    if "link" not in item:
        return item

    url = item["link"]
    content = None
    _, file_extension = os.path.splitext(url)

    # First attempt: project fallback download handler.
    try:
        _bytes_obj = _fallback_download_handler(url)
        content = decode_bytes(_bytes_obj)
    except Exception as e:
        logger.error(e)

    if content:
        item["content"] = content
        item["extension"] = file_extension
        return item

    # Second attempt: open() the url directly.
    # NOTE(review): presumably an opener that accepts URLs (e.g.
    # smart-open) is in scope here — plain builtins.open would not raise
    # RequestException. Confirm against the module's imports.
    fh = None
    try:
        logger.info("Grabbing: {}".format(url))
        fh = open(url)
    except RequestException:
        logger.info("Bad link: {}".format(url))
    except Exception as e:
        logger.error("Error: {}".format(e))

    if fh:
        try:
            content = fh.read()
        finally:
            # Fix: the original leaked the file handle.
            fh.close()

    item["content"] = content
    item["extension"] = file_extension
    return item
def bom_get_historic(station_code: str, obs_type: ObservationTypes) -> None:
    """Download all historic BOM observation files for one station.

    Fetches the directory listing for the station/observation type, then
    downloads, unzips and writes each directory code's data file into
    OUPUT_DIRECTORY as bom_<station>_<obs>_<code>.txt.

    Args:
        station_code: BOM station number (sent as p_stn_num).
        obs_type: Observation type; its .value is sent as p_nccObsCode.

    Raises:
        Exception: When an individual observation file request fails.
    """
    params = BOM_DIRECTORY_PARAMS.copy()
    params["p_stn_num"] = station_code
    params["p_nccObsCode"] = obs_type.value

    # Human-readable url used only for the error message below.
    url = urljoin(BOM_BASE_URL, urlencode(params))

    r = http.get(BOM_BASE_URL, params=urlencode(params))

    if not r.ok:
        # Fix: the original logged the failure but fell through and
        # parsed the failed response body. Bail out instead.
        logger.error("Could not fetch url: {}".format(url))
        return

    dc = _parse_directory(r.content)

    # Track already-downloaded codes so duplicates in the directory
    # listing are fetched only once.
    directory_codes_fetched = []

    for directory_code in dc.values():
        if directory_code in directory_codes_fetched:
            continue

        params = BOM_RESOURCE_PARAMS.copy()
        params["p_stn_num"] = station_code
        params["p_c"] = directory_code

        r = http.get(BOM_BASE_URL, params=urlencode(params), headers=headers)

        if not r.ok:
            raise Exception("Url error in getting observation file")

        content = _unzip_content(r.content).decode("utf-8")

        # BOM serves a placeholder page when data is offline; mark the
        # code as handled and move on rather than writing the placeholder.
        if "Weather Data temporarily unavailable" in content:
            directory_codes_fetched.append(directory_code)
            logger.error("Could not get {}?{}".format(BOM_BASE_URL, urlencode(params)))
            continue

        file_name = "bom_{}_{}_{}.txt".format(
            station_code, obs_type.value, directory_code.lstrip("-")
        )

        # NOTE(review): OUPUT_DIRECTORY is the module-level constant's
        # actual (misspelled) name — kept as-is for compatibility.
        with open(OUPUT_DIRECTORY / file_name, "w") as fh:
            fh.write(content)

        logger.info("Wrote file: {}".format(file_name))

        directory_codes_fetched.append(directory_code)
def process_item(self, item, spider):
    """Attach file content and extension to a crawled item via open(url).

    Items without a "link" field pass through unchanged. On download
    failure the item's "content" is set to None rather than crashing.

    Args:
        item: Crawled item dict; mutated in place with "content" and
            "extension" keys before being returned.
        spider: The spider that produced the item (unused here).

    Returns:
        The (possibly augmented) item.
    """
    if "link" not in item:
        return item

    url = item["link"]
    content = None
    _, file_extension = os.path.splitext(url)

    fh = None
    try:
        fh = open(url)
    except RequestException:
        logger.error("Bad link: {}".format(url))
    except Exception as e:
        logger.error("Error: {}".format(e))

    # Fix: the original called fh.read() unconditionally, raising
    # AttributeError whenever open() had failed and fh was still None.
    if fh:
        try:
            content = fh.read()
        finally:
            # Fix: the original also leaked the file handle.
            fh.close()

    item["content"] = content
    item["extension"] = file_extension
    return item