Exemple #1
0
def import_file(filepath: Path, namespace: Optional[str] = None) -> None:
    """Read a file, parse it with ``parse_aemo_mms_csv`` and store its tables.

    Args:
        filepath: Path of the text file to read.
        namespace: When given, it is passed to the parser as a single-entry
            ``namespace_filter`` and tables whose ``namespace`` differs from
            it are skipped before storing.

    Raises:
        Exception: Whatever ``store_mms_table`` raises for a table; the error
            is logged first and then re-raised, which stops the import.
    """
    with open(filepath, mode="r") as source:
        raw_text = source.read()

    logger.info(f"Reading file: {filepath}")

    # Only hand the parser a filter when a namespace was actually requested.
    parser_kwargs = {"namespace_filter": [namespace]} if namespace else {}
    table_set = parse_aemo_mms_csv(raw_text, **parser_kwargs)

    logger.debug("Loaded {} tables".format(len(table_set.table_names)))

    for mms_table in table_set.tables:
        # Second line of defence in case the parser returned other namespaces.
        if namespace and mms_table.namespace != namespace:
            continue

        logger.debug(
            "Storing table: {} {}".format(mms_table.namespace, mms_table.full_name)
        )

        try:
            store_mms_table(mms_table)
        except Exception as err:
            logger.error(
                "Could not store for table: {}: {}".format(mms_table.full_name, err)
            )
            raise err
Exemple #2
0
    def process_item(self, item, spider):
        """Fetch the resource behind ``item["link"]`` and attach it to the item.

        Two strategies are tried in order:

        1. ``_fallback_download_handler`` followed by ``decode_bytes``.
        2. ``open(url)`` and reading the returned handle.

        On success ``item["content"]`` and ``item["extension"]`` (the suffix
        of the URL as reported by ``os.path.splitext``) are set.

        Args:
            item: Mapping-like item; left untouched when it has no ``"link"``.
            spider: Unused here.

        Returns:
            The item. If every download attempt fails the item is returned
            unchanged (without ``"content"``) instead of implicitly returning
            ``None``.
        """
        if "link" not in item:
            return item

        url = item["link"]
        content = None
        _, file_extension = os.path.splitext(url)

        # First attempt. Any failure is logged and we fall through to the
        # second strategy below.
        try:
            content = decode_bytes(_fallback_download_handler(url))
        except Exception as e:
            logger.error(e)

        if content:
            item["content"] = content
            item["extension"] = file_extension
            return item

        # Second attempt.
        # NOTE(review): `open` is called on a URL and RequestException is
        # caught, so it is presumably a URL-aware replacement for the builtin
        # imported elsewhere in this module - confirm.
        fh = None
        try:
            logger.info("Grabbing: {}".format(url))
            fh = open(url)
        except RequestException:
            logger.info("Bad link: {}".format(url))
        except Exception as e:
            logger.error("Error: {}".format(e))

        if not fh:
            # Nothing could be downloaded; hand the item back as-is so the
            # caller never receives None.
            return item

        # Always release the handle, even if reading fails.
        try:
            item["content"] = fh.read()
        finally:
            fh.close()

        item["extension"] = file_extension
        return item
Exemple #3
0
def bom_get_historic(station_code: str, obs_type: ObservationTypes) -> None:
    """Download every available observation file for a station and save it.

    A directory request is made first; its response is handed to
    ``_parse_directory`` and each distinct value of the resulting mapping is
    used as the ``p_c`` parameter of a follow-up request. Each response is
    unzipped, decoded as UTF-8 and written to
    ``bom_<station>_<obs type>_<directory code>.txt`` in the output directory.

    Args:
        station_code: Sent as the ``p_stn_num`` request parameter.
        obs_type: Its ``.value`` is sent as ``p_nccObsCode`` and used in the
            output file name.

    Raises:
        Exception: If the request for an individual observation file does not
            return an OK response.
    """
    params = BOM_DIRECTORY_PARAMS.copy()
    params["p_stn_num"] = station_code
    params["p_nccObsCode"] = obs_type.value

    query = urlencode(params)
    r = http.get(BOM_BASE_URL, params=query)

    if not r.ok:
        # Without a directory listing there is nothing to iterate over, so
        # stop here instead of parsing an error response.
        logger.error("Could not fetch url: {}".format("{}?{}".format(BOM_BASE_URL, query)))
        return

    dc = _parse_directory(r.content)

    # Several directory entries can map to the same code; fetch each once.
    directory_codes_fetched = set()

    for directory_code in dc.values():

        if directory_code in directory_codes_fetched:
            continue

        # Mark as handled up front: both the "unavailable" and the success
        # path record the code, and the failure path raises out of the loop.
        directory_codes_fetched.add(directory_code)

        params = BOM_RESOURCE_PARAMS.copy()
        params["p_stn_num"] = station_code
        params["p_c"] = directory_code

        # `headers` is not defined in this function; it is expected to exist
        # at module scope.
        r = http.get(BOM_BASE_URL, params=urlencode(params), headers=headers)

        if not r.ok:
            raise Exception("Url error in getting observation file")

        content = _unzip_content(r.content).decode("utf-8")

        if "Weather Data temporarily unavailable" in content:
            logger.error("Could not get {}?{}".format(BOM_BASE_URL, urlencode(params)))
            continue

        file_name = "bom_{}_{}_{}.txt".format(
            station_code, obs_type.value, directory_code.lstrip("-")
        )

        # Write with the same encoding the payload was decoded with so the
        # result does not depend on the platform's default encoding.
        with open(OUPUT_DIRECTORY / file_name, "w", encoding="utf-8") as fh:
            fh.write(content)

        logger.info("Wrote file: {}".format(file_name))
Exemple #4
0
    def process_item(self, item, spider):
        """Open ``item["link"]``, read it and attach the content to the item.

        On success ``item["content"]`` holds whatever the opened handle's
        ``read()`` returned and ``item["extension"]`` holds the suffix of the
        URL as reported by ``os.path.splitext``.

        Args:
            item: Mapping-like item; left untouched when it has no ``"link"``.
            spider: Unused here.

        Returns:
            The item. If the link cannot be opened the error is logged and the
            item is returned unchanged.
        """
        if "link" not in item:
            return item

        url = item["link"]
        _, file_extension = os.path.splitext(url)

        # NOTE(review): `open` is called on a URL and RequestException is
        # caught, so it is presumably a URL-aware replacement for the builtin
        # imported elsewhere in this module - confirm.
        fh = None
        try:
            fh = open(url)
        except RequestException:
            logger.error("Bad link: {}".format(url))
        except Exception as e:
            logger.error("Error: {}".format(e))

        if fh is None:
            # Opening failed and was already logged; reading from None would
            # only raise AttributeError, so return the item as-is.
            return item

        # Always release the handle, even if reading fails.
        try:
            content = fh.read()
        finally:
            fh.close()

        item["content"] = content
        item["extension"] = file_extension

        return item