Example #1
    def start_requests(self) -> Generator[scrapy.Request, None, None]:
        start_month = datetime(2020, 1, 1)
        end_month = datetime(2009, 7, 1)

        for date in month_series(start_month, end_month):
            for table in self.tables:
                url_params = {
                    "month": get_date_component("%m", dt=date),
                    "year": get_date_component("%Y", dt=date),
                    "table": table.upper(),
                }

                req_url = MMS_URL.format(**url_params)

                yield scrapy.Request(req_url)
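Example #1 leans on two helpers that are not shown: month_series, which yields one datetime per month between the two bounds (note that start_month is later than end_month here, so the series runs backwards), and get_date_component, which formats a date with strftime and defaults to the current date when dt is omitted (as in Examples #2 and #3). A minimal sketch of what they could look like, inferred only from how they are called in these examples and not taken from the project's actual implementation:

from datetime import datetime
from typing import Generator, Optional


def month_series(start: datetime, end: datetime) -> Generator[datetime, None, None]:
    """Yield the first of each month from start to end inclusive.

    Runs backwards when start is later than end, matching Example #1.
    """
    step = -1 if start > end else 1
    current = datetime(start.year, start.month, 1)
    last = datetime(end.year, end.month, 1)

    while True:
        yield current

        if current == last:
            return

        month, year = current.month + step, current.year

        if month == 0:
            month, year = 12, year - 1
        elif month == 13:
            month, year = 1, year + 1

        current = datetime(year, month, 1)


def get_date_component(format_str: str, dt: Optional[datetime] = None) -> str:
    """Format a single date component, defaulting to now when dt is not given."""
    return (dt or datetime.now()).strftime(format_str)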
Example #2
class NemXLSSpider(scrapy.Spider):

    start_url: Optional[str] = None

    url_params = {
        "day": get_date_component("%d"),
        "month": get_date_component("%m"),
        "year": get_date_component("%Y"),
    }

    def start_requests(self) -> Generator[scrapy.Request, None, None]:
        if not self.start_url:
            return None

        request_url = self.start_url.format(**self.url_params)

        yield scrapy.Request(request_url)

    def parse(self,
              response: Response) -> Generator[Dict[str, Any], None, None]:
        yield {"content": response.text}
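Example #2 is a base class: start_url stays None, so start_requests yields nothing until a subclass provides a URL containing {day}, {month} and {year} placeholders, which are filled from url_params at request time. A sketch of a concrete subclass; the class name and URL below are purely illustrative, not taken from the project:

class ExampleDailyXLSSpider(NemXLSSpider):
    # Hypothetical subclass for illustration only.
    name = "au.example.daily.xls"
    start_url = "https://example.com/reports/{year}/{month}/{day}/daily.xls"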
Example #3
class WemCurrentSpider(scrapy.Spider):

    start_url = None

    url_params = {
        "day": get_date_component("%d"),
        "month": get_date_component("%m"),
        "year": get_date_component("%Y"),
    }

    def start_requests(self):

        # Timestamp used to bust any intermediate caching of the current-data URL.
        cache_bust = datetime.now().strftime("%Y%m%d%H%M%S%f")

        if self.start_url:
            request_url = self.start_url.format(**self.url_params)
            request_url += "?{}".format(cache_bust)

            yield scrapy.Request(request_url)

    def parse(self, response) -> Generator[Dict, None, None]:
        yield {"content": response.text}
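Example #3 differs from #2 mainly in the cache-busting step: a timestamp is appended as a bare query string so the current-data URL is never served from an intermediate cache. Roughly what that produces, with a hypothetical start_url used only for illustration:

# Hypothetical values, for illustration only.
start_url = "https://example.com/current.xls"
cache_bust = datetime(2021, 3, 5, 10, 15, 30).strftime("%Y%m%d%H%M%S%f")

request_url = start_url + "?{}".format(cache_bust)
# -> "https://example.com/current.xls?20210305101530000000"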
Example #4
def get_apvi_rooftop_data(
        day: Optional[datetime] = None) -> Optional[APVIForecastSet]:
    """Obtains and parses APVI forecast data"""

    if not day:
        day = get_today_opennem()

    day_string = get_date_component(format_str=APVI_DATE_QUERY_FORMAT, dt=day)

    apvi_endpoint_url = get_apvi_uri(today=False)

    logger.info("Getting APVI data for day {} from {}".format(
        day_string, apvi_endpoint_url))

    _resp = _apvi_request_session.post(apvi_endpoint_url,
                                       data={"day": day_string})

    if not _resp.ok:
        logger.error("Invalid APVI Return: {}".format(_resp.status_code))
        return None

    _resp_json = None

    try:
        _resp_json = _resp.json()
    except JSONDecodeError as e:
        logger.error("Error decoding APVI response: {}".format(e))
        return None

    _required_keys = ["postcode", "postcodeCapacity", "installations"]

    for _req_key in _required_keys:
        if _req_key not in _resp_json:
            logger.error(f"Invalid APVI response: {_req_key} field not found")
            return None

    postcode_gen = _resp_json["postcode"]
    postcode_capacity = _resp_json["postcodeCapacity"]
    installations = _resp_json["installations"]

    # Brisbane has no DST, so it's effectively NEM time
    _run_at = get_today_opennem()
    _interval_records = []

    for record in postcode_gen:
        for state, prefix in STATE_POSTCODE_PREFIXES.items():

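            # Each postcode key in the record appears to hold generation as a
            # percentage of that postcode's registered capacity, so summing
            # (value / 100) * capacity over postcodes with this state's prefix
            # gives the state's rooftop output (non-SWIS WA postcodes excluded).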
            generated = sum([
                float(v) / 100 * postcode_capacity[k]
                for k, v in record.items() if k.startswith(prefix) and v
                and k in postcode_capacity and k[:2] not in WA_NON_SWIS
            ])

            if not generated:
                continue

            _interval_records.append(
                APVIForecastInterval(
                    **{
                        "network_id": "APVI",
                        "trading_interval": record["ts"],
                        "state": state,
                        "generated": generated,
                    }))

    _state_capacities = {}

    # Calculate state capacities
    for postcode_prefix, capacity_val in postcode_capacity.items():
        for state, prefix in STATE_POSTCODE_PREFIXES.items():
            if state not in _state_capacities:
                _state_capacities[state] = 0

            if postcode_prefix.startswith(prefix):
                _state_capacities[state] += capacity_val

    # derive state capacity models
    _state_capacity_models = []

    for state, state_capacity in _state_capacities.items():
        capacity_registered = state_capacity

        # Default to None so a state without an installation count doesn't
        # raise NameError or silently reuse the previous iteration's value.
        unit_number = None

        if state.lower() in installations:
            unit_number = installations[state.lower()]

        _state_capacity_models.append(
            APVIStateRooftopCapacity(state=state,
                                     capacity_registered=capacity_registered,
                                     unit_number=unit_number))

    apvi_server_latest: Optional[datetime] = None

    trading_intervals = list(
        set([i.trading_interval for i in _interval_records]))

    if trading_intervals:
        apvi_server_latest = max(trading_intervals)

    apvi_forecast_set = APVIForecastSet(crawled=_run_at,
                                        intervals=_interval_records,
                                        capacities=_state_capacity_models)

    try:
        apvi_forecast_set.server_latest = apvi_server_latest
    except ValidationError:
        logger.error("APVI validation error for server_latest: {} <{}>".format(
            apvi_server_latest, repr(apvi_server_latest)))

    return apvi_forecast_set
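A minimal usage sketch of Example #4's entry point; the attributes read below mirror the keyword arguments used when the APVIForecastSet is built above:

forecast_set = get_apvi_rooftop_data(datetime(2021, 3, 5))

if forecast_set:
    print(len(forecast_set.intervals), "interval records")
    print(sorted(c.state for c in forecast_set.capacities))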