def bom_get_historic(station_code: str, obs_type: ObservationTypes) -> None:
    params = BOM_DIRECTORY_PARAMS.copy()
    params["p_stn_num"] = station_code
    params["p_nccObsCode"] = obs_type.value

    url = urljoin(BOM_BASE_URL, urlencode(params))

    r = http.get(BOM_BASE_URL, params=urlencode(params))

    if not r.ok:
        logger.error("Could not fetch url: {}".format(url))
        return

    dc = _parse_directory(r.content)

    # Get observation
    directory_codes_fetched = []

    # if year not in dc.keys():
    #     raise Exception("Could not find year {} for station {}".format(year, station_code))

    for directory_code in dc.values():
        if directory_code in directory_codes_fetched:
            continue

        params = BOM_RESOURCE_PARAMS.copy()
        params["p_stn_num"] = station_code
        params["p_c"] = directory_code

        r = http.get(BOM_BASE_URL, params=urlencode(params), headers=headers)

        if not r.ok:
            raise Exception("Url error in getting observation file")

        content = _unzip_content(r.content).decode("utf-8")

        if "Weather Data temporarily unavailable" in content:
            directory_codes_fetched.append(directory_code)
            logger.error("Could not get {}?{}".format(BOM_BASE_URL, urlencode(params)))
            continue

        file_name = "bom_{}_{}_{}.txt".format(
            station_code, obs_type.value, directory_code.lstrip("-")
        )

        with open(OUPUT_DIRECTORY / file_name, "w") as fh:
            fh.write(content)

        logger.info("Wrote file: {}".format(file_name))

        directory_codes_fetched.append(directory_code)

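# A short usage sketch (hypothetical): fetch historic observations for a single
# station. The station code and the ObservationTypes member name are illustrative
# assumptions and not taken from the original module.
bom_get_historic("066062", ObservationTypes.rain)
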
def get_aemo_wem_live_facility_intervals_recent_date() -> datetime:
    req = http.get(LIVE_FACILITIES, headers=REQ_HEADERS)

    if req.status_code != 200:
        logger.error(
            "WEM live facility intervals returning non 200: {} {}".format(
                LIVE_FACILITIES, req.status_code
            )
        )

    csv_content = req.content

    csvreader = csv.DictReader(csv_content.decode("utf-8").split("\n"))

    if not csvreader.fieldnames or len(csvreader.fieldnames) < 1:
        logger.error(
            "WEM live facility intervals returning bad CSV: {}".format(LIVE_FACILITIES)
        )

    records = unit_scada_generate_facility_scada(
        records=csvreader,
        interval_field="PERIOD",
        facility_code_field="FACILITY_CODE",
        power_field="ACTUAL_MW",
        network=NetworkWEM,
    )

    max_date = max([i["trading_interval"] for i in records])

    return max_date

def get_aemo_wem_live_facility_intervals_recent_date() -> datetime: """Returns the latest interval date from the WEM live feed. Used in monitors.""" req = http.get(LIVE_FACILITIES, headers=REQ_HEADERS) if req.status_code != 200: logger.error( "WEM live facility intervals returning non 200: {} {}".format( LIVE_FACILITIES, req.status_code)) csv_content = req.content csvreader = csv.DictReader(csv_content.decode("utf-8").split("\n")) if not csvreader.fieldnames or len(csvreader.fieldnames) < 1: logger.error( "WEM live facility intervals returning bad CSV: {}".format( LIVE_FACILITIES)) records = unit_scada_generate_facility_scada( records=csvreader, interval_field="PERIOD", facility_code_field="FACILITY_CODE", power_field="ACTUAL_MW", network=NetworkWEM, ) trading_intervals = [i["trading_interval"] for i in records] if not trading_intervals: raise Exception("Error parsing AEMO WEM live facility intervals") max_date = max(trading_intervals) return max_date
def fetch_au_cpi() -> List[AUCpiData]:
    """Gets Australian CPI figures and parses them into AUCpiData records"""
    r = http.get(AU_CPI_URL)

    if not r.ok:
        raise Exception("Problem grabbing CPI source: {}".format(r.status_code))

    wb = xlrd.open_workbook(file_contents=r.content)

    if "Data" not in wb.sheet_names():
        raise Exception("No Data sheet in CPI workbook")

    wb_data = wb.sheet_by_index(0)
    records = []

    for i in range(11, wb_data.nrows):
        row = wb_data.row_values(i, 0, 2)

        cpi_record = None

        # skip empty values
        if not row[1]:
            continue

        try:
            cpi_record = AUCpiData(quarter_date=row[0], cpi_value=row[1])
        except ValidationError as e:
            logger.info("Invalid CPI data: {}".format(e))
            continue

        if cpi_record:
            records.append(cpi_record)

    return records

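# A short usage sketch (hypothetical): fetch the CPI records and log the most
# recent quarter. Attribute access follows the AUCpiData field names used above;
# the helper itself is illustrative and not part of the original module.
def log_latest_cpi() -> None:
    cpi_records = fetch_au_cpi()

    if cpi_records:
        latest = cpi_records[-1]
        logger.info(
            "Latest CPI: {} for quarter {}".format(latest.cpi_value, latest.quarter_date)
        )
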
def write_photo_to_s3(file_path: str, data: Any, overwrite: bool = False) -> int:
    """Writes a photo blob to the S3 photo bucket. By default it will not
    overwrite any existing paths"""
    # @TODO move this to aws.py
    s3_save_path = os.path.join(settings.photos_bucket_path, file_path)
    write_count = 0

    http_save_path = bucket_to_website(s3_save_path)

    # check if it already exists
    # @TODO check hashes
    if not overwrite:
        r = http.get(http_save_path)

        if r.ok:
            return len(r.content)

    s3_client = boto3.client("s3")

    # @TODO the upload itself is currently stubbed out: the smart_open write below
    # is disabled and the boto3 client is not wired up yet, so write_count stays 0
    # with open(
    #     s3_save_path,
    #     "wb",
    #     transport_params=dict(multipart_upload_kwargs=UPLOAD_ARGS),
    # ) as fh:
    #     write_count = fh.write(data)
    # s3_client.upload_file(s3_save_path)

    logger.info("Wrote {} to {}".format(len(data), s3_save_path))

    return write_count

def write_photo_to_s3(file_path: str, data, overwrite: bool = False) -> int:
    # @TODO move this to aws.py
    s3_save_path = os.path.join(S3_EXPORT_DEFAULT_BUCKET, file_path)
    write_count = 0

    http_save_path = bucket_to_website(s3_save_path)

    # check if it already exists
    # @TODO check hashes
    if not overwrite:
        r = http.get(http_save_path)

        if r.ok:
            return len(r.content)

    # `open` here appears to be smart_open's open (transport_params is not
    # supported by the builtin), which can write directly to s3:// paths
    with open(
        s3_save_path,
        "wb",
        transport_params=dict(multipart_upload_kwargs=UPLOAD_ARGS),
    ) as fh:
        write_count = fh.write(data)

    logger.info("Wrote {} to {}".format(len(data), s3_save_path))

    return write_count

def get_dataset(
    stat_type: StatType,
    network_region: str,
    bucket_size: str = "daily",
    year: Optional[int] = None,
    testing: bool = True,
) -> OpennemDataSetV2:
    req_url = get_v2_url(stat_type, network_region, bucket_size, year, testing=testing)

    r = http.get(req_url)

    logger.debug("Loading: {}".format(req_url))

    if not r.ok:
        raise Exception("Could not fetch URL: {}".format(req_url))

    json_data = r.json()

    statset = load_statset_v2(json_data)
    statset = statset_patch(statset, bucket_size=bucket_size)

    return statset

def get_osm_way(way_id: str) -> Dict:
    """Returns the GeoJSON FeatureCollection for an OSM way from the OSM API"""
    way_url = get_osm_way_url(way_id)
    way_resp = http.get(way_url)

    if not way_resp.ok:
        logger.error("Could not fetch way: {}".format(way_url))
        raise Exception("Could not get way: {}".format(way_resp.status_code))

    way_resp_content = way_resp.text

    geojson_response = osm2geojson.xml2geojson(
        way_resp_content, filter_used_refs=False, log_level="INFO"
    )

    if not isinstance(geojson_response, dict):
        raise Exception("Did not get a valid server response from OSM API")

    if "type" not in geojson_response:
        raise Exception("Did not get a valid server response from OSM API")

    if geojson_response["type"] != "FeatureCollection":
        raise Exception("Did not get a valid FeatureCollection from OSM API")

    if "features" not in geojson_response:
        raise Exception("GeoJSON has no features")

    return geojson_response

def get_jobs() -> Dict[str, Any]:
    job_url = urljoin(
        settings.scrapyd_url,
        "listjobs.json?project={}".format(settings.scrapyd_project_name),
    )

    jobs = http.get(job_url).json()

    return jobs

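# A minimal usage sketch for get_jobs(): scrapyd's listjobs.json responds with
# "pending", "running" and "finished" job lists, so a caller might summarise them
# like this. The helper below is illustrative and not part of the original module.
def log_job_counts() -> None:
    jobs = get_jobs()

    for state in ["pending", "running", "finished"]:
        logger.info("scrapyd has {} {} jobs".format(len(jobs.get(state, [])), state))
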
def get_image_from_web(image_url: str) -> Optional[Image.Image]:
    """Gets an image from a URL"""
    img = None

    try:
        img = Image.open(http.get(image_url, stream=True).raw)
    except Exception:
        logger.error("Error parsing: %s", image_url)
        return None

    return img

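# A short usage sketch (hypothetical): fetch an image and write a resized copy to
# disk. The output filename is an illustrative placeholder; thumbnail() and save()
# are standard Pillow calls.
def save_thumbnail(image_url: str, out_path: str = "photo_thumb.jpg") -> None:
    img = get_image_from_web(image_url)

    if img:
        img.thumbnail((800, 800))
        img.save(out_path)
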
def check_opennem_interval_delays(network_code: str) -> bool:
    """Runs periodically and alerts if there is a current delay in output of power intervals"""
    network = network_from_network_code(network_code)

    env = ""

    if settings.debug:
        env = ".dev"

    url = f"https://data{env}.opennem.org.au/v3/stats/au/{network.code}/power/7d.json"

    resp = http.get(url)

    if resp.status_code != 200 or not resp.ok:
        logger.error("Error retrieving: {}".format(url))
        return False

    resp_json = resp.json()

    if "data" not in resp_json:
        logger.error("Error retrieving wem power: malformed response")
        return False

    data = resp_json["data"]

    fueltech_data = data.pop()

    history_end_date = fueltech_data["history"]["last"]

    history_date = parse_date(history_end_date, dayfirst=False)

    if not history_date:
        logger.error("Could not read history date for opennem interval monitor")
        return False

    now_date = datetime.now().astimezone(network.get_timezone())  # type: ignore

    time_delta = chop_delta_microseconds(now_date - history_date) - timedelta(
        minutes=network.interval_size
    )

    logger.debug("Live time: {}, delay: {}".format(history_date, time_delta))

    alert_threshold = (
        network.monitor_interval_alert_threshold
        or settings.monitor_interval_alert_threshold
        or 60
    )

    if time_delta > timedelta(minutes=alert_threshold):
        slack_message(
            f"*WARNING*: OpenNEM {network.code} interval delay on {settings.env} currently: {time_delta}.\n",
            tag_users=settings.monitoring_alert_slack_user,
        )

    return True

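# A minimal scheduling sketch (hypothetical): the docstring above notes the check
# runs periodically. The network codes and the 10-minute cadence here are
# illustrative assumptions only.
import time


def run_interval_monitors() -> None:
    while True:
        for network_code in ["NEM", "WEM"]:
            check_opennem_interval_delays(network_code)

        time.sleep(600)
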
def validate_url_map(url_map: List[DiffComparisonSet]) -> bool:
    success = True

    for us in url_map:
        for version in ["v2", "v3"]:
            req_url = getattr(us, f"url{version}")
            r = http.get(req_url)

            if not r.ok:
                logger.error("invalid {} url: {}".format(version, req_url))
                success = False

    return success

def check_opennem_interval_delays(network_code: str) -> bool:
    network = network_from_network_code(network_code)

    env = ""

    if settings.debug:
        env = ".dev"

    url = f"https://data{env}.opennem.org.au/v3/stats/au/{network.code}/power/7d.json"

    resp = http.get(url)

    if resp.status_code != 200 or not resp.ok:
        logger.error("Error retrieving: {}".format(url))
        return False

    resp_json = resp.json()

    if "data" not in resp_json:
        logger.error("Error retrieving wem power: malformed response")
        return False

    data = resp_json["data"]

    fueltech_data = data.pop()

    history_end_date = fueltech_data["history"]["last"]

    history_date = parse_date(history_end_date, dayfirst=False)

    if not history_date:
        logger.error("Could not read history date for opennem interval monitor")
        return False

    now_date = datetime.now().astimezone(network.get_timezone())

    time_delta = chop_microseconds(now_date - history_date)

    logger.debug("Live time: {}, delay: {}".format(history_date, time_delta))

    if time_delta > timedelta(hours=3):
        slack_message(
            "*WARNING*: OpenNEM {} interval delay on {} currently: {}\n".format(
                network.code, settings.env, time_delta
            )
        )

    return True

def _req(self, url: str, params: Dict[str, Any]) -> Dict:
    resp = http.get(url, params=params)

    logger.debug("Request body: %s", resp.request.body)

    if not resp.ok:
        logger.error("ERROR: {} Response code: {}".format(url, resp.status_code))

    _j = resp.json()

    if "success" in _j and _j["success"] is False:
        raise Exception(
            "{}: {}".format(_j["error"]["code"], _j["error"]["description"])
        )

    return _j

def load_maps(self) -> None:
    for version in ["v2", "v3"]:
        req_url = getattr(self, f"url{version}")
        r = http.get(req_url)

        logger.debug("Loading: {}".format(req_url))

        if not r.ok:
            logger.error("invalid {} url: {}".format(version, req_url))

        statset = None

        if version == "v2":
            statset = load_statset_v2(r.json())
            self.v2 = statset
        else:
            statset = load_statset(r.json())
            self.v3 = statset

def _fallback_download_handler(url: str) -> bytes:
    """Despite the name, this is now the primary download handler and takes
    precedence over the legacy downloader"""
    r = http.get(url)

    if not r.ok:
        raise Exception("Bad link returned {}: {}".format(r.status_code, url))

    content = BytesIO(r.content)

    file_mime = mime_from_content(content)

    if not file_mime:
        file_mime = mime_from_url(url)

    # @TODO handle all this in utils/archive.py
    # and make it all generic to handle other
    # mime types
    if file_mime == "application/zip":
        with ZipFile(content) as zf:
            if len(zf.namelist()) == 1:
                return zf.open(zf.namelist()[0]).read()

            c = []
            stream_count = 0

            for filename in zf.namelist():
                if filename.endswith(".zip"):
                    c.append(_handle_zip(zf.open(filename), "r"))
                    stream_count += 1
                else:
                    c.append(zf.open(filename))

            return chain_streams(c).read()

    return content.getvalue()

def load_url_map(url_map: List[DiffComparisonSet]) -> List[DiffComparisonSet]:
    for us in url_map:
        for version in ["v2", "v3"]:
            req_url = getattr(us, f"url{version}")
            r = http.get(req_url)

            if not r.ok:
                logger.error("invalid {} url: {}".format(version, req_url))

            statset = None

            if version == "v2":
                statset = load_statset_v2(r.json())
                us.v2 = statset
            else:
                statset = load_statset(r.json())
                us.v3 = statset

    return url_map

def url_downloader(url: str) -> bytes:
    """Downloads a URL and returns the content, handling embedded zips and other MIME types"""
    logger.debug("Downloading: {}".format(url))

    r = http.get(url, verify=settings.http_verify_ssl)

    if not r.ok:
        raise Exception("Bad link returned {}: {}".format(r.status_code, url))

    content = BytesIO(r.content)

    file_mime = mime_from_content(content)

    if not file_mime:
        file_mime = mime_from_url(url)

    # @TODO handle all this in utils/archive.py
    # and make it all generic to handle other
    # mime types
    if file_mime == "application/zip":
        with ZipFile(content) as zf:
            if len(zf.namelist()) == 1:
                return zf.open(zf.namelist()[0]).read()

            c = []
            stream_count = 0

            for filename in zf.namelist():
                if filename.endswith(".zip"):
                    c.append(_handle_zip(zf.open(filename), "r"))
                    stream_count += 1
                else:
                    c.append(zf.open(filename))

            return chain_streams(c).read()

    return content.getvalue()

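# A minimal usage sketch: download a published file with url_downloader and write
# the returned bytes to disk. The helper and its arguments are illustrative only
# and not part of the original module.
def download_to_file(url: str, out_path: str) -> int:
    content = url_downloader(url)

    with open(out_path, "wb") as fh:
        return fh.write(content)
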
def _get(self, endpoint: str, params: Optional[Dict] = None) -> Union[Dict, List]:
    """Perform a GET request to an endpoint, optionally with query string parameters.

    :param endpoint: Endpoint path
    :type endpoint: str
    :param params: Query string parameters, defaults to None
    :type params: Optional[Dict], optional
    :raises Exception: on a non-2xx response from the API
    :return: JSON response
    :rtype: Union[Dict, List]
    """
    url = self._get_endpoint(endpoint)

    resp = http.get(url, params=params)

    logger.debug("GET [%d] %s", resp.status_code, resp.url)

    if not resp.ok:
        raise Exception("Error from API: {}".format(resp.status_code))

    return resp.json()

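# A small usage sketch (hypothetical): a public wrapper a client class might expose
# around _get. The method name and the "stations" endpoint are illustrative
# assumptions and are not taken from the original client.
def get_stations(self) -> Union[Dict, List]:
    """Return the list of stations from the API"""
    return self._get("stations")
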