def clean_hostname(name: typing.AnyStr) -> str:
    """Converts from short to long hostname, if no domain found."""
    if not isinstance(name, (str, bytes)):
        cli_warning("Invalid input for hostname: {}".format(name))
    # Decode bytes input so the str-based checks below are safe.
    if isinstance(name, bytes):
        name = name.decode()
    name = name.lower()
    # Assume user is happy with domain, but strip the dot.
    if name.endswith("."):
        return name[:-1]
    # If there is a dot in the name, assume it is already a long name.
    if '.' in name:
        return name
    # Append the domain name if it is configured and the name does not end with it
    if 'domain' in config and not name.endswith(config['domain']):
        return "{}.{}".format(name, config['domain'])
    return name
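# A minimal usage sketch, assuming the surrounding module provides `config`
# (a dict that may hold a 'domain' key) and `cli_warning` as referenced above.
# The domain is a hypothetical example value, i.e. config == {"domain": "example.com"}:
#
#   clean_hostname("myhost")               # -> "myhost.example.com" (domain appended)
#   clean_hostname("myhost.example.com.")  # -> "myhost.example.com" (trailing dot stripped)
#   clean_hostname("other.org")            # -> "other.org" (contains a dot, left as-is)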
def in_hsts_preload(host: typing.AnyStr) -> bool:
    """Determines if an IDNA-encoded host is on the HSTS preload list"""
    if isinstance(host, str):
        host = host.encode("ascii")
    labels = host.lower().split(b".")

    # Fast-branch for gTLDs that are registered to preload all sub-domains.
    if labels[-1] in _GTLD_INCLUDE_SUBDOMAINS:
        return True

    with open_pkg_binary("hstspreload.bin") as f:
        for layer, label in enumerate(labels[::-1]):
            # None of our layers are greater than 4 deep.
            if layer > 3:
                return False

            # Read the jump table for the layer and label
            jump_info = _JUMPTABLE[layer][_crc8(label)]
            if jump_info is None:
                # No entry: host is not preloaded
                return False

            # Read the set of entries for that layer and label
            f.seek(jump_info[0])
            data = bytearray(jump_info[1])
            f.readinto(data)

            for is_leaf, include_subdomains, ent_label in _iter_entries(data):
                # We found a potential leaf
                if is_leaf:
                    if ent_label == host:
                        return True
                    if include_subdomains and host.endswith(b"." + ent_label):
                        return True

                # Continue traversing as we're not at a leaf.
                elif label == ent_label:
                    break

            # No label matched at this layer, so the host is not preloaded.
            else:
                return False

    return False
def in_hsts_preload(host: typing.AnyStr) -> bool:
    """Determines if an IDNA-encoded host is on the HSTS preload list"""
    if isinstance(host, str):
        host = host.encode("ascii")
    labels = host.lower().split(b".")

    # Fast-branch for gTLDs that are registered to preload all sub-domains.
    if labels[-1] in _GTLD_INCLUDE_SUBDOMAINS:
        return True

    with open(_HSTSPRELOAD_BIN_PATH, "rb") as f:
        for layer, label in enumerate(labels[::-1]):
            # None of our layers are greater than 4 deep.
            if layer > 3:
                return False

            # Read the jump table for the layer and label
            offset, size = _get_offset_and_size(f, layer, label)
            if offset == 0:
                return False

            # Read the set of entries for that layer
            f.seek(offset, 1)
            data = bytearray(size)
            f.readinto(data)

            for is_leaf, include_subdomains, ent_label in _iter_entries(data):
                # We found a potential leaf
                if is_leaf:
                    if ent_label == host:
                        return True
                    if include_subdomains and host.endswith(b"." + ent_label):
                        return True

                # Continue traversing as we're not at a leaf.
                elif label == ent_label:
                    break

            # No label matched at this layer, so the host is not preloaded.
            else:
                return False

    return False
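# Usage sketch for both variants above, which implement the same lookup against
# the packaged preload data. Exact results depend on the bundled list snapshot;
# "github.com" has been preloaded in recent snapshots, so for example:
#
#   in_hsts_preload("github.com")       # True for hosts on the preload list
#   in_hsts_preload(b"github.com")      # bytes input is accepted directly
#   in_hsts_preload("example.invalid")  # False for hosts that are not listed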
def viz_predictions(
        predictions_path: typing.AnyStr,
        dataframe_path: typing.AnyStr,
        test_config_path: typing.AnyStr,
):
    """Displays a looping visualization of the GHI predictions saved by the evaluation script.

    This visualization requires OpenCV3+ ('cv2'), and will loop while refreshing a local
    window until the program is killed, or 'q' is pressed. The arrow keys allow the user
    to change which day is being shown.
    """
    assert os.path.isfile(test_config_path) and test_config_path.endswith(".json"), \
        "invalid test config"
    with open(test_config_path, "r") as fd:
        test_config = json.load(fd)
    stations = test_config["stations"]
    target_datetimes = test_config["target_datetimes"]
    start_bound = datetime.datetime.fromisoformat(test_config["start_bound"])
    end_bound = datetime.datetime.fromisoformat(test_config["end_bound"])
    horiz_deltas = [pd.Timedelta(d).to_pytimedelta()
                    for d in test_config["target_time_offsets"]]
    assert os.path.isfile(predictions_path), f"invalid preds file path: {predictions_path}"
    with open(predictions_path, "r") as fd:
        predictions = fd.readlines()
    assert len(predictions) == len(target_datetimes) * len(stations), \
        "predicted ghi sequence count mismatch wrt target datetimes x station count"
    assert len(predictions) % len(stations) == 0
    predictions = np.asarray([float(ghi) for p in predictions for ghi in p.split(",")])
    predictions = predictions.reshape((len(stations), len(target_datetimes), -1))
    pred_horiz = predictions.shape[-1]
    target_datetimes = pd.DatetimeIndex(
        [datetime.datetime.fromisoformat(t) for t in target_datetimes])
    assert os.path.isfile(dataframe_path), f"invalid dataframe path: {dataframe_path}"
    dataframe = pd.read_pickle(dataframe_path)
    dataframe = dataframe[dataframe.index >= start_bound]
    dataframe = dataframe[dataframe.index < end_bound]
    assert dataframe.index.get_loc(start_bound) == 0, \
        "invalid start bound (should land at first index)"
    assert len(dataframe.index.intersection(target_datetimes)) == len(target_datetimes), \
        "bad dataframe target datetimes overlap, index values are missing"
    # we will display 24-hour slices with some overlap (configured via hard-coded params below)
    time_window, time_overlap, time_sample = \
        datetime.timedelta(hours=24), datetime.timedelta(hours=3), datetime.timedelta(minutes=15)
    assert len(dataframe.asfreq("15min").index) == len(dataframe.index), \
        "invalid dataframe index padding (should have an entry every 15 mins)"
    sample_count = ((time_window + 2 * time_overlap) // time_sample) + 1
    day_count = int(math.ceil((end_bound - start_bound) / time_window))
    clearsky_ghi_data = np.full((day_count, len(stations), sample_count),
                                fill_value=float("nan"), dtype=np.float32)
    station_ghi_data = np.full((day_count, len(stations), sample_count),
                               fill_value=float("nan"), dtype=np.float32)
    pred_ghi_data = np.full((day_count, len(stations), pred_horiz, sample_count),
                            fill_value=float("nan"), dtype=np.float32)
    days_range = pd.date_range(start_bound, end_bound, freq=time_window, closed="left")
    for day_idx, day_start in enumerate(
            tqdm.tqdm(days_range, desc="preparing daytime GHI intervals")):
        window_start, window_end = day_start - time_overlap, day_start + time_window + time_overlap
        sample_start, sample_end = \
            (window_start - start_bound) // time_sample, (window_end - start_bound) // time_sample
        for sample_iter_idx, sample_idx in enumerate(range(sample_start, sample_end + 1)):
            if sample_idx < 0 or sample_idx >= len(dataframe.index):
                continue
            sample_row = dataframe.iloc[sample_idx]
            sample_time = window_start + sample_iter_idx * time_sample
            target_iter_idx = target_datetimes.get_loc(sample_time) \
                if sample_time in target_datetimes else None
            for station_idx, station_name in enumerate(stations):
                clearsky_ghi_data[day_idx, station_idx, sample_iter_idx] = \
                    sample_row[station_name + "_CLEARSKY_GHI"]
                station_ghi_data[day_idx, station_idx, sample_iter_idx] = \
                    sample_row[station_name + "_GHI"]
                if target_iter_idx is not None:
                    pred_ghi_data[day_idx, station_idx, :, sample_iter_idx] = \
                        predictions[station_idx, target_iter_idx]
    displays = []
    for day_idx, day_start in enumerate(tqdm.tqdm(days_range, desc="preparing plots")):
        displays.append(draw_daily_ghi(
            clearsky_ghi=clearsky_ghi_data[day_idx],
            station_ghi=station_ghi_data[day_idx],
            pred_ghi=pred_ghi_data[day_idx],
            stations=stations,
            horiz_deltas=horiz_deltas,
            window_start=(day_start - time_overlap),
            window_end=(day_start + time_window + time_overlap),
            sample_step=time_sample,
        ))
    display = np.stack(displays)
    day_idx = 0
    while True:
        cv.imshow("ghi", display[day_idx])
        ret = cv.waitKey(100)
        if ret == ord('q') or ret == 27:  # q or ESC
            break
        elif ret == 81 or ret == 84:  # UNIX: left or down arrow
            day_idx = max(day_idx - 1, 0)
        elif ret == 82 or ret == 83:  # UNIX: right or up arrow
            day_idx = min(day_idx + 1, len(displays) - 1)
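# Hypothetical invocation sketch; the paths below are placeholders and must
# point at the artifacts described in the docstring (a text file of
# comma-separated GHI sequences, a pickled pandas dataframe, and the JSON
# test config):
#
#   viz_predictions(
#       predictions_path="output/predictions.txt",
#       dataframe_path="data/catalog.pkl",
#       test_config_path="config/eval_cfg.json",
#   )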
def __init__(self, pat: ty.AnyStr, *, period_special: bool = True):
    """
    Arguments
    ---------
    pat
        The glob pattern to use for matching
    period_special
        Whether a leading period in file/directory names should be matchable by
        ``*``, ``?`` and ``[…]`` – traditionally they are not, but many modern
        shells allow one to disable this behaviour
    """
    self.period_special = period_special  # type: bool

    self._sep = utils.maybe_fsencode(os.path.sep, pat)  # type: ty.AnyStr
    dblstar = utils.maybe_fsencode("**", pat)  # type: ty.AnyStr
    dot = utils.maybe_fsencode(".", pat)  # type: ty.AnyStr
    pat_ndot = utils.maybe_fsencode(r"(?![.])", pat)  # type: ty.AnyStr

    # Normalize path separator
    if os.path.altsep:
        pat = pat.replace(utils.maybe_fsencode(os.path.altsep, pat), self._sep)

    # Sanity checks for stuff that will definitely NOT EVER match
    # (there is another one in the loop below)
    assert not os.path.isabs(pat), "Absolute matching patterns will never match"

    # Note the extra final slash for its effect of only matching directories
    #
    # (TBH, I find it hard to see how that is useful, but everybody does it
    # and it keeps things consistent overall – something to only match files
    # would be nice however.)
    self._dir_only = pat.endswith(self._sep)  # type: bool

    self._pat = []  # type: ty.List[ty.Optional[re_pattern_t]]
    for label in pat.split(self._sep):
        # Skip over useless path components
        if len(label) < 1 or label == dot:
            continue

        assert label != dot + dot, \
            'Matching patterns containing ".." will never match'

        if label == dblstar:
            self._pat.append(None)
        elif dblstar in label:
            raise NotImplementedError(
                "Using double-star (**) and other characters in the same glob "
                "path label ({0}) is not currently supported – please do file "
                "an issue if you need this!".format(os.fsdecode(label)))
        else:
            # re_expr: ty.AnyStr
            if not isinstance(label, bytes):
                re_expr = fnmatch.translate(label)
            else:
                re_expr = fnmatch.translate(
                    label.decode("latin-1")).encode("latin-1")

            if period_special and not label.startswith(dot):
                re_expr = pat_ndot + re_expr
            self._pat.append(re.compile(re_expr))
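# Usage sketch, assuming this __init__ belongs to a glob-matcher class (called
# `Matcher` here purely for illustration) whose matching methods walk
# `self._pat` label-by-label:
#
#   m = Matcher("src/**/*.py")                   # `**` labels are stored as None
#   m = Matcher(b"docs/*.md")                    # bytes patterns round-trip via latin-1
#   m = Matcher("build/", period_special=False)  # trailing separator: directories only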