Exemple #1
0
def clean_hostname(name: typing.AnyStr) -> str:
    """ Converts from short to long hostname, if no domain found.

    The name is lower-cased first. A trailing dot is taken to mean the caller
    already gave the full name, so only the dot is stripped. A name containing
    a dot is returned as is. Otherwise ``config['domain']`` is appended when
    it is configured and the name does not already end with it.

    :param name: hostname as ``str`` or ``bytes``. Bytes are decoded (UTF-8)
        so the ``str`` operations below work and the result is always ``str``.
    :return: the normalized hostname.
    :raises UnicodeDecodeError: if ``name`` is bytes that are not valid UTF-8.
    """
    if not isinstance(name, (str, bytes)):
        # NOTE(review): cli_warning is presumably expected to raise here,
        # otherwise the calls below fail on the unsupported type - confirm.
        cli_warning("Invalid input for hostname: {}".format(name))

    # Every comparison below uses str literals, which raise TypeError when
    # applied to a bytes object, so bring bytes input over to str first.
    if isinstance(name, bytes):
        name = name.decode()

    name = name.lower()
    # Assume user is happy with domain, but strip the dot.
    if name.endswith("."):
        return name[:-1]

    # If a dot in name, assume long name.
    if '.' in name:
        return name

    # Append domain name if in config and it does not end with it
    if 'domain' in config and not name.endswith(config['domain']):
        return "{}.{}".format(name, config['domain'])
    return name
def in_hsts_preload(host: typing.AnyStr) -> bool:
    """Determines if an IDNA-encoded host is on the HSTS preload list

    The host is split into labels which are walked from the right-most label
    inwards (at most four layers). For each layer the jump table gives the
    location of the entries to scan in the packaged ``hstspreload.bin`` file.

    :param host: host name as ``str`` (encoded to ASCII) or ``bytes``.
        Matching is case-insensitive.
    :return: ``True`` if the host matches a leaf entry exactly, is a
        sub-domain of a leaf entry flagged ``include_subdomains``, or its
        top-level label is in ``_GTLD_INCLUDE_SUBDOMAINS``; ``False`` otherwise.
    :raises UnicodeEncodeError: if a ``str`` host contains non-ASCII characters.
    """

    if isinstance(host, str):
        host = host.encode("ascii")
    # Normalize case once, so the per-label traversal and the whole-host leaf
    # comparisons below agree. Previously only the labels were lower-cased,
    # so a mixed-case host could be traversed but never match a leaf.
    # (Assumes stored entries are lower-case, as the label lookup already did.)
    host = host.lower()
    labels = host.split(b".")

    # Fast-branch for gTLDs that are registered to preload all sub-domains.
    if labels[-1] in _GTLD_INCLUDE_SUBDOMAINS:
        return True

    with open_pkg_binary("hstspreload.bin") as f:
        for layer, label in enumerate(labels[::-1]):
            # None of our layers are greater than 4 deep.
            if layer > 3:
                return False

            # Read the jump table for the layer and label
            jump_info = _JUMPTABLE[layer][_crc8(label)]
            if jump_info is None:
                # No entry: host is not preloaded
                return False

            # Read the set of entries for that layer and label:
            # jump_info[0] is the absolute file offset, jump_info[1] the byte count.
            f.seek(jump_info[0])
            data = bytearray(jump_info[1])
            f.readinto(data)

            for is_leaf, include_subdomains, ent_label in _iter_entries(data):
                # We found a potential leaf
                if is_leaf:
                    if ent_label == host:
                        return True
                    if include_subdomains and host.endswith(b"." + ent_label):
                        return True

                # Continue traversing as we're not at a leaf.
                elif label == ent_label:
                    break
            else:
                # No entry let us descend to the next layer: not preloaded.
                return False
    return False
def in_hsts_preload(host: typing.AnyStr) -> bool:
    """Determines if an IDNA-encoded host is on the HSTS preload list

    The host is split into labels which are walked from the right-most label
    inwards (at most four layers). For each layer ``_get_offset_and_size``
    gives the location of the entries to scan in the binary preload file.

    :param host: host name as ``str`` (encoded to ASCII) or ``bytes``.
        Matching is case-insensitive.
    :return: ``True`` if the host matches a leaf entry exactly, is a
        sub-domain of a leaf entry flagged ``include_subdomains``, or its
        top-level label is in ``_GTLD_INCLUDE_SUBDOMAINS``; ``False`` otherwise.
    :raises UnicodeEncodeError: if a ``str`` host contains non-ASCII characters.
    """

    if isinstance(host, str):
        host = host.encode("ascii")
    # Normalize case once, so the per-label traversal and the whole-host leaf
    # comparisons below agree. Previously only the labels were lower-cased,
    # so a mixed-case host could be traversed but never match a leaf.
    # (Assumes stored entries are lower-case, as the label lookup already did.)
    host = host.lower()
    labels = host.split(b".")

    # Fast-branch for gTLDs that are registered to preload all sub-domains.
    if labels[-1] in _GTLD_INCLUDE_SUBDOMAINS:
        return True

    with open(_HSTSPRELOAD_BIN_PATH, "rb") as f:
        for layer, label in enumerate(labels[::-1]):
            # None of our layers are greater than 4 deep.
            if layer > 3:
                return False

            # Read the jump table for the layer and label
            offset, size = _get_offset_and_size(f, layer, label)
            if offset == 0:
                # A zero offset is treated as "no entry": host is not preloaded
                return False

            # Read the set of entries for that layer. whence=1 makes the seek
            # relative to the current position, i.e. wherever
            # _get_offset_and_size left the file pointer.
            f.seek(offset, 1)
            data = bytearray(size)
            f.readinto(data)

            for is_leaf, include_subdomains, ent_label in _iter_entries(data):
                # We found a potential leaf
                if is_leaf:
                    if ent_label == host:
                        return True
                    if include_subdomains and host.endswith(b"." + ent_label):
                        return True

                # Continue traversing as we're not at a leaf.
                elif label == ent_label:
                    break
            else:
                # No entry let us descend to the next layer: not preloaded.
                return False
    return False
Exemple #4
0
def viz_predictions(
    predictions_path: typing.AnyStr,
    dataframe_path: typing.AnyStr,
    test_config_path: typing.AnyStr,
):
    """Displays a looping visualization of the GHI predictions saved by the evaluation script.

    This visualization requires OpenCV3+ ('cv2'), and will loop while refreshing a local window until the program
    is killed, or 'q' (or ESC) is pressed. The arrow keys allow the user to change which day is being shown.

    Args:
        predictions_path: text file holding one line of comma-separated GHI values per (station, target datetime)
            pair, ordered station-major (all target datetimes of the first station, then the second, ...).
        dataframe_path: pickled pandas dataframe indexed by datetime at a 15-minute frequency, with
            ``<station>_GHI`` and ``<station>_CLEARSKY_GHI`` columns for every station.
        test_config_path: ``.json`` file providing the ``stations``, ``target_datetimes``, ``start_bound``,
            ``end_bound`` and ``target_time_offsets`` fields.

    Raises:
        AssertionError: if a path is invalid or the loaded data is inconsistent with the test config.
    """
    assert os.path.isfile(test_config_path) and test_config_path.endswith(
        ".json"), "invalid test config"
    with open(test_config_path, "r") as fd:
        test_config = json.load(fd)
    stations = test_config["stations"]
    target_datetimes = test_config["target_datetimes"]
    start_bound = datetime.datetime.fromisoformat(test_config["start_bound"])
    end_bound = datetime.datetime.fromisoformat(test_config["end_bound"])
    horiz_deltas = [
        pd.Timedelta(d).to_pytimedelta()
        for d in test_config["target_time_offsets"]
    ]

    # load the predictions: one text line per (station, target datetime), reshaped to
    # (station, target datetime, prediction horizon)
    assert os.path.isfile(
        predictions_path), f"invalid preds file path: {predictions_path}"
    with open(predictions_path, "r") as fd:
        predictions = fd.readlines()
    assert len(predictions) == len(target_datetimes) * len(stations), \
        "predicted ghi sequence count mistmatch wrt target datetimes x station count"
    assert len(predictions) % len(stations) == 0
    predictions = np.asarray(
        [float(ghi) for p in predictions for ghi in p.split(",")])
    predictions = predictions.reshape(
        (len(stations), len(target_datetimes), -1))
    pred_horiz = predictions.shape[-1]
    target_datetimes = pd.DatetimeIndex(
        [datetime.datetime.fromisoformat(t) for t in target_datetimes])

    # load the ground truth dataframe and crop it to [start_bound, end_bound)
    assert os.path.isfile(
        dataframe_path), f"invalid dataframe path: {dataframe_path}"
    # NOTE(review): unpickling can execute arbitrary code; only load dataframes from trusted sources
    dataframe = pd.read_pickle(dataframe_path)
    dataframe = dataframe[dataframe.index >= start_bound]
    dataframe = dataframe[dataframe.index < end_bound]
    assert dataframe.index.get_loc(
        start_bound) == 0, "invalid start bound (should land at first index)"
    assert len(dataframe.index.intersection(target_datetimes)) == len(target_datetimes), \
        "bad dataframe target datetimes overlap, index values are missing"

    # we will display 24-hour slices with some overlap (configured via hard-coded param below)
    time_window, time_overlap, time_sample = \
        datetime.timedelta(hours=24), datetime.timedelta(hours=3), datetime.timedelta(minutes=15)
    assert len(dataframe.asfreq("15min").index) == len(dataframe.index), \
        "invalid dataframe index padding (should have an entry every 15 mins)"
    # each displayed day covers the window plus the overlap on both sides (inclusive of both ends)
    sample_count = ((time_window + 2 * time_overlap) // time_sample) + 1
    day_count = int(math.ceil((end_bound - start_bound) / time_window))
    # all buffers start as NaN so that samples without data stay NaN
    clearsky_ghi_data = np.full((day_count, len(stations), sample_count),
                                fill_value=float("nan"),
                                dtype=np.float32)
    station_ghi_data = np.full((day_count, len(stations), sample_count),
                               fill_value=float("nan"),
                               dtype=np.float32)
    pred_ghi_data = np.full(
        (day_count, len(stations), pred_horiz, sample_count),
        fill_value=float("nan"),
        dtype=np.float32)

    # First timestamp of each displayed day: start_bound, start_bound + 24h, ... strictly before end_bound.
    # The end bound is excluded by filtering instead of through date_range's `closed="left"` keyword: that
    # keyword was removed in pandas 2.0 (in favor of `inclusive`, which older versions do not have), while
    # the filter below gives the same left-closed range on every version.
    days_range = pd.date_range(start_bound, end_bound, freq=time_window)
    days_range = days_range[days_range < end_bound]

    for day_idx, day_start in enumerate(
            tqdm.tqdm(days_range, desc="preparing daytime GHI intervals")):
        window_start, window_end = day_start - time_overlap, day_start + time_window + time_overlap
        # the dataframe starts at start_bound with one row per time_sample (asserted above), so the
        # number of steps since start_bound is directly usable as a row position
        sample_start, sample_end = (
            window_start - start_bound) // time_sample, (
                window_end - start_bound) // time_sample
        for sample_iter_idx, sample_idx in enumerate(
                range(sample_start, sample_end + 1)):
            # the overlap may reach before the first row or past the last one; leave those samples as NaN
            if sample_idx < 0 or sample_idx >= len(dataframe.index):
                continue
            sample_row = dataframe.iloc[sample_idx]
            sample_time = window_start + sample_iter_idx * time_sample
            # predictions only exist for the timestamps listed as targets in the test config
            target_iter_idx = target_datetimes.get_loc(
                sample_time) if sample_time in target_datetimes else None
            for station_idx, station_name in enumerate(stations):
                clearsky_ghi_data[day_idx, station_idx,
                                  sample_iter_idx] = sample_row[
                                      station_name + "_CLEARSKY_GHI"]
                station_ghi_data[day_idx, station_idx,
                                 sample_iter_idx] = sample_row[station_name +
                                                               "_GHI"]
                if target_iter_idx is not None:
                    pred_ghi_data[day_idx, station_idx, :,
                                  sample_iter_idx] = predictions[
                                      station_idx, target_iter_idx]

    # render one image per day up front so the display loop only has to switch between them
    displays = []
    for day_idx, day_start in enumerate(
            tqdm.tqdm(days_range, desc="preparing plots")):
        displays.append(
            draw_daily_ghi(
                clearsky_ghi=clearsky_ghi_data[day_idx],
                station_ghi=station_ghi_data[day_idx],
                pred_ghi=pred_ghi_data[day_idx],
                stations=stations,
                horiz_deltas=horiz_deltas,
                window_start=(day_start - time_overlap),
                window_end=(day_start + time_window + time_overlap),
                sample_step=time_sample,
            ))
    display = np.stack(displays)

    # interactive loop: refresh the window every 100 ms and react to key presses
    day_idx = 0
    while True:
        cv.imshow("ghi", display[day_idx])
        ret = cv.waitKey(100)
        if ret == ord('q') or ret == 27:  # q or ESC
            break
        elif ret == 81 or ret == 84:  # UNIX: left or down arrow
            day_idx = max(day_idx - 1, 0)
        elif ret == 82 or ret == 83:  # UNIX: right or up arrow
            day_idx = min(day_idx + 1, len(displays) - 1)
Exemple #5
0
    def __init__(self, pat: ty.AnyStr, *, period_special: bool = True):
        """
        Split a glob pattern into path labels and compile one matcher per label.

        Arguments
        ---------
        pat
            The glob pattern to use for matching
        period_special
            When true, every label that does not itself start with a period
            is compiled so that it rejects names with a leading period,
            meaning ``*``, ``?`` and ``[…]`` will not match such a period
            (the traditional behaviour, which many modern shells allow one
            to disable)
        """
        self.period_special = period_special  # type: bool

        # Literal fragments, converted by ``utils.maybe_fsencode`` with ``pat``
        # as the reference value
        self._sep = utils.maybe_fsencode(os.path.sep, pat)  # type: ty.AnyStr
        double_star = utils.maybe_fsencode("**", pat)  # type: ty.AnyStr
        period = utils.maybe_fsencode(".", pat)  # type: ty.AnyStr
        reject_leading_period = utils.maybe_fsencode(r"(?![.])", pat)  # type: ty.AnyStr
        parent_dir = period + period

        # Fold the platform's alternative separator (if any) into the main one
        alt_sep = os.path.altsep
        if alt_sep:
            pat = pat.replace(utils.maybe_fsencode(alt_sep, pat), self._sep)

        # Such a pattern could not match anything (a similar check on
        # individual labels follows in the loop)
        assert not os.path.isabs(
            pat), "Absolute matching patterns will never match"

        # A trailing separator restricts the pattern to directories
        self._dir_only = pat.endswith(self._sep)  # type: bool

        # One item per meaningful label: a compiled expression, or ``None``
        # standing for a bare ``**`` label
        self._pat = []  # type: ty.List[ty.Optional[re_pattern_t]]
        for label in pat.split(self._sep):
            # Empty labels and "." do not constrain the match
            if not label or label == period:
                continue

            assert label != parent_dir, 'Matching patterns containing ".." will never match'

            if label == double_star:
                self._pat.append(None)
                continue

            if double_star in label:
                raise NotImplementedError(
                    "Using double-star (**) and other characters in the same glob "
                    "path label ({0}) is not currently supported – please do file "
                    "an issue if you need this!".format(os.fsdecode(label)))

            if isinstance(label, bytes):
                # ``fnmatch.translate`` is fed text, so take bytes labels on a
                # latin-1 round trip
                translated = fnmatch.translate(label.decode("latin-1"))
                re_expr = translated.encode("latin-1")
            else:
                re_expr = fnmatch.translate(label)

            if period_special and not label.startswith(period):
                re_expr = reject_leading_period + re_expr
            self._pat.append(re.compile(re_expr))