Example #1
    def add_class_phase_markers(
        self, cycle: pd.DataFrame, wltc_parts: Iterable[int], *, right_edge=True
    ) -> pd.DataFrame:
        """
        Adds low/mid/high/extra-high boolean columns into cycle, named as p1, ...

        :param cycle:
            assumes indexed by time
        :param wltc_parts:
            must include edges (see :func:`~.datamodel.get_class_parts_limits()`)
        """
        assert all(i is not None for i in (cycle, wltc_parts)), (
            "Null in inputs:",
            cycle,
            wltc_parts,
        )
        assert isinstance(wltc_parts, Iterable), wltc_parts

        for n, (start, end) in enumerate(itz.sliding_window(2, wltc_parts), 1):
            idx = start <= cycle.index
            if right_edge:
                idx &= cycle.index <= end
            else:
                idx &= cycle.index < end
            cycle[wio.class_part_name(n)] = idx

        return cycle
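The method above pairs consecutive part edges via sliding_window(2, ...) to build boolean phase columns; a standalone sketch of the same idea with illustrative data (not the wltp API):

import pandas as pd
from toolz import itertoolz as itz

cycle = pd.DataFrame({"v": range(10)})  # pretend the index is time
edges = (0, 4, 9)                       # part limits, edges included
for n, (start, end) in enumerate(itz.sliding_window(2, edges), 1):
    # right_edge=True behaviour: adjacent parts overlap on their shared edge
    cycle[f"p{n}"] = (start <= cycle.index) & (cycle.index <= end)
print(cycle["p1"].sum(), cycle["p2"].sum())  # 5 6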
Example #2
def insert_whitespace_token(
        labeled_tokens,
        *,
        prob=0.01,
        nrange=(1, 2),
        field_labels=("field_sep", "item_sep"),
):
    """
    Randomly insert a whitespace token between any given two tokens with a probability
    of ``prob``, provided neither token is already whitespace.

    Args:
        labeled_tokens (List[Tuple[str, str]])
        prob (float)
        nrange (Tuple[int, int])
        field_labels (Tuple[str, str])

    Returns:
        List[Tuple[str, str]]
    """
    aug_labeled_tokens = []
    for tl1, tl2 in itertoolz.sliding_window(2, labeled_tokens):
        aug_labeled_tokens.append(tl1)
        if random.random() < prob and not (tl1[0].isspace()
                                           or tl2[0].isspace()):
            ws_label = field_labels[tl1[1] == tl2[1]]
            aug_labeled_tokens.append(
                (" " * random.randint(*nrange), ws_label))
    aug_labeled_tokens.append(labeled_tokens[-1])
    return aug_labeled_tokens
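A quick usage sketch, assuming the function above plus its random/itertoolz imports are in scope (the tokens here are purely illustrative):

import random
random.seed(0)
tokens = [("Acme", "company"), ("Corp", "company"), ("2019", "dates")]
print(insert_whitespace_token(tokens, prob=1.0))
# same-label neighbours get an "item_sep" space; differing labels get "field_sep"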
Example #3
def seq2grams(seq: Iterable[T],
              n: int,
              pad: Any = no_default) -> Iterable[Iterable[T]]:
    if pad is not no_default:
        seq = chain(repeat(pad, n - 1), seq, repeat(pad, n - 1))

    return sliding_window(n, seq)
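A doctest-style sketch, assuming seq2grams has its itertools/toolz imports and a no_default sentinel in scope:

>>> list(seq2grams("abc", 2, pad="_"))
[('_', 'a'), ('a', 'b'), ('b', 'c'), ('c', '_')]
>>> list(seq2grams("abc", 2))
[('a', 'b'), ('b', 'c')]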
Example #4
def issorted(seq: Iterable[T], key: Callable[[T], Any] = None) -> bool:
    if key is None:
        key = lambda v: v

    return all(
        operator.le(key(prev), key(curr))
        for prev, curr in sliding_window(2, seq))
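For instance, assuming the operator and sliding_window imports are in scope:

>>> issorted([1, 2, 2, 3])
True
>>> issorted(["b", "aa"], key=len)
True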
Example #5
 def __call__(self, losses, **kwargs):
     if len(losses) <= self.n:
         return False
     return all(
         map(
             curry(__lt__)(-self.threshold),
             starmap(self.stat, sliding_window(2, losses[-(self.n + 1):]))))
Example #6
def validate_adaptor_chain(adaptor_chain: List[int]) -> bool:
    def valid_step(ab: Tuple[int, int]) -> bool:
        a, b = ab
        return a <= b <= a + 3
    return all(map(valid_step, sliding_window(2, adaptor_chain)))
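A step is valid when the chain never decreases and rises by at most 3; for example (assuming the typing and sliding_window imports):

>>> validate_adaptor_chain([0, 1, 4, 5])
True
>>> validate_adaptor_chain([0, 1, 5])
False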
Example #7
def get_filtered_text_lines(text, *, delim=r" ?\n"):
    """
    Split ``text`` into lines, filtering out some superfluous lines if context allows.

    Args:
        text (str)
        delim (str)

    Returns:
        List[str]

    Note:
        This should be applied to normalized text -- see :func:`normalize_text()`.
    """
    lines = []
    all_lines = ["<START>"] + re.split(delim, text) + ["<END>"]
    for prev_line, line, next_line in itertoolz.sliding_window(3, all_lines):
        line = line.strip()
        # ignore empty lines between bulleted list items -- probably just a parsing error
        if not line and prev_line.startswith("- ") and next_line.startswith(
                "- "):
            continue
        # ignore resume-ending numbers -- probably just page numbering
        elif line.isdigit() and next_line == "<END>":
            continue
        lines.append(line)
    return lines
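A small worked example, assuming re and itertoolz are imported as in the module above; the empty line between bullets and the trailing page number are both dropped:

>>> get_filtered_text_lines("- alpha\n \n- beta\nname\n3")
['- alpha', '- beta', 'name']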
Example #8
def get_class_phase_boundaries(part_lengths: tuple,
                               V_cycle) -> Tuple[Tuple[int, int], ...]:
    """
    Serve ``[low, high)`` boundaries from class-data, as `Dijkstra demands it`__.

    :return:
        a tuple of tuple-pairs of *time indices* (low/high) part-boundaries
        (i.e. for class-3a these are 5 pairs of numbers, see example below),
        that may be used as ``Series.loc[slice(*pair)]``.

    Like :func:`.datamodel.get_class_parts_limits` with ``edges=True``,
    suited for pipelines.

    __  https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html

    **Example:**

        >>> from wltp import datamodel, cycles
        >>> wcd = datamodel.get_wltc_data()
        >>> cd = cycles.get_wltc_class_data(wcd["classes"], "class3b")
        >>> cycles.get_class_phase_boundaries(cd["lengths"], cd["V_cycle"])
        ((0, 589), (589, 1022), (1022, 1477), (1477, 1800))
    """
    part_breaks = np.cumsum(part_lengths)
    return tuple(itz.sliding_window(2, (0, *part_breaks)))
Example #9
def assert_property_xsorted_produces_ordered_iterable(_xsorted, things,
                                                      reverse):
    """
    Assert the property that xsorted should produce an ordered iterable.
    """
    actual = list(_xsorted(things, reverse=reverse))
    actual = reversed(actual) if reverse else actual
    assert all(a <= b for a, b in sliding_window(2, actual))
Example #10
def featurize(tokens):
    """
    Extract features from individual tokens as well as those that are dependent on
    the sequence thereof.

    Args:
        tokens (List[:class:`spacy.tokens.Token`])

    Returns:
        List[Dict[str, obj]]
    """
    tokens_features = [get_token_features(token) for token in tokens]
    if len(tokens_features) == 1:
        tokens_features[0]["_singleton"] = True
        return tokens_features
    else:
        feature_sequence = []
        line_idx_windows = parse_utils.get_line_token_idxs(tokens_features)
        prev_line_idx, next_line_idx = next(line_idx_windows)
        follows_bullet = False
        n_pad_l, n_pad_r = 3, 2
        tokens_features = parse_utils.pad_tokens_features(tokens_features,
                                                          n_left=n_pad_l,
                                                          n_right=n_pad_r)
        tf_windows = itertoolz.sliding_window(n_pad_l + n_pad_r + 1,
                                              tokens_features)
        for ppprev_tf, pprev_tf, prev_tf, curr_tf, next_tf, nnext_tf in tf_windows:
            tf = curr_tf.copy()
            # add features from surrounding tokens, for context
            tf["ppprev"] = ppprev_tf
            tf["pprev"] = pprev_tf
            tf["prev"] = prev_tf
            tf["next"] = next_tf
            tf["nnext"] = nnext_tf
            # add features dependent on this token's position within the sequence
            # and its relationship to other tokens
            tok_idx = tf["idx"]
            line_tfs = tokens_features[prev_line_idx + n_pad_l:next_line_idx +
                                       n_pad_l]
            if tf["is_newline"] and tf["idx"] > 0:
                prev_line_idx, next_line_idx = next(line_idx_windows)
                follows_bullet = False
            tf["tok_line_idx"] = tok_idx - prev_line_idx
            tf["follows_bullet"] = follows_bullet
            # is this token a bullet? i.e. "- " token starting a new line
            if tf["shape"] == "-" and tf["tok_line_idx"] == 1:
                follows_bullet = True
            if tf["like_year"] is True:
                year = int(curr_tf["prefix"] + curr_tf["suffix"])
                other_years = [
                    int(_tf["prefix"] + _tf["suffix"]) for _tf in line_tfs
                    if _tf["like_year"] is True and _tf["idx"] != tok_idx
                ]
                if other_years:
                    tf["is_max_line_year"] = all(year > oyr
                                                 for oyr in other_years)
            feature_sequence.append(tf)
        return feature_sequence
Example #11
 def __init__(self, directory, pmm):
     self.directory = directory
     self.velocities = np.load(directory + '/para_velocity_input.npy')
     self.vec_addresses = np.load(directory + '/para_vec_address.npy')
     self.spacing = np.load(directory + '/spacing.npy')
     self.non_nan_velocity_indices = np.load(directory + '/no_nan_inds.npy')
     self.model = pmm.model
     self.accelerations = [[
         divide_arrays(a, b) for a, b in sliding_window(2, v)
     ] for v in self.velocities]
Example #12
def findallsubseqs(a: Iterable[T], b: Iterable[T], overlap: bool = False) -> Iterable[int]:
    x = iter2seq(a)
    if len(x) == 0:
        return

    start = 0

    for pos, y in enumerate(sliding_window(len(x), b)):
        if (overlap or pos >= start) and all(m == n for m, n in zip(x, y)):
            yield pos
            start = pos + len(x)
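For example, assuming iter2seq materialises the iterable (e.g. into a tuple) and sliding_window is imported:

>>> list(findallsubseqs("aba", "ababa"))
[0]
>>> list(findallsubseqs("aba", "ababa", overlap=True))
[0, 2]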
Example #13
def stim_conditionals(data, conditioner_stat, stat, n_smallest):
    hunt_id_list = data["Hunt ID"]
    para_stat = data[stat]
    para_cstat = data[conditioner_stat]
    hunt_id_limits = np.where(np.diff(hunt_id_list) != 0)[0] + 1
    stat_per_hunt = []
    for firstind, secondind in sliding_window(2, hunt_id_limits):
        minstat_args = np.argsort(
            para_cstat[firstind:secondind])[0:n_smallest] + firstind
        stat_per_hunt += para_stat[minstat_args].tolist()
    stat_per_hunt = np.array(stat_per_hunt)
    return stat_per_hunt[~np.isnan(stat_per_hunt)]
Example #14
def featurize(tokens):
    """
    Extract features from individual tokens as well as those that are dependent on
    the sequence thereof.

    Args:
        tokens (List[:class:`spacy.tokens.Token`])

    Returns:
        List[Dict[str, obj]]
    """
    tokens_features = [get_token_features(token) for token in tokens]
    if len(tokens_features) == 1:
        tokens_features[0]["_singleton"] = True
        return tokens_features
    else:
        feature_sequence = []
        line_idx_windows = parse_utils.get_line_token_idxs(tokens_features)
        prev_line_idx, next_line_idx = next(line_idx_windows)
        follows_bullet = False
        n_pad_l, n_pad_r = 3, 2
        tokens_features = parse_utils.pad_tokens_features(tokens_features,
                                                          n_left=n_pad_l,
                                                          n_right=n_pad_r)
        tf_windows = itertoolz.sliding_window(n_pad_l + n_pad_r + 1,
                                              tokens_features)
        for ppprev_tf, pprev_tf, prev_tf, curr_tf, next_tf, nnext_tf in tf_windows:
            tf = curr_tf.copy()
            # add features from surrounding tokens, for context
            tf["ppprev"] = ppprev_tf
            tf["pprev"] = pprev_tf
            tf["prev"] = prev_tf
            tf["next"] = next_tf
            tf["nnext"] = nnext_tf
            # add features dependent on this token's position within the sequence
            # and its relationship to other tokens
            tok_idx = tf["idx"]
            if tf["is_newline"] and tf["idx"] > 0:
                prev_line_idx, next_line_idx = next(line_idx_windows)
                follows_bullet = False
            tf["tok_line_idx"] = tok_idx - prev_line_idx
            tf["follows_bullet"] = follows_bullet
            # bullets have is_group_sep_text, but they aren't group separators
            # at least not in the sense we want here; so, +2 to the previous newline idx
            # ensures that bullets are not counted in this feature
            line_tfs_so_far = tokens_features[prev_line_idx + n_pad_l +
                                              2:tok_idx + n_pad_l]
            if any(_tf["is_group_sep_text"] for _tf in line_tfs_so_far):
                tf["follows_group_sep"] = True
            else:
                tf["follows_group_sep"] = False
            feature_sequence.append(tf)
        return feature_sequence
Example #15
    def calc_class_sums(cl):
        V = datamodel.get_class_v_cycle(cl)
        cycle_parts = datamodel.get_class_parts_limits(cl, edges=True)

        prev = (0, 0)
        for partno, (start, end) in enumerate(itz.sliding_window(2, cycle_parts), 1):
            start += start_offset
            end += end_offset
            sums = calc_v_sums(V.loc[start:end])
            cums = calc_v_sums(V.loc[start:end], prev)
            results.append((cl, f"part-{partno}", *sums, *cums))
            prev = cums

        return results
Example #16
def sliding_window_filled(seq,
                          n,
                          pad_before=False,
                          pad_after=False,
                          fillvalue=None):
    """ A sliding window with optional padding on either end..

        Args:
            seq(iter):                 an iterator or something that can
                                            be turned into an iterator

            n(int):                         number of generators to create as
                                            lagged

            pad_before(bool):               whether to continue zipping along
                                            the longest generator

            pad_after(bool):               whether to continue zipping along
                                            the longest generator

            fillvalue:                      value to use to fill generators
                                            shorter than the longest.

        Returns:
            generator object:               a generator object that will return
                                            values from each iterator.

        Examples:

            >>> list(sliding_window_filled(range(5), 2))
            [(0, 1), (1, 2), (2, 3), (3, 4)]

            >>> list(sliding_window_filled(range(5), 2, pad_after=True))
            [(0, 1), (1, 2), (2, 3), (3, 4), (4, None)]

            >>> list(sliding_window_filled(range(5), 2, pad_before=True, pad_after=True))
            [(None, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, None)]
    """

    if pad_before and pad_after:
        seq = pad(seq, before=(n - 1), after=(n - 1), fill=fillvalue)
    elif pad_before:
        seq = pad(seq, before=(n - 1), fill=fillvalue)
    elif pad_after:
        seq = pad(seq, after=(n - 1), fill=fillvalue)

    return sliding_window(n, seq)
Example #17
def padded_sliding_window(
    func: Callable[[Iterable[int]], float], cycle_times: List[int]
) -> List[float]:
    if len(cycle_times) < 5:
        return [func(cycle_times) for _ in cycle_times]
    window_size = max(int(len(cycle_times) * 0.2), 5)
    if window_size % 2 == 0:
        window_size += 1
    sliding_windows = [
        func(window) for window in it.sliding_window(window_size, cycle_times)
    ]
    subwindow = int(window_size // 2)
    return (
        [sliding_windows[0]] * subwindow
        + sliding_windows
        + [sliding_windows[-1]] * subwindow
    )
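Usage sketch, assuming statistics is imported and `it` is the module's toolz/cytoolz itertoolz alias (a guess, not shown in the excerpt):

import statistics
print(padded_sliding_window(statistics.mean, list(range(10))))
# -> [2, 2, 2, 3, 4, 5, 6, 7, 7, 7]: edge values repeat window_size // 2 times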
Example #18
    def get_tlines(self, key, centrality, temp):
        from toolz.itertoolz import sliding_window
        from ROOT import TLine
        try:
            return self.saved_tlines[(key, centrality, temp)]
        except KeyError:
            pass

        self.saved_tlines[(key, centrality, temp)] = tlines = []

        line_dict = self.data[key][centrality][temp]
        for p1, p2 in sliding_window(2, zip(*line_dict.values())):
            tlines.append(TLine(*p1, *p2))

        return tlines
Example #19
 def __init__(self, drcts):
     self.all_para_velocities = concat_para_velocities(drcts)
     self.all_para_accelerations = [[
         divide_arrays(a, b) for a, b in sliding_window(2, v)
     ] for v in self.all_para_velocities]
     self.all_accel_mags = [[magvector(a) for a in accs]
                            for accs in self.all_para_accelerations]
     self.velocity_mags = [[magvector(v) for v in vecs]
                           for vecs in self.all_para_velocities]
     self.mag_limit = np.percentile(np.concatenate(self.all_accel_mags), 99)
     self.len_simulation = 500
     self.accels_filt = []
     self.model = []
     for ac, ac_m in zip(self.all_para_accelerations, self.all_accel_mags):
         if not (np.array(ac_m) > self.mag_limit).any():
             self.accels_filt.append(ac)
         else:
             filt_acvec = []
             for ac_vec, ac_mag in zip(ac, ac_m):
                 if ac_mag < self.mag_limit:
                     filt_acvec.append(ac_vec)
             self.accels_filt.append(filt_acvec)
Example #20
def subrange(start, stop=None, step=None, substep=None):
    """
        Generates start and stop values for each subrange.

        Args:
            start(int):           First value in range (or last if only
                                  specified value)

            stop(int):            Last value in range

            step(int):            Step between each range

            substep(int):         Step within each range

        Yields:
            range:             A subrange within the larger range.

        Examples:
            >>> list(map(list, subrange(5)))
            [[0], [1], [2], [3], [4]]

            >>> list(map(list, subrange(0, 12, 3, 2)))
            [[0, 2], [3, 5], [6, 8], [9, 11]]
    """

    if stop is None:
        stop = start
        start = 0

    if step is None:
        step = 1

    if substep is None:
        substep = 1

    range_ends = itertools.chain(range(start, stop, step), [stop])

    for i, j in sliding_window(2, range_ends):
        yield (range(i, j, substep))
Example #21
def plot_xy_experiment(b_list, b_diams, exp_type, drc):
    fig = pl.figure()
    axes = fig.add_subplot(111, facecolor='.75')  # `axisbg` was removed in matplotlib 2.x
    axes.grid(False)
    for br, bd in zip(b_list, b_diams):
        barrier_x = br[0]
        barrier_y = br[1]
        barrier_diameter = bd
        barrier_plot = pl.Circle((barrier_x, barrier_y),
                                 barrier_diameter / 2,
                                 fc='r')
        axes.add_artist(barrier_plot)
    xcoords, ycoords = get_xy(exp_type, drc)
    v_from_center = []
    for crd in zip(xcoords, ycoords):
        vector = np.array(crd)
        center_mag = magvector_center(vector)
        v_from_center.append(center_mag)
    delta_center_mag = [b - a for a, b in sliding_window(2, v_from_center)]
    axes.plot(xcoords, ycoords)
    axes.axis('equal')
    pl.show()
    return xcoords, ycoords, v_from_center, delta_center_mag
Example #22
def weekchunks(start, until=None, until_days_ago=0, date_format='%Y-%m-%d'):
    '''Generate date strings in weekly chunks between two dates.
    Args:
        start (str): Sensibly formatted datestring (format to be guessed by pd)
        until (str): Another datestring. Default=today.
        until_days_ago (int): If until is not specified, this indicates how many
                              days ago to stop instead; ignored when until is given. Default=0.
        date_format (str): Date format of the output date strings.
    Returns:
        chunk_pairs (list): List of pairs of string, representing the start and end of weeks.
    '''
    until = (Timestamp(until).to_pydatetime() if until is not None
             else dt.now() - timedelta(days=until_days_ago))
    start = Timestamp(start).to_pydatetime()
    chunks = [
        dt.strftime(_date, date_format)
        for _date in rrule.rrule(rrule.WEEKLY, dtstart=start, until=until)
    ]
    if len(chunks) == 1:  # the less-than-one-week case
        _until = dt.strftime(until, date_format)
        chunks.append(_until)
    chunk_pairs = list(sliding_window(2, chunks))
    return chunk_pairs
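Usage sketch (the pandas Timestamp, dateutil rrule, and datetime imports assumed as above):

>>> weekchunks('2020-01-01', until='2020-01-20')
[('2020-01-01', '2020-01-08'), ('2020-01-08', '2020-01-15')]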
Example #23
def get_line_token_idxs(tokens_features):
    """
    Get the [start, stop) indexes for all lines in ``tokens_features``,
    such that ``tokens_features[start : stop]`` corresponds to one line
    of featurized tokens.

    Args:
        tokens_features (List[Dict[str, obj]]): Sequence of featurized tokens, as produced
            by :func:`get_token_features_base()`.

    Yields:
        Tuple[int, int]
    """
    idxs_newlines = [tf["idx"] for tf in tokens_features if tf["is_newline"]]
    if not idxs_newlines:
        yield (0, len(tokens_features))
    else:
        if idxs_newlines[0] != 0:
            idxs_newlines.insert(0, 0)
        if idxs_newlines[-1] != len(tokens_features):
            idxs_newlines.append(len(tokens_features))
        for idx1, idx2 in itertoolz.sliding_window(2, idxs_newlines):
            yield (idx1, idx2)
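A doctest-style sketch with toy token features (only the idx/is_newline keys are needed here):

>>> tfs = [{"idx": i, "is_newline": c == "\n"} for i, c in enumerate("ab\ncd")]
>>> list(get_line_token_idxs(tfs))
[(0, 2), (2, 5)]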
Example #24
def filtered_window(seq, n):
    return filter(filter_func, sliding_window(n, seq))
Example #25
 def curry(points):
     return agg(
         function(pointA, pointB)
         for pointA, pointB in sliding_window(2, points))
Example #26
                elem, ['predictions'],
                compose(list, partial(take, args.predictions_limit))),
            ujson.load(args.dataset_file)))

    sections = groupby(lambda x: tuple(map(x.get, ['make', 'model'])),
                       dataset).items()

    evaluation_base_url = f'https://storage.cloud.google.com/dev_visual_search/evaluations/output/by-id/{args.evaluation_id}'

    def link_to_page(key):
        if key is None:
            return None
        make, model = key
        return f'{evaluation_base_url}/prediction-{make}-{model}.html'

    for prev, current, next in sliding_window(
            3, cons(None, concat([sections, [None]]))):
        key, section = current
        make, model = key

        prev_key, _ = prev if prev is not None else (None, None)
        next_key, _ = next if next is not None else (None, None)

        page = to_page(
            section, {
                'prev': link_to_page(prev_key),
                'parent': '',
                'next': link_to_page(next_key)
            }, {
                'title': f'Prediction report for {make} / {model}',
                'evaluation_id': args.evaluation_id,
                'image_base_path': args.image_base_path,
Example #27
def filtered_window(seq, n):
    return filter(filter_func, sliding_window(n, seq))
Example #28
 def class_boundaries(wltc_class, offset_start, offset_end):
     cycle_parts = datamodel.get_class_parts_limits(wltc_class, edges=True)
     return (f"[{start + offset_start}, {end + offset_end}]"
             for start, end in itz.sliding_window(2, cycle_parts))
Example #29
def part_one():
    result = {1: 0, 2: 0, 3: 0}
    for x, y in itertoolz.sliding_window(2, active_input):
        result[y - x] += 1
    print(result[1] * result[3])
Example #30
def test_sliding_window():
    assert list(sliding_window(2, [1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)]
    assert list(sliding_window(3, [1, 2, 3, 4])) == [(1, 2, 3), (2, 3, 4)]
Example #31
def test_sliding_window_of_short_iterator():
    assert list(sliding_window(3, [1, 2])) == []
Example #32
def filter_altitude_speed(positions):
    return (
        p1 for p1, p2 in sliding_window(2, positions)
        if check_altitude_speed(p2, p1)
    )