def add_class_phase_markers(
    self, cycle: pd.DataFrame, wltc_parts: Iterable[int], *, right_edge=True
) -> pd.DataFrame:
    """
    Adds low/mid/high/extra-high boolean index into cycle, named as p1, ...

    :param cycle: assumes indexed by time
    :param wltc_parts: must include edges (see :func:`~.datamodel.get_class_parts_limits()`)
    """
    assert all(i is not None for i in (cycle, wltc_parts)), (
        "Null in inputs:",
        cycle,
        wltc_parts,
    )
    assert isinstance(wltc_parts, Iterable), wltc_parts

    for n, (start, end) in enumerate(itz.sliding_window(2, wltc_parts), 1):
        idx = start <= cycle.index
        if right_edge:
            idx &= cycle.index <= end
        else:
            idx &= cycle.index < end
        cycle[wio.class_part_name(n)] = idx

    return cycle
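# A minimal, self-contained sketch of the interval-marking idiom above (pandas
# and toolz assumed; the `p1`/`p2` column names are illustrative, not wio's):
import pandas as pd
from toolz import itertoolz as itz

cycle = pd.DataFrame(index=range(6))  # toy time index 0..5
for n, (start, end) in enumerate(itz.sliding_window(2, (0, 3, 5)), 1):
    cycle[f"p{n}"] = (start <= cycle.index) & (cycle.index <= end)  # right_edge=True
print(cycle)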
def insert_whitespace_token(
    labeled_tokens,
    *,
    prob=0.01,
    nrange=(1, 2),
    field_labels=("field_sep", "item_sep"),
):
    """
    Randomly insert a whitespace token between any given two tokens with a
    probability of ``prob``, provided neither token is already whitespace.

    Args:
        labeled_tokens (List[Tuple[str, str]])
        prob (float)
        nrange (Tuple[int, int])
        field_labels (Tuple[str, str])

    Returns:
        List[Tuple[str, str]]
    """
    aug_labeled_tokens = []
    for tl1, tl2 in itertoolz.sliding_window(2, labeled_tokens):
        aug_labeled_tokens.append(tl1)
        if random.random() < prob and not (tl1[0].isspace() or tl2[0].isspace()):
            ws_label = field_labels[tl1[1] == tl2[1]]
            aug_labeled_tokens.append((" " * random.randint(*nrange), ws_label))
    aug_labeled_tokens.append(labeled_tokens[-1])
    return aug_labeled_tokens
def seq2grams(seq: Iterable[T], n: int, pad: Any = no_default) -> Iterable[Iterable[T]]:
    if pad is not no_default:
        seq = chain(repeat(pad, n - 1), seq, repeat(pad, n - 1))
    return sliding_window(n, seq)
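# A self-contained sketch of the padded n-gram idea above (toolz assumed):
# padding both ends with n - 1 sentinels makes the windows cover the edges too.
from itertools import chain, repeat
from toolz.itertoolz import sliding_window

n, pad = 2, "_"
padded = chain(repeat(pad, n - 1), "abc", repeat(pad, n - 1))
print(list(sliding_window(n, padded)))
# [('_', 'a'), ('a', 'b'), ('b', 'c'), ('c', '_')]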
def issorted(seq: Iterable[T], key: Callable[[T], Any] = None) -> bool:
    if key is None:
        key = lambda v: v
    return all(
        operator.le(key(prev), key(curr))
        for prev, curr in sliding_window(2, seq)
    )
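# Hypothetical usage of the sortedness check above (non-strict, via operator.le);
# assumes the snippet's own imports (operator, typing, toolz) are in scope.
print(issorted([1, 2, 2, 5]))               # True -- equal neighbours are allowed
print(issorted([3, 1, 2]))                  # False
print(issorted(["A", "b"], key=str.lower))  # True -- compared case-insensitively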
def __call__(self, losses, **kwargs):
    if len(losses) <= self.n:
        return False
    return all(
        map(
            curry(__lt__)(-self.threshold),
            starmap(self.stat, sliding_window(2, losses[-(self.n + 1):])),
        )
    )
def validate_adaptor_chain(adaptor_chain: List[int]) -> bool:
    def valid_step(ab: Tuple[int, int]) -> bool:
        a, b = ab
        if b < a or b > a + 3:
            return False
        return True

    return all(map(valid_step, sliding_window(2, adaptor_chain)))
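# Hypothetical usage sketch: a chain is valid only if it never decreases and
# every step between consecutive adaptors is at most 3.
print(validate_adaptor_chain([0, 1, 4, 7, 10]))  # True  (steps of 1 and 3)
print(validate_adaptor_chain([0, 4]))            # False (step of 4 is too large)
print(validate_adaptor_chain([3, 1]))            # False (chain decreases)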
def get_filtered_text_lines(text, *, delim=r" ?\n"):
    """
    Split ``text`` into lines, filtering out some superfluous lines if context allows.

    Args:
        text (str)
        delim (str)

    Returns:
        List[str]

    Note:
        This should be applied to normalized text -- see :func:`normalize_text()`.
    """
    lines = []
    all_lines = ["<START>"] + re.split(delim, text) + ["<END>"]
    for prev_line, line, next_line in itertoolz.sliding_window(3, all_lines):
        line = line.strip()
        # ignore empty lines between bulleted list items -- probably just a parsing error
        if not line and prev_line.startswith("- ") and next_line.startswith("- "):
            continue
        # ignore resume-ending numbers -- probably just page numbering
        elif line.isdigit() and next_line == "<END>":
            continue
        lines.append(line)
    return lines
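# Hypothetical usage (assumes the snippet's module-level `re` and `itertoolz`
# imports): the blank line between the bullets and the trailing page number are dropped.
text = "- first item\n\n- second item\n2"
print(get_filtered_text_lines(text))
# ['- first item', '- second item']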
def get_class_phase_boundaries(
    part_lengths: tuple, V_cycle
) -> Tuple[Tuple[int, int], ...]:
    """
    Serve ``[low, high)`` boundaries from class-data, as `Dijkstra demands it`__.

    :return:
        a tuple of tuple-pairs of *time indices* (low/high) part-boundaries
        (ie for class-3a these are 5 pairs of numbers, see example below),
        that may be used as ``Series.loc[slice(*pair)]``.

        Like :func:`.datamodel.get_class_parts_limits` with ``edges=true``,
        suited for pipelines.

    __ https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html

    **Example:**

    >>> from wltp import datamodel, cycles
    >>> wcd = datamodel.get_wltc_data()
    >>> cd = cycles.get_wltc_class_data(wcd["classes"], "class3b")
    >>> cycles.get_class_phase_boundaries(cd["lengths"], cd["V_cycle"])
    ((0, 589), (589, 1022), (1022, 1477), (1477, 1800))
    """
    part_breaks = np.cumsum(part_lengths)
    return tuple(itz.sliding_window(2, (0, *part_breaks)))
def assert_property_xsorted_produces_ordered_iterable(_xsorted, things, reverse):
    """
    Assert the property that xsorted should produce an ordered iterable.
    """
    actual = list(_xsorted(things, reverse=reverse))
    actual = reversed(actual) if reverse else actual
    assert all(a <= b for a, b in sliding_window(2, actual))
def featurize(tokens):
    """
    Extract features from individual tokens as well as those that are dependent
    on the sequence thereof.

    Args:
        tokens (List[:class:`spacy.tokens.Token`])

    Returns:
        List[Dict[str, obj]]
    """
    tokens_features = [get_token_features(token) for token in tokens]
    if len(tokens_features) == 1:
        tokens_features[0]["_singleton"] = True
        return tokens_features
    else:
        feature_sequence = []
        line_idx_windows = parse_utils.get_line_token_idxs(tokens_features)
        prev_line_idx, next_line_idx = next(line_idx_windows)
        follows_bullet = False
        n_pad_l, n_pad_r = 3, 2
        tokens_features = parse_utils.pad_tokens_features(
            tokens_features, n_left=n_pad_l, n_right=n_pad_r
        )
        tf_windows = itertoolz.sliding_window(n_pad_l + n_pad_r + 1, tokens_features)
        for ppprev_tf, pprev_tf, prev_tf, curr_tf, next_tf, nnext_tf in tf_windows:
            tf = curr_tf.copy()
            # add features from surrounding tokens, for context
            tf["ppprev"] = ppprev_tf
            tf["pprev"] = pprev_tf
            tf["prev"] = prev_tf
            tf["next"] = next_tf
            tf["nnext"] = nnext_tf
            # add features dependent on this token's position within the sequence
            # and its relationship to other tokens
            tok_idx = tf["idx"]
            line_tfs = tokens_features[prev_line_idx + n_pad_l:next_line_idx + n_pad_l]
            if tf["is_newline"] and tf["idx"] > 0:
                prev_line_idx, next_line_idx = next(line_idx_windows)
                follows_bullet = False
            tf["tok_line_idx"] = tok_idx - prev_line_idx
            tf["follows_bullet"] = follows_bullet
            # is this token a bullet? i.e. "- " token starting a new line
            if tf["shape"] == "-" and tf["tok_line_idx"] == 1:
                follows_bullet = True
            if tf["like_year"] is True:
                year = int(curr_tf["prefix"] + curr_tf["suffix"])
                other_years = [
                    int(_tf["prefix"] + _tf["suffix"])
                    for _tf in line_tfs
                    if _tf["like_year"] is True and _tf["idx"] != tok_idx
                ]
                if other_years:
                    tf["is_max_line_year"] = all(year > oyr for oyr in other_years)
            feature_sequence.append(tf)
        return feature_sequence
def __init__(self, directory, pmm):
    self.directory = directory
    self.velocities = np.load(directory + '/para_velocity_input.npy')
    self.vec_addresses = np.load(directory + '/para_vec_address.npy')
    self.spacing = np.load(directory + '/spacing.npy')
    self.non_nan_velocity_indices = np.load(directory + '/no_nan_inds.npy')
    self.model = pmm.model
    self.accelerations = [
        [divide_arrays(a, b) for a, b in sliding_window(2, v)]
        for v in self.velocities
    ]
def findallsubseqs(a: Iterable[T], b: Iterable[T], overlap: bool = False) -> Iterable[int]:
    x = iter2seq(a)
    if len(x) == 0:
        return
    start = 0
    for pos, y in enumerate(sliding_window(len(x), b)):
        if (overlap or pos >= start) and all(m == n for m, n in zip(x, y)):
            yield pos
            start = pos + len(x)
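# Hypothetical usage sketch, assuming `iter2seq` simply materialises its input
# (e.g. returns a tuple): yields the start positions of each match of `a` in `b`.
print(list(findallsubseqs("aa", "aaaa")))                # [0, 2] -- non-overlapping
print(list(findallsubseqs("aa", "aaaa", overlap=True)))  # [0, 1, 2]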
def stim_conditionals(data, conditioner_stat, stat, n_smallest):
    hunt_id_list = data["Hunt ID"]
    para_stat = data[stat]
    para_cstat = data[conditioner_stat]
    hunt_id_limits = np.where(np.diff(hunt_id_list) != 0)[0] + 1
    stat_per_hunt = []
    for firstind, secondind in sliding_window(2, hunt_id_limits):
        minstat_args = np.argsort(para_cstat[firstind:secondind])[0:n_smallest] + firstind
        stat_per_hunt += para_stat[minstat_args].tolist()
    stat_per_hunt = np.array(stat_per_hunt)
    return stat_per_hunt[~np.isnan(stat_per_hunt)]
def featurize(tokens):
    """
    Extract features from individual tokens as well as those that are dependent
    on the sequence thereof.

    Args:
        tokens (List[:class:`spacy.tokens.Token`])

    Returns:
        List[Dict[str, obj]]
    """
    tokens_features = [get_token_features(token) for token in tokens]
    if len(tokens_features) == 1:
        tokens_features[0]["_singleton"] = True
        return tokens_features
    else:
        feature_sequence = []
        line_idx_windows = parse_utils.get_line_token_idxs(tokens_features)
        prev_line_idx, next_line_idx = next(line_idx_windows)
        follows_bullet = False
        n_pad_l, n_pad_r = 3, 2
        tokens_features = parse_utils.pad_tokens_features(
            tokens_features, n_left=n_pad_l, n_right=n_pad_r
        )
        tf_windows = itertoolz.sliding_window(n_pad_l + n_pad_r + 1, tokens_features)
        for ppprev_tf, pprev_tf, prev_tf, curr_tf, next_tf, nnext_tf in tf_windows:
            tf = curr_tf.copy()
            # add features from surrounding tokens, for context
            tf["ppprev"] = ppprev_tf
            tf["pprev"] = pprev_tf
            tf["prev"] = prev_tf
            tf["next"] = next_tf
            tf["nnext"] = nnext_tf
            # add features dependent on this token's position within the sequence
            # and its relationship to other tokens
            tok_idx = tf["idx"]
            if tf["is_newline"] and tf["idx"] > 0:
                prev_line_idx, next_line_idx = next(line_idx_windows)
                follows_bullet = False
            tf["tok_line_idx"] = tok_idx - prev_line_idx
            tf["follows_bullet"] = follows_bullet
            # bullets have is_group_sep_text, but they aren't group separators
            # at least not in the sense we want here; so, +2 to the previous newline idx
            # ensures that bullets are not counted in this feature
            line_tfs_so_far = tokens_features[prev_line_idx + n_pad_l + 2:tok_idx + n_pad_l]
            if any(_tf["is_group_sep_text"] for _tf in line_tfs_so_far):
                tf["follows_group_sep"] = True
            else:
                tf["follows_group_sep"] = False
            feature_sequence.append(tf)
        return feature_sequence
def calc_class_sums(cl):
    V = datamodel.get_class_v_cycle(cl)
    cycle_parts = datamodel.get_class_parts_limits(cl, edges=True)

    prev = (0, 0)
    for partno, (start, end) in enumerate(itz.sliding_window(2, cycle_parts), 1):
        start += start_offset
        end += end_offset
        sums = calc_v_sums(V.loc[start:end])
        cums = calc_v_sums(V.loc[start:end], prev)
        results.append((cl, f"part-{partno}", *sums, *cums))
        prev = cums

    return results
def sliding_window_filled(seq, n, pad_before=False, pad_after=False, fillvalue=None):
    """
    A sliding window with optional padding on either end.

    Args:
        seq(iter):         an iterator or something that can be turned into an iterator
        n(int):            size of each window
        pad_before(bool):  whether to pad the start of ``seq`` with ``fillvalue``
        pad_after(bool):   whether to pad the end of ``seq`` with ``fillvalue``
        fillvalue:         value used for the padding

    Returns:
        generator object: a generator over the (possibly padded) windows.

    Examples:
        >>> list(sliding_window_filled(range(5), 2))
        [(0, 1), (1, 2), (2, 3), (3, 4)]

        >>> list(sliding_window_filled(range(5), 2, pad_after=True))
        [(0, 1), (1, 2), (2, 3), (3, 4), (4, None)]

        >>> list(sliding_window_filled(range(5), 2, pad_before=True, pad_after=True))
        [(None, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, None)]
    """
    if pad_before and pad_after:
        seq = pad(seq, before=(n - 1), after=(n - 1), fill=fillvalue)
    elif pad_before:
        seq = pad(seq, before=(n - 1), fill=fillvalue)
    elif pad_after:
        seq = pad(seq, after=(n - 1), fill=fillvalue)

    return sliding_window(n, seq)
def padded_sliding_window(
    func: Callable[[Iterable[int]], float], cycle_times: List[int]
) -> List[float]:
    if len(cycle_times) < 5:
        return [func(cycle_times) for _ in cycle_times]

    window_size = max(int(len(cycle_times) * 0.2), 5)
    if window_size % 2 == 0:
        window_size += 1

    sliding_windows = [
        func(window) for window in it.sliding_window(window_size, cycle_times)
    ]
    subwindow = int(window_size // 2)
    return (
        [sliding_windows[0]] * subwindow
        + sliding_windows
        + [sliding_windows[-1]] * subwindow
    )
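# Hypothetical usage (statistics.mean as the aggregate, toolz imported as `it`):
# with 7 samples the window size is max(int(7 * 0.2), 5) = 5, and each edge is
# padded with the first/last window's value, so the output keeps length 7.
import statistics

smoothed = padded_sliding_window(statistics.mean, [3, 5, 4, 6, 5, 7, 6])
print(len(smoothed))  # 7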
def get_tlines(self, key, centrality, temp):
    from toolz.itertoolz import sliding_window
    from ROOT import TLine

    try:
        return self.saved_tlines[(key, centrality, temp)]
    except KeyError:
        pass

    self.saved_tlines[(key, centrality, temp)] = tlines = []
    line_dict = self.data[key][centrality][temp]
    for p1, p2 in sliding_window(2, zip(*line_dict.values())):
        tlines.append(TLine(*p1, *p2))

    # line_dict = self.data[key][centrality][temp]
    # tlines.extend(TLine(*p1, *p2)
    #               for p1, p2 in sliding_window(2, zip(*line_dict.values())))
    return tlines
def __init__(self, drcts):
    self.all_para_velocities = concat_para_velocities(drcts)
    self.all_para_accelerations = [
        [divide_arrays(a, b) for a, b in sliding_window(2, v)]
        for v in self.all_para_velocities
    ]
    self.all_accel_mags = [
        [magvector(a) for a in accs] for accs in self.all_para_accelerations
    ]
    self.velocity_mags = [
        [magvector(v) for v in vecs] for vecs in self.all_para_velocities
    ]
    self.mag_limit = np.percentile(np.concatenate(self.all_accel_mags), 99)
    self.len_simulation = 500
    self.accels_filt = []
    self.model = []
    for ac, ac_m in zip(self.all_para_accelerations, self.all_accel_mags):
        if not (np.array(ac_m) > self.mag_limit).any():
            self.accels_filt.append(ac)
        else:
            filt_acvec = []
            for ac_vec, ac_mag in zip(ac, ac_m):
                if ac_mag < self.mag_limit:
                    filt_acvec.append(ac_vec)
            self.accels_filt.append(filt_acvec)
def subrange(start, stop=None, step=None, substep=None):
    """
    Generates start and stop values for each subrange.

    Args:
        start(int):     First value in range (or last if only specified value)
        stop(int):      Last value in range
        step(int):      Step between each range
        substep(int):   Step within each range

    Yields:
        range:          A subrange within the larger range.

    Examples:
        >>> list(map(list, subrange(5)))
        [[0], [1], [2], [3], [4]]

        >>> list(map(list, subrange(0, 12, 3, 2)))
        [[0, 2], [3, 5], [6, 8], [9, 11]]
    """
    if stop is None:
        stop = start
        start = 0

    if step is None:
        step = 1

    if substep is None:
        substep = 1

    range_ends = itertools.chain(range(start, stop, step), [stop])

    for i, j in sliding_window(2, range_ends):
        yield range(i, j, substep)
def plot_xy_experiment(b_list, b_diams, exp_type, drc):
    fig = pl.figure()
    axes = fig.add_subplot(111, axisbg='.75')
    axes.grid(False)
    for br, bd in zip(b_list, b_diams):
        barrier_x = br[0]
        barrier_y = br[1]
        barrier_diameter = bd
        barrier_plot = pl.Circle((barrier_x, barrier_y),
                                 barrier_diameter / 2,
                                 fc='r')
        axes.add_artist(barrier_plot)
    xcoords, ycoords = get_xy(exp_type, drc)
    v_from_center = []
    for crd in zip(xcoords, ycoords):
        vector = np.array(crd)
        center_mag = magvector_center(vector)
        v_from_center.append(center_mag)
    delta_center_mag = [b - a for a, b in sliding_window(2, v_from_center)]
    axes.plot(xcoords, ycoords)
    axes.axis('equal')
    pl.show()
    return xcoords, ycoords, v_from_center, delta_center_mag
def weekchunks(start, until=None, until_days_ago=0, date_format='%Y-%m-%d'):
    '''Generate date strings in weekly chunks between two dates.

    Args:
        start (str): Sensibly formatted datestring (format to be guessed by pd)
        until (str): Another datestring. Default=today.
        until_days_ago (int): If ``until`` is not specified, this indicates how
                              many days ago to consider. Default=0. Ignored if
                              ``until`` is specified.
        date_format (str): Date format of the output date strings.

    Returns:
        chunk_pairs (list): List of pairs of strings, representing the start
                            and end of weeks.
    '''
    until = (Timestamp(until).to_pydatetime()
             if until is not None
             else dt.now() - timedelta(days=until_days_ago))
    start = Timestamp(start).to_pydatetime()
    chunks = [
        dt.strftime(_date, date_format)
        for _date in rrule.rrule(rrule.WEEKLY, dtstart=start, until=until)
    ]
    if len(chunks) == 1:  # the less-than-one-week case
        _until = dt.strftime(until, date_format)
        chunks.append(_until)
    chunk_pairs = list(sliding_window(2, chunks))
    return chunk_pairs
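# Hypothetical usage (assumes the snippet's imports: pandas.Timestamp,
# dateutil's rrule, and datetime as dt/timedelta): three weekly rrule dates
# collapse into two consecutive (start, end) string pairs.
print(weekchunks('2020-01-01', until='2020-01-20'))
# [('2020-01-01', '2020-01-08'), ('2020-01-08', '2020-01-15')]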
def get_line_token_idxs(tokens_features):
    """
    Get the [start, stop) indexes for all lines in ``tokens_features``, such that
    ``tokens_features[start : stop]`` corresponds to one line of featurized tokens.

    Args:
        tokens_features (List[Dict[str, obj]]): Sequence of featurized tokens,
            as produced by :func:`get_token_features_base()`.

    Yields:
        Tuple[int, int]
    """
    idxs_newlines = [tf["idx"] for tf in tokens_features if tf["is_newline"]]
    if not idxs_newlines:
        yield (0, len(tokens_features))
    else:
        if idxs_newlines[0] != 0:
            idxs_newlines.insert(0, 0)
        if idxs_newlines[-1] != len(tokens_features):
            idxs_newlines.append(len(tokens_features))
        for idx1, idx2 in itertoolz.sliding_window(2, idxs_newlines):
            yield (idx1, idx2)
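# Toy usage sketch with hypothetical featurized tokens: newline tokens at
# idx 2 and 5 split seven tokens into the lines [0, 2), [2, 5) and [5, 7).
toy = [{"idx": i, "is_newline": i in (2, 5)} for i in range(7)]
print(list(get_line_token_idxs(toy)))
# [(0, 2), (2, 5), (5, 7)]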
def filtered_window(seq, n):
    return filter(filter_func, sliding_window(n, seq))
def curry(points):
    return agg(
        function(pointA, pointB)
        for pointA, pointB in sliding_window(2, points)
    )
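# A self-contained instance of the pairwise-aggregation pattern above, with
# `sum` as the aggregate and `math.dist` as the pairwise function (toolz assumed):
import math
from toolz.itertoolz import sliding_window

points = [(0, 0), (3, 4), (3, 8)]
print(sum(math.dist(a, b) for a, b in sliding_window(2, points)))  # 9.0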
            elem, ['predictions'],
            compose(list, partial(take, args.predictions_limit))),
        ujson.load(args.dataset_file)))

    sections = groupby(lambda x: tuple(map(x.get, ['make', 'model'])),
                       dataset).items()

    evaluation_base_url = f'https://storage.cloud.google.com/dev_visual_search/evaluations/output/by-id/{args.evaluation_id}'

    def link_to_page(key):
        if key is None:
            return None
        make, model = key
        return f'{evaluation_base_url}/prediction-{make}-{model}.html'

    for prev, current, next in sliding_window(
            3, cons(None, concat([sections, [None]]))):
        key, section = current
        make, model = key
        prev_key, _ = prev if prev is not None else (None, None)
        next_key, _ = next if next is not None else (None, None)

        page = to_page(
            section, {
                'prev': link_to_page(prev_key),
                'parent': '',
                'next': link_to_page(next_key)
            }, {
                'title': f'Prediction report for {make} / {model}',
                'evaluation_id': args.evaluation_id,
                'image_base_path': args.image_base_path,
def class_boundaries(wltc_class, offset_start, offset_end):
    cycle_parts = datamodel.get_class_parts_limits(wltc_class, edges=True)
    return (
        f"[{start + offset_start}, {end + offset_end}]"
        for start, end in itz.sliding_window(2, cycle_parts)
    )
def part_one():
    result = {1: 0, 2: 0, 3: 0}
    for x, y in itertoolz.sliding_window(2, active_input):
        result[y - x] = result[y - x] + 1
    print(result[1] * result[3])
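# Self-contained sketch of the pairwise-difference count above (toolz assumed;
# `active_input` stands in for a sorted list of adapter joltages):
from toolz import itertoolz

active_input = [0, 1, 4, 5, 6, 7, 10]  # hypothetical sample data
diffs = [y - x for x, y in itertoolz.sliding_window(2, active_input)]
print(diffs.count(1) * diffs.count(3))  # 4 * 2 = 8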
def test_sliding_window():
    assert list(sliding_window(2, [1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)]
    assert list(sliding_window(3, [1, 2, 3, 4])) == [(1, 2, 3), (2, 3, 4)]
def test_sliding_window_of_short_iterator():
    assert list(sliding_window(3, [1, 2])) == []
def filter_altitude_speed(positions):
    return (
        p1
        for p1, p2 in sliding_window(2, positions)
        if check_altitude_speed(p2, p1)
    )