def _find_sea_monsters_in_single_orientation(image, regex_mode):
    """Count sea-monster matches in ``image`` as given (no rotation or flip).

    Args:
        image: iterable of rows; every row is an iterable of one-character
            strings and is joined into a single string.
        regex_mode: ``'chunked'`` slides a 3-row by 20-column window over the
            image and full-matches each window row against the corresponding
            entry of ``JurassicJigsaw.SEA_MONSTER_PATTERN``; ``'full'`` joins
            all rows with newlines and runs one overlapping search.

    Returns:
        The number of matches, or ``None`` (implicitly) when ``regex_mode``
        is neither ``'chunked'`` nor ``'full'``.
    """
    pattern_rows = JurassicJigsaw.SEA_MONSTER_PATTERN
    image = [''.join(row) for row in image]

    if regex_mode == 'chunked':
        matches = 0
        for rows in more_itertools.windowed(image, 3):
            window_iters = [more_itertools.windowed(row, 20) for row in rows]
            # zip keeps the three row windows aligned on the same columns
            for section in zip(*window_iters):
                section_str = [''.join(line) for line in section]
                if all(regex.fullmatch(pattern, line)
                       for pattern, line in zip(pattern_rows, section_str)):
                    matches += 1
        return matches
    elif regex_mode == 'full':
        image_str = '\n'.join(image)
        len_pattern = len(pattern_rows[0])
        # Between the end of one pattern row and the start of the next lie
        # the rest of the image row, the newline and the leading columns of
        # the next row: (row width - pattern width + 1) characters.  The row
        # WIDTH is what matters here; the row count only coincides with it
        # for square images.
        row_width = len(image[0]) if image else 0
        spaces_between_rows = '.{{{}}}'.format(row_width - len_pattern + 1)
        pattern = spaces_between_rows.join(pattern_rows)
        # DOTALL lets '.' consume the newline inside the gap
        return len(
            regex.findall(pattern, image_str, flags=regex.DOTALL,
                          overlapped=True))
def main(input, part):
    """Print the first invalid number, then the min+max of a window summing to it.

    Reads one integer per line from ``input``.  The first 26-wide window
    rejected by ``check_validity`` supplies the invalid number (its last
    element).  Windows of growing size are then tested with
    ``check_decorated_sum`` until one passes.  ``part`` is not used.
    """
    integers = [int(line.strip()) for line in input.readlines()]

    # index 25 is the candidate that follows the 25 numbers before it
    rejected_windows = filterfalse(check_validity, windowed(integers, 26))
    first_invalid = next(rejected_windows)[25]
    print(first_invalid)

    for num_factors in count(2):
        windows = list(windowed(integers, num_factors))
        passing = (
            window for window in windows
            if check_decorated_sum(first_invalid, (sum(window), window))
        )
        solution = next(passing, None)
        if solution:
            print(min(solution) + max(solution))
            # NOTE(review): exit status 1 is used on the success path -
            # confirm this is intended.
            exit(1)
def rulefinder(grid):
    """Collect every rule spelled out in the grid, sorted.

    A rule is three consecutive cells, read left-to-right in a row or
    top-to-bottom in a column, of the form ``noun IS (noun | property)``.
    Returns a sorted list of ``(first_cell, third_cell)`` tuples.
    """
    n_rows, n_cols = len(grid), len(grid[0])

    def matches_grammar(triple):
        subject, verb, complement = triple
        return (isnoun(subject) and isis(verb)
                and (isnoun(complement) or isproperty(complement)))

    # Gather the lines to scan: rows when wide enough, columns when tall enough
    lines = []
    if n_cols >= 3:
        lines.extend(grid)
    if n_rows >= 3:
        lines.extend(zip(*grid))

    found = [
        (triple[0], triple[2])
        for line in lines
        for triple in windowed(line, 3)
        if matches_grammar(triple)
    ]
    found.sort()
    return found
def part_2(raw: str, ints: list[int], strs: list[str]):
    """Count how often the sum of a 3-wide sliding window increases.

    Only complete windows are considered: fewer than four values means there
    are no two windows to compare and the result is 0.  (Zero-padding short
    input would invent sums that were never measured.)

    Args:
        raw: unused.
        ints: the measurements, in order.
        strs: unused.

    Returns:
        Number of consecutive window sums where the later one is larger.
    """
    window_sums = [a + b + c for a, b, c in zip(ints, ints[1:], ints[2:])]
    return sum(
        later > earlier
        for earlier, later in zip(window_sums, window_sums[1:])
    )
def ring_system_decomposed_atom_keys(rsy, rng_keys=None, check=True):
    """ decomposed atom keys for a polycyclic ring system in a graph

    The ring system is decomposed into a ring and a series of arcs that can
    be used to successively construct the system

    :param rsy: the ring system
    :param rng_keys: keys for the first ring in the decomposition; if None,
        the smallest ring in the system will be chosen
    :param check: if True, assert that `rsy` is connected, is a ring system,
        and contains all of `rng_keys`
    :returns: a tuple of key tuples; the first ring, followed by one entry
        per arc in the order the arcs were found
    """
    if rng_keys is None:
        first_rng = sorted(rings(rsy), key=atom_count)[0]
        rng_keys = sorted_ring_atom_keys(first_rng)

    if check:
        assert is_connected(rsy), "Ring system can't be disconnected."
        assert is_ring_system(rsy), (
            f"This is not a ring system graph:\n{string(rsy):s}")
        assert set(rng_keys) <= atom_keys(rsy), (
            f"{string(rsy, one_indexed=False)}\n^ "
            "Rings system doesn't contain ring as subgraph:\n"
            f"{str(rng_keys)}")

    # Repeat the first key at the end so the closing bond is included, then
    # strip the ring's bonds from the graph
    closed_rng_keys = rng_keys + rng_keys[:1]
    rsy = remove_bonds(rsy, list(mit.windowed(closed_rng_keys, 2)))

    decomp = [rng_keys]
    visited = set(rng_keys)

    while bond_keys(rsy):
        # Shortest paths in what is left of the graph
        paths = atom_shortest_paths(rsy)

        # The next arc is the shortest remaining path joining two atoms that
        # are already part of the decomposition
        arc = min(
            (paths[key1][key2]
             for key1, key2 in itertools.combinations(visited, 2)
             if key2 in paths[key1]),
            key=len)

        decomp.append(arc)
        visited |= set(arc)

        # Drop the arc's bonds and look for the next one
        rsy = remove_bonds(
            rsy, [frozenset(pair) for pair in mit.windowed(arc, 2)])

    return tuple(tuple(keys) for keys in decomp)
def largest_power_3_by_3(self):
    """Return ``(x, y)`` of the top-left cell of the 3x3 square with most power.

    Every square's power is the sum of ``self.fuel_cells.map`` over its nine
    cells.  On ties the square encountered first (x varying slowest) wins.
    """
    # NOTE(review): range(1, 300) ends at 299, so top-left corners only run
    # from 1 to 297.  If the grid spans 1..300 the last row/column of squares
    # is never scored - confirm against how fuel_cells.map is populated.
    axis_windows = list(more_itertools.windowed(range(1, 300), 3))

    powers = {}
    for xs in axis_windows:
        for ys in axis_windows:
            total = sum(
                self.fuel_cells.map[Coords(x, y)] for x in xs for y in ys)
            powers[Coords(xs[0], ys[0])] = total

    best = max(powers, key=powers.get)
    return best.x, best.y
def _board_iterator_helper(num_cols, num_rows, length):
    """Yield every straight line of ``length`` cells on the board.

    Each line is a tuple of ``(col, row)`` pairs.  Lines are produced in this
    order: vertical lines column by column, horizontal lines row by row, then
    for every diagonal anchor a descending diagonal followed by its mirrored
    counterpart.

    The mirrored diagonal uses negative row values (``~r - d`` equals
    ``-(r + d) - 1``), i.e. rows counted from the far edge.

    Nothing is yielded for a direction in which the board is shorter than
    ``length``.

    Args:
        num_cols: number of board columns.
        num_rows: number of board rows.
        length: number of cells in each line.
    """
    steps = range(length)

    # Vertical lines
    for col in range(num_cols):
        for top in range(num_rows - length + 1):
            yield tuple((col, top + d) for d in steps)

    # Horizontal lines; these honour ``length`` like every other direction
    for row in range(num_rows):
        for left in range(num_cols - length + 1):
            yield tuple((left + d, row) for d in steps)

    # Diagonals
    for row_diag, col_diag in itertools.product(
            range(num_rows - length + 1), range(num_cols - length + 1)):
        yield tuple((col_diag + d, row_diag + d) for d in steps)
        yield tuple((col_diag + d, ~row_diag - d) for d in steps)
def _find_sea_monsters(image):
    """Count 3-row by 20-column windows of ``image`` matching the sea monster.

    Each row of ``image`` is joined into a string.  A window counts when all
    of its rows full-match the corresponding regular expression in
    ``JurassicJigsaw.SEA_MONSTER_PATTERN``.
    """
    joined_rows = [''.join(row) for row in image]
    found = 0
    for band in more_itertools.windowed(joined_rows, 3):
        # One 20-wide sliding window per row, advanced together column-wise
        aligned = zip(*(more_itertools.windowed(row, 20) for row in band))
        for section in aligned:
            lines = [''.join(chars) for chars in section]
            rows_match = (
                re.fullmatch(pattern, line)
                for pattern, line in zip(
                    JurassicJigsaw.SEA_MONSTER_PATTERN, lines)
            )
            if all(rows_match):
                found += 1
    return found
def extend(self,
           nodes: Sequence[Hashable],
           start: Union[Hashable, Sequence[Hashable]] = None) -> None:
    """Adds 'nodes' to the stored data structure as a chain.

    Nested lists/tuples in 'nodes' are flattened first. The first node is
    connected to every starting point (or merely added when there is no
    starting point); each following node is connected to its predecessor.

    Args:
        nodes (Sequence[Hashable]): names of items to add. An empty sequence
            is a no-op.
        start (Union[Hashable, Sequence[Hashable]]): where to add new node
            to. If there are multiple nodes in 'start', 'node' will be added
            to each of the starting points. If 'start' is None, 'endpoints'
            will be used. Defaults to None.

    Returns:
        The instance itself (despite the 'None' return annotation).

    """
    if any(isinstance(n, (list, tuple)) for n in nodes):
        nodes = tuple(more_itertools.collapse(nodes))
    if not nodes:
        # nothing to add; avoids indexing into an empty sequence below
        return self
    if start is None:
        start = self.endpoints
    if start:
        for starting in more_itertools.always_iterable(start):
            self.connect(start=starting, stop=nodes[0])
    else:
        self.add(nodes[0])
    # Pair each node with its successor. A single node produces no pairs, so
    # no edge to a 'None' placeholder is ever created.
    for edge_start, edge_stop in zip(nodes, nodes[1:]):
        self.connect(start=edge_start, stop=edge_stop)
    return self
def __call__(self, data: np.ndarray) -> np.ndarray:
    """Score every full window of ``self.size`` samples against a triangle.

    For each window a slope ``alpha`` is fitted to the mirrored halves of the
    window, a baseline ``y_min`` is derived from it, and the window (minus
    the baseline) is projected onto a symmetric triangle of slope ``alpha``.
    Negative slopes are clamped to zero, giving a score of 0.

    The index arithmetic only lines up for odd ``self.size``.

    Args:
        data: one-dimensional sequence of samples.

    Returns:
        One score per full window; empty when ``data`` holds fewer than
        ``self.size`` samples.
    """
    data = np.asarray(data)
    size = self.size
    h = size // 2

    # Window-independent quantities, computed once
    x = np.arange(0, h + 1) * 1.0
    norm = np.dot(x, x)
    ramp = np.arange(2 * h + 1)
    unit_triangle = np.minimum(ramp, 2 * h - ramp)

    res = []
    for begin in range(len(data) - size + 1):
        window = data[begin:begin + size]
        y1 = window[0:h + 1]
        y2 = np.flip(window[h:2 * h + 1])
        alpha = (np.dot(y1, x) + np.dot(y2, x)) / norm
        # the baseline uses the fitted slope *before* it is clamped
        y_min = np.mean(window) - alpha * size / 4.0
        if alpha < 0:
            alpha = 0
        triangle = unit_triangle * alpha
        res.append(np.dot(triangle, window - y_min))
    return np.asarray(res)
def create_split(df, study_ids, config):
    """Build per-slice feature rows from neighbouring-slice predictions.

    For every study the rows of ``df`` are ordered by ``slice_num``, the
    prediction columns are zero-padded by ``config.num_slices // 2`` rows on
    both ends, and each sliding window of ``config.num_slices`` padded rows
    is flattened into one feature row of width ``config.predictions_in``.

    Returns:
        ``(x, gt)``: features and ground truth, concatenated over all
        studies in the order of ``study_ids``.
    """
    half_window = config.num_slices // 2
    features, targets = [], []
    for study_id in tqdm(study_ids):
        # Slices of this study, in order
        ordered = df[df.study_id == study_id].sort_values('slice_num')
        preds = ordered[config.pred_columns].to_numpy()
        study_gt = ordered[config.gt_columns].to_numpy()

        padded = np.pad(preds, ((half_window, half_window), (0, 0)))
        # Row indices of every sliding window, written out one after another
        row_indices = [
            idx
            for window in windowed(range(padded.shape[0]), config.num_slices)
            for idx in window
        ]
        study_x = padded[row_indices].reshape(study_gt.shape[0],
                                              config.predictions_in)
        features.append(study_x)
        targets.append(study_gt)

    return np.concatenate(features), np.concatenate(targets)
def roll_window(array, window_length, fn=None):
    """
    Takes in a sequence and returns a list of its rolling windows (tuples) of length
    ``window_length``.

    :param array: A list, tuple, numpy array, etc.
    :param int window_length: Size of the window
    :param lambda fn: A predicate; only elements for which it returns exactly ``True`` are
                      kept before windowing –– used for prime contour calculations.
    :return: The rolling windows of array, each of length `window_length`.
    :rtype: list of tuples

    >>> composers = np.array(['Mozart', 'Monteverdi', 'Messiaen', 'Mahler', 'MacDowell', 'Massenet'])
    >>> for window in roll_window(array=composers, window_length=3):
    ...     print(window)
    ('Mozart', 'Monteverdi', 'Messiaen')
    ('Monteverdi', 'Messiaen', 'Mahler')
    ('Messiaen', 'Mahler', 'MacDowell')
    ('Mahler', 'MacDowell', 'Massenet')

    >>> # This function also allows the use of a function input for filtering.
    >>> # Say we wanted to iterate over the elements of the following collection that have
    >>> # 1s in the set.
    >>> cseg_data = [[0, {1, -1}], [4, {1}], [2, {-1}], [5, {1}], [5, {1}], [1, {1, -1}]]
    >>> fn = lambda x: 1 in x[1]
    >>> for this_frame in roll_window(cseg_data, 3, fn):
    ...     print(this_frame)
    ([0, {1, -1}], [4, {1}], [5, {1}])
    ([4, {1}], [5, {1}], [5, {1}])
    ([5, {1}], [5, {1}], [1, {1, -1}])
    """
    assert type(window_length) is int

    if fn is None:
        source = array
    else:
        source = [item for item in array if fn(item) is True]

    return list(windowed(seq=source, n=window_length, step=1))
def addLettersToDates(bibliography):
    """
    Disambiguate consecutive records that share author and date by appending
    a, b, c, ... to the year, and refresh each touched record's 'sortingStr'.

    Records are modified in place; nothing is returned. Lettering restarts at
    'a' for every new group, and a group that runs up to the very last record
    is lettered completely. More than 26 records in one group raise an
    IndexError.

    example:
    0 einstein, 1912a
    1 einstein, 1912
    1 einstein, 1912b
    2 einstein, 1912
    2 einstein, 1912c
    3 einstein, 1912
    3 einstein, 1912d
    4 schrodinger, 1920
    """
    records = list(bibliography)

    def sameAuthorAndDate(first, second):
        if not ('author' in first and 'author' in second
                and 'date' in first and 'date' in second):
            return False
        if first['date'] == 'n.d.' or second['date'] == 'n.d.':
            return False
        return (first['author'] == second['author']
                and first['date'] == second['date'])

    def addLetter(record, index):
        record['date']['year'] += ascii_lowercase[index]
        record['sortingStr'] = makeSortingString(record)

    count = 0
    tail = False  # True while the current record closes an open group
    for thisRecord, nextRecord in zip(records, records[1:]):
        # Compare before mutating: nextRecord is still untouched here
        if sameAuthorAndDate(thisRecord, nextRecord):
            addLetter(thisRecord, count)
            count += 1
            tail = True
        elif tail:
            # last member of the group: letter it and start over at 'a'
            addLetter(thisRecord, count)
            count = 0
            tail = False

    if tail:
        # the group reached the end of the bibliography
        addLetter(records[-1], count)
def solve(_input):
    """Return the index at which the digit list ``_input`` first appears in ``table``.

    ``table`` and ``elves_pos`` are defined outside this function and are
    mutated here.  The existing table is scanned first; afterwards new digits
    are appended step by step (the digits of the sum of the elves' current
    entries) and the tail of the table is compared after every step.
    """
    target_len = len(_input)

    for offset, window in enumerate(windowed(table, target_len)):
        if list(window) == _input:
            return offset

    while True:
        total = sum(table[pos] for pos in elves_pos)
        if total >= 10:
            table.append((total // 10) % 10)
        table.append(total % 10)

        # Each elf advances by 1 plus the value of its current entry
        for elf_idx, pos in enumerate(elves_pos):
            elves_pos[elf_idx] = (pos + 1 + table[pos]) % len(table)

        size = len(table)
        # A step may append two digits, so a match can end one before the tail
        ending_one_early = table[size - target_len - 1: size - 1]
        # NOTE(review): this prints on every step - looks like leftover
        # debugging output; confirm before removing.
        print(_input, ending_one_early)
        if _input == ending_one_early:
            return size - target_len - 1
        if _input == table[size - target_len:]:
            return size - target_len
def get_assembly_points(agouti_path, source, oriented=False):
    """Turn a path of sequence names into assembly points between neighbours.

    Args:
        agouti_path: iterable of sequence names, in path order.
        source: recorded (as ``str(source)``) in each point's ``sources``.
        oriented: when True a leading ``"-"`` is stripped from a name and
            reported as orientation ``"-"``; any other name gets ``"+"``.
            When False names are used verbatim and orientations are ``"?"``.

    Returns:
        A list with one ``AssemblyPoint`` per pair of adjacent names; empty
        for paths with fewer than two names.
    """

    def split_orientation(name):
        # -> (sequence name, orientation symbol)
        if not oriented:
            return name, "?"
        if name.startswith("-"):
            return name[1:], "-"
        return name, "+"

    path = list(agouti_path)
    result = []
    # zip only yields real neighbours, so a one-element path produces no
    # assembly point with a missing right-hand side
    for left, right in zip(path, path[1:]):
        seq1, seq1_or = split_orientation(left)
        seq2, seq2_or = split_orientation(right)
        result.append(
            AssemblyPoint(seq1=seq1, seq2=seq2,
                          seq1_or=seq1_or, seq2_or=seq2_or,
                          sources=[str(source)]))
    return result
def part_1(data, preamblesize=25):
    r'''Return the first number that is not the sum of two of its predecessors.

    ``data`` holds one integer per line.  Starting after the first
    ``preamblesize`` numbers, each number must equal the sum of two entries
    (at different positions) among the ``preamblesize`` numbers right before
    it.  Returns ``None`` when every number qualifies or when there are not
    more than ``preamblesize`` numbers.

    >>> part_1("""\
    ... 35
    ... 20
    ... 15
    ... 25
    ... 47
    ... 40
    ... 62
    ... 55
    ... 65
    ... 95
    ... 102
    ... 117
    ... 150
    ... 182
    ... 127
    ... 219
    ... 299
    ... 277
    ... 309
    ... 576""", 5)
    127
    '''
    numbers = [int(line) for line in data.splitlines()]
    for idx in range(preamblesize, len(numbers)):
        curr = numbers[idx]
        pres = numbers[idx - preamblesize:idx]
        if not any(x + y == curr for x, y in it.combinations(pres, 2)):
            return curr
    return None
def from_seconds(cls, events: List[BPMAtSecond]) -> TimeMap:
    """Create a time map from a list of BPM changes with time positions
    given in seconds. The first BPM implicitly happens at beat zero.

    Raises ValueError when `events` is empty or when two events share the
    same time."""
    if not events:
        raise ValueError("No BPM defined")

    for time, simultaneous in group_by(events, key=lambda e: e.seconds).items():
        if len(simultaneous) > 1:
            raise ValueError(f"Multiple BPMs defined at {time} seconds : {events}")

    # Walk the events in chronological order, accumulating beats
    ordered = sorted(events, key=lambda e: e.seconds)
    beat = Fraction(0)
    changes = [BPMChange(beat, ordered[0].seconds, ordered[0].BPM)]
    for previous, current in zip(ordered, ordered[1:]):
        elapsed_seconds = current.seconds - previous.seconds
        # beats elapsed at the previous tempo
        beat += (previous.BPM * elapsed_seconds) / Fraction(60)
        changes.append(BPMChange(beat, current.seconds, current.BPM))

    return cls(
        events_by_beats=SortedKeyList(changes, key=lambda b: b.beats),
        events_by_seconds=SortedKeyList(changes, key=lambda b: b.seconds),
    )
def find_invalid(XMAS_Code, window_size):
    """Return the first entry that is not a sum of two of its predecessors.

    Every entry from index ``window_size`` on is checked against the sums of
    all pairs (at different positions) taken from the ``window_size`` entries
    directly before it.

    Returns:
        The first entry failing the check, or ``None`` when all entries pass
        or the sequence is not longer than ``window_size``.
    """
    for idx in range(window_size, len(XMAS_Code)):
        target = XMAS_Code[idx]
        preamble = XMAS_Code[idx - window_size:idx]
        if target not in map(sum, combinations(preamble, 2)):
            return target
    return None
def ring_forming_scission_constraint_coordinates(rxn, zma):
    """ Obtain the constraint coordinates for a ring-forming scission

    The constraints are the central angles over consecutive key triples of
    the chain (first key skipped) followed by the dihedral angles over
    consecutive key quadruples of the whole chain.

    :param rxn: a Reaction object
    :param zma: the z-matrix in which the coordinate names are looked up
    :returns: the names of the constraint coordinates in the z-matrix
    :rtype: tuple
    """
    chain = ring_forming_scission_chain(rxn)
    angle_triples = sorted(mit.windowed(chain[1:], 3))
    dihedral_quads = sorted(mit.windowed(chain, 4))

    names = []
    for triple in angle_triples:
        names.append(
            automol.zmat.central_angle_coordinate_name(zma, *triple))
    for quad in dihedral_quads:
        names.append(
            automol.zmat.dihedral_angle_coordinate_name(zma, *quad))
    return tuple(names)
def _smooth(a: np.array) -> np.array:
    """Replace every segment of ``a`` by its 1/4 and 3/4 points.

    For each pair of consecutive entries ``(p, n)`` the output contains
    ``0.75 * p + 0.25 * n`` followed by ``0.25 * p + 0.75 * n``, so ``k``
    input points give ``2 * (k - 1)`` output points.  Entries may be scalars
    or rows of a multi-dimensional array.

    Fewer than two input points produce an empty result.
    """
    pts = np.asarray(a)
    if len(pts) < 2:
        # no segment to subdivide
        return np.empty((0,) + pts.shape[1:])

    prev_pts, next_pts = pts[:-1], pts[1:]
    near_prev = 0.75 * prev_pts + 0.25 * next_pts
    near_next = 0.25 * prev_pts + 0.75 * next_pts

    # Interleave the two point sets: q0, r0, q1, r1, ...
    out = np.empty((2 * len(near_prev),) + near_prev.shape[1:],
                   dtype=near_prev.dtype)
    out[0::2] = near_prev
    out[1::2] = near_next
    return out
def part_2(raw_input, steps=10):
    """Return (most common - least common) letter count after ``steps`` insertions.

    ``raw_input`` holds a template line and rule lines of the form
    ``AB -> C``.  In one step every adjacent pair ``AB`` with a rule becomes
    the pairs ``AC`` and ``CB``; pairs without a rule stay as they are.  Only
    pair counts are tracked, never the string itself.

    Returns 0 when no template line is present.
    """
    rules = {}
    template = ""
    for row in filter(None, raw_input.splitlines()):
        if "->" in row:
            source, new = row.split(" -> ")
            rules[source] = (f"{source[0]}{new}", f"{new}{source[1]}")
        else:
            template = row
    if not template:
        return 0

    pair_counts = defaultdict(int)
    for left, right in zip(template, template[1:]):
        pair_counts[left + right] += 1

    for _ in range(steps):
        next_counts = defaultdict(int)
        for pair, count in pair_counts.items():
            for new_pair in rules.get(pair, (pair,)):
                next_counts[new_pair] += count
        pair_counts = next_counts

    # Every letter sits in two pairs except the two ends of the polymer,
    # which never change; adding those once more doubles every letter count.
    doubled = defaultdict(int)
    for pair, count in pair_counts.items():
        doubled[pair[0]] += count
        doubled[pair[1]] += count
    doubled[template[0]] += 1
    doubled[template[-1]] += 1

    # All doubled counts are even, so integer division is exact and avoids
    # float rounding on very large counts.
    return (max(doubled.values()) - min(doubled.values())) // 2
def main2(
    filepath: str = "./data/raw/day9_sample.txt",
    preamble_length: int = 5,
    idx: int = 0,
):
    """Solution 2 for day 9

    Looks for the shortest run of at least two contiguous values, located
    before the invalid number, that ``check_sum`` accepts for it.

    Args:
        filepath (str, optional): Defaults to './data/raw/day9_sample.txt'.
        preamble_length (int, optional): Defaults to 5.
        idx (int, optional): The id of the invalid number from main(). Defaults to 0.

    Returns:
        The first matching run of values, or an empty list if there is none.
    """
    data = get_data(filepath)
    invalid_number = data[preamble_length:][idx]
    values = data[:idx + preamble_length]

    # try ever longer runs of contiguous values
    for seq_len in range(2, len(values) + 1):
        hits = [
            check_sum(window, invalid_number)
            for window in windowed(values, seq_len)
        ]
        if any(hits):
            first = hits.index(True)
            return values[first:first + seq_len]
    return []
def branchify(self,
              nodes: Sequence[Sequence[Hashable]],
              start: Union[Hashable, Sequence[Hashable]] = None) -> None:
    """Adds parallel paths to the stored data structure.

    Every combination in the Cartesian product of 'nodes' becomes one path.
    The head of each path is linked to every starting point (or added as a
    bare node when there is no starting point and it is not stored yet), and
    consecutive nodes of the path are linked by edges.

    Subclasses should ordinarily provide their own methods.

    Args:
        nodes (Sequence[Sequence[Hashable]]): a list of list of nodes which
            should have a Cartesian product determined and extended to
            the stored data structure.
        start (Union[Hashable, Sequence[Hashable]]): where to add new node
            to. If there are multiple nodes in 'start', 'node' will be added
            to each of the starting points. If 'start' is None, 'endpoints'
            will be used. Defaults to None.

    Returns:
        The instance itself (despite the 'None' return annotation).

    """
    if start is None:
        # snapshot, so edges added for one path do not change where the
        # remaining paths are attached
        start = copy.deepcopy(self.endpoints)
    for path in itertools.product(*nodes):
        if not path:
            # an empty 'nodes' yields a single empty combination
            continue
        if start:
            for starting in more_itertools.always_iterable(start):
                self.add_edge(start=starting, stop=path[0])
        elif path[0] not in self.contents:
            self.add_node(path[0])
        # Pair each node with its successor. A one-node path produces no
        # pairs, so no edge to a 'None' placeholder is ever created.
        for edge_start, edge_stop in zip(path, path[1:]):
            self.add_edge(start=edge_start, stop=edge_stop)
    return self
def _setup_controllers(self, ego_vehicle_location, speed_mps, route,
                       column_ahead_of_ego_m):
    """Reset every controller onto ``route`` and return the resulting commands.

    The first controller is placed ``column_ahead_of_ego_m`` metres (converted
    to route indices through the median waypoint spacing) past the waypoint
    nearest to the ego vehicle.  Each following controller is placed further
    back by the offset that ``_calc_offset`` returns.
    """
    spacings = [
        a.location.distance(b.location) for a, b in windowed(route, 2)
    ]
    resolution_m = np.median(spacings)
    m2idx = 1 / resolution_m

    distances_to_ego = [
        t.location.distance(ego_vehicle_location) for t in route
    ]
    ego_vehicle_idx = int(np.argmin(distances_to_ego))

    current_idx = ego_vehicle_idx + int(column_ahead_of_ego_m * m2idx)
    total = len(self._controllers)

    cmds = []
    for position, controller in enumerate(self._controllers.values()):
        cmds.extend(
            controller.reset(speed_mps=speed_mps,
                             route=route,
                             initial_location=route[current_idx].location))
        # step back along the route for the next controller
        current_idx -= _calc_offset(current_idx, total - position,
                                    resolution_m)
        assert current_idx >= 0
    return cmds
def _move_env_vehicles(self, ego_vehicle_location: carla.Location):
    """Step every controller and re-queue the ones that dropped out.

    A controller that is still running and passes
    ``_is_behind_ego_or_inside_birdview`` contributes the commands from its
    step.  Any other controller is reset to a route location behind the
    current end of the column.  All collected commands are sent in one batch
    (without ticking) if there are any.

    :param ego_vehicle_location: location handed to
        ``_is_behind_ego_or_inside_birdview``
    """
    # Lazily initialised the first time a controller has to be reset
    column_end_location = None
    column_end_idx = None
    route_resolution_m = None

    cmds = []
    for controller_idx, controller in enumerate(self._controllers.values()):
        finished, cmds_ = controller.step()
        if not finished and _is_behind_ego_or_inside_birdview(controller, ego_vehicle_location):
            cmds.extend(cmds_)
        else:
            if column_end_location is None:
                # obtain location of last vehicle in column (the controller
                # with the smallest idx)
                idxes = [c.idx for c in self._controllers.values()]
                locations = [c.location for c in self._controllers.values()]
                column_end_location = locations[int(np.argmin(idxes))]
                # resolution of the non-resampled route: median distance
                # between consecutive route points
                route_resolution_m = \
                    np.median([t1.location.distance(t2.location) for t1, t2 in windowed(self._route, 2)])
                # idx of the nearest point on the non-resampled route
                column_end_idx = np.argmin([t.location.distance(column_end_location) for t in self._route])
            # obtain reset location; column_end_idx keeps decreasing, so
            # every further reset in this call lands behind the previous one
            offset = _calc_offset(column_end_idx, 1, route_resolution_m)
            column_end_idx = column_end_idx - offset
            assert column_end_idx >= 0
            column_end_location = self._route[column_end_idx].location
            cmds.extend(controller.reset(initial_location=column_end_location))
    if cmds:
        self._client.apply_batch_sync(cmds, do_tick=False)
def _generate_candidates(self, cas: Cas, n: int):
    """Yield ``(begin, end, text)`` for every run of ``n`` consecutive tokens.

    ``begin`` is taken from the first token of the run, ``end`` from the
    last, and ``text`` is the slice of ``cas.sofa_string`` between them.
    Nothing is yielded when the CAS holds fewer than ``n`` tokens.
    """
    tokens = list(cas.select(TOKEN_TYPE))
    if len(tokens) < n:
        # windowed() would pad the single short window with None
        return

    # We generate token n-grams
    for ngram in mit.windowed(tokens, n):
        begin = ngram[0].begin
        end = ngram[-1].end
        text = cas.sofa_string[begin:end]
        yield (begin, end, text)
def find_gender_adj(document, gender_to_find, word_window=5, genders_to_exclude=None):
    # pylint: disable=too-many-locals
    """
    Takes in a document and a Gender to look for, and returns a dictionary of adjectives that
    appear within a window of `word_window` words around each identifier.

    A window is skipped when it contains (case-sensitively) an identifier of an excluded
    gender. Texts shorter than one full window (2 * word_window + 1 tokens) yield no
    adjectives.

    :param document: Document
    :param gender_to_find: Gender
    :param word_window: number of words to search for in either direction of a gender instance
    :param genders_to_exclude: list of Genders to exclude, or None
    :return: dict of adjectives that appear around pronouns mapped to the number of occurrences

    >>> from corpus_analysis import document
    >>> from pathlib import Path
    >>> from gender_analysis import common
    >>> document_metadata = {'author': 'Hawthorne, Nathaniel', 'title': 'Scarlet Letter',
    ...                      'date': '1966', 'filename': 'test_text_7.txt',
    ...                      'filepath': Path(common.TEST_DATA_PATH,
    ...                                       'document_test_files', 'test_text_7.txt')}
    >>> scarlett = document.Document(document_metadata)
    >>> find_gender_adj(scarlett, common.MALE, genders_to_exclude=[common.FEMALE])
    {'handsome': 3, 'sad': 1}

    """
    output = {}
    text = document.get_tokenized_text()
    adj_tags = ["JJ", "JJR", "JJS"]
    identifiers_to_find = gender_to_find.identifiers

    # Built once instead of once per window
    identifiers_to_exclude = set()
    for gender in genders_to_exclude or ():
        identifiers_to_exclude.update(gender.identifiers)

    for words in windowed(text, 2 * word_window + 1):
        if words[-1] is None:
            # windowed() pads a text that is shorter than the window
            continue
        if words[word_window].lower() not in identifiers_to_find:
            continue
        if identifiers_to_exclude.intersection(words):
            continue

        lowered = [word.lower() for word in words]
        for word, (_, tag) in zip(lowered, nltk.pos_tag(lowered)):
            if tag in adj_tags:
                output[word] = output.get(word, 0) + 1

    return output
def positions_to_transforms(
        positions: List[Union[Vector3, Vector2]]) -> List[Transform]:
    """Pair every position with a smoothed heading.

    The heading at a point is the normalized sum of the unit directions of
    the segments entering and leaving it.  The path is extended by one
    mirrored point at each end so the first and last positions get a heading
    too.  Requires at least two positions.
    """
    points = [convert_to_vector2(w) for w in positions]
    assert len(points) > 1

    lead_in = points[0] - (points[1] - points[0])
    lead_out = points[-1] + (points[-1] - points[-2])
    guarded = [lead_in, *points, lead_out]

    tangents = [
        ((here - before).normalized() + (after - here).normalized()).normalized()
        for before, here, after in windowed(guarded, 3)
    ]
    assert len(points) == len(tangents), \
        f"Got {len(points)} and {len(tangents)}"

    return [
        Transform(point.to_vector3(0.), tangent)
        for point, tangent in zip(points, tangents)
    ]
def next_password(inp):
    """Return the next password after ``inp`` that passes both checks below.

    ``inp`` is handled as a base-26 number whose digits are the letter
    offsets from 'a' (``ord(ch) - 97``).  It is incremented until it contains
    (1) three consecutive increasing letters and (2) two pairs of different
    letters, where a pair only counts if it is not part of a longer run of
    the same letter.  Offsets 8, 14 and 11 ('i', 'o', 'l') are skipped.
    """
    inps = [ord(x) - 97 for x in inp]
    while True:
        inps[-1] += 1
        # Walk from the last digit to the first so carries propagate leftwards
        for x in range(len(inps) - 1, -1, -1):
            if inps[x] > 25:
                # overflow past 'z': wrap around and carry into the digit on the left
                inps[x] = 0
                if x > 0:
                    inps[x - 1] += 1
            if inps[x] in [8, 14, 11]:
                # skip the letter and restart everything to its right at 'a'
                inps[x] += 1
                if x < len(inps) - 1:
                    for y in range(x + 1, len(inps)):
                        inps[y] = 0
        # Check 1: a straight of three consecutive increasing letters
        seq = False
        for x, y, z in windowed(inps, 3):
            if x + 2 == y + 1 == z:
                seq = True
                break
        # Check 2: two pairs of different letters; the two `not (...)` terms
        # reject a pair whose left or right neighbour is the same letter
        pairs = False
        pair = -1  # letter of the first pair found, -1 while there is none
        for x in range(len(inp) - 1):
            if inps[x] == inps[x + 1] and not (
                    x > 0 and inps[x - 1] == inps[x]) and not (
                        x < len(inp) - 2 and inps[x + 2] == inps[x]):
                if pair == -1:
                    pair = inps[x]
                elif pair != inps[x]:
                    pairs = True
                    break
        if pairs and seq:
            return "".join(chr(x + 97) for x in inps)
def find_gender_adj(document, female):
    """
    Takes in a document and boolean indicating gender, and returns a dictionary of adjectives
    that appear within a window of 5 words around each pronoun. Windows that contain a word
    from the other gender's word list are ignored.

    :param: document: Document
    :param: female: boolean indicating whether to search for female adjectives (true) or male
        adjectives (false)
    :return: dictionary of adjectives that appear around pronouns mapped to the number of
        occurrences; an empty dictionary when no instance distances exist; the string
        "lower window bound less than 5" when the computed bound is below 5

    >>> from gender_analysis import document
    >>> from pathlib import Path
    >>> from gender_analysis import common
    >>> document_metadata = {'author': 'Hawthorne, Nathaniel', 'title': 'Scarlet Letter', 'date': '1966',
    ...                      'filename': 'test_text_7.txt', 'filepath': Path(common.TEST_DATA_PATH, 'document_test_files', 'test_text_7.txt')}
    >>> scarlett = document.Document(document_metadata)
    >>> find_gender_adj(scarlett, False)
    {'handsome': 3, 'sad': 1}

    """
    adjective_counts = {}
    text = document.get_tokenized_text()

    if female:
        distances = female_instance_dist(document)
        wanted, unwanted = common.FEM_WORDS, common.MASC_WORDS
    else:
        distances = male_instance_dist(document)
        wanted, unwanted = common.MASC_WORDS, common.FEM_WORDS

    if len(distances) == 0:
        return {}
    if len(distances) <= 3:
        lower_window_bound = 5
    else:
        lower_half = sorted(distances)[:int(len(distances) / 2)]
        lower_window_bound = median(lower_half)

    if not lower_window_bound >= 5:
        return "lower window bound less than 5"

    unwanted_words = set(unwanted)
    adjective_tags = ("JJ", "JJR", "JJS")
    for window in windowed(text, 11):
        # the pronoun candidate sits in the middle of the 11-word window
        center = window[5].lower()
        if center not in wanted:
            continue

        words = list(window)
        words[5] = center
        if unwanted_words & set(words):
            continue

        words = [word.lower() for word in words]
        for word, (_, tag) in zip(words, nltk.pos_tag(words)):
            if tag in adjective_tags:
                adjective_counts[word] = adjective_counts.get(word, 0) + 1

    return adjective_counts
def get_assembly_points(agouti_path, source, oriented=False):
    """Turn a path of sequence names into assembly points between neighbours.

    Args:
        agouti_path: iterable of sequence names, in path order.
        source: recorded (as ``str(source)``) in each point's ``sources``.
        oriented: when True a leading ``"-"`` is stripped from a name and
            reported as orientation ``"-"``; any other name gets ``"+"``.
            When False names are used verbatim and orientations are ``"?"``.

    Returns:
        A list with one ``AssemblyPoint`` per pair of adjacent names; empty
        for paths with fewer than two names.
    """

    def split_orientation(name):
        # -> (sequence name, orientation symbol)
        if not oriented:
            return name, "?"
        if name.startswith("-"):
            return name[1:], "-"
        return name, "+"

    path = list(agouti_path)
    result = []
    # zip only yields real neighbours, so a one-element path produces no
    # assembly point with a missing right-hand side
    for left, right in zip(path, path[1:]):
        seq1, seq1_or = split_orientation(left)
        seq2, seq2_or = split_orientation(right)
        result.append(
            AssemblyPoint(seq1=seq1, seq2=seq2,
                          seq1_or=seq1_or, seq2_or=seq2_or,
                          sources=[str(source)]))
    return result