def _assert_valid(self, visits):
    """Verify that writes never overlap other writes or reads.

    ``visits`` is a sequence of indexable records: item ``[0]`` identifies
    the visitor and item ``[1]`` is compared against ``'r'`` and ``'w'``
    (presumably the access mode -- confirm against the caller).
    """
    # Between two reads, every write must open and close back to back,
    # so consecutive pairs have to carry the same identifier.
    for writes in mo.split_at(visits, lambda visit: visit[1] == 'r'):
        for opened, closed in mo.chunked(writes, 2):
            self.assertEqual(opened[0], closed[0])

    # Between two writes, reads may interleave as a group; sorting lines the
    # open/close records of each reader up next to each other.
    for reads in mo.split_at(visits, lambda visit: visit[1] == 'w'):
        for opened, closed in mo.chunked(sorted(reads), 2):
            self.assertEqual(opened[0], closed[0])
def shphrase(s):
    """Translate a phrase made of terms joined by ``SH_JOINS`` operators.

    ``s`` is iterated element by element; every element that is a key of
    ``SH_JOINS`` acts as a separator.  The pieces between separators are
    joined into strings, converted with ``shterm`` and wrapped in
    parentheses.  A phrase without any separator is returned as that single
    wrapped term; otherwise the output is the concatenation of one fragment
    per operator.
    """
    terms = []
    splits = list(SH_JOINS.keys())
    # With keep_separator set, the chunks alternate: term, separator, term, ...
    for idx, _t in enumerate(split_at(s, lambda x: x in splits, keep_separator=1)):
        t = "".join(_t)
        if idx % 2 == 0:
            # Even positions hold the operands.
            terms.append("("+shterm(t)+")")
        else:
            # Odd positions hold the separator itself, kept verbatim.
            terms.append(t)
    out = ""
    t1 = terms[0]
    i = 1
    if i == len(terms):
        # No operator present: the phrase is just its only term.
        return t1
    else:
        while i < len(terms):
            op_s = terms[i]
            if op_s in SH_JOINS:
                op = SH_JOINS[op_s]
                t2 = terms[i+1]
                # Every back-reference key found in the right operand is
                # replaced by the (parenthesised) first term; the mapped
                # value ``v`` is not used.
                for k, v in SH_BACKREFS.items():
                    t2 = t2.replace(k, f"({t1})")
                if callable(op):
                    out += op(t1, t2)
                else:
                    # Non-callable entries are indexable; item 0 is used as a
                    # method name applied to the first term.
                    out += f"({t1}.{op[0]}({t2}))"
            # NOTE(review): t1 is never updated, so every operator is combined
            # with the FIRST term -- confirm this is intended for chains.
            # NOTE(review): original nesting of this increment and of the
            # final return is not recoverable from the collapsed source;
            # placement chosen so the loop always advances -- confirm.
            i += 2
        return out
def __parse_disposition_events(soup):
    """Return the nodes that precede the "other events" marker.

    Looks up the section title "Events & Orders of the Court", takes every
    following sibling of the title's parent and splits that sequence at the
    node recognised by ``CaseParser.__is_other_events_and_hearings``.
    Exactly one such marker is expected; only the part before it is returned.
    """
    title = soup.find(
        "div",
        class_=SECTION_TITLE_CLASS,
        string="Events & Orders of the Court",
    )
    siblings = list(title.parent.next_siblings)
    sections = list(
        split_at(siblings, CaseParser.__is_other_events_and_hearings))
    assert len(sections) == 2
    # The second half (everything after the marker) is deliberately dropped.
    disposition_events, _other_events = sections
    return disposition_events
def cluster(self, ident: float = 0.95,
            out_path: str = 'dataset/raw/intervals.clusters.txt',
            n_threads: int = 0, max_mem_mb: int = 4000,
            min_seq_len: int = 50,
            input_path: t.Optional[str] = None) -> t.List[t.List[NamedInterval]]:
    """Cluster the rolled intervals with the external ``cd-hit`` program.

    :param ident: value passed to cd-hit as ``-c``.
    :param out_path: value passed as ``-o``; the cluster listing is then
        read from ``{out_path}.clstr``.
    :param n_threads: value passed as ``-T``.
    :param max_mem_mb: value passed as ``-M``.
    :param min_seq_len: value passed as ``-l``.
    :param input_path: value passed as ``-i``; when ``None`` the intervals
        are first written out via ``self.dump_intervals()``.
    :return: one list of ``NamedInterval`` per cluster; also stored in
        ``self.clusters``.
    :raises RequiresAttributeError: if ``self.rolled`` is ``None``.
    :raises RuntimeError: if cd-hit cannot be started or exits with a
        non-zero status.
    """

    def get_interval(cluster_rec: str,
                     mapping: t.Mapping[t.Tuple[str, int, int], NamedInterval]) \
            -> NamedInterval:
        # Third whitespace-separated field looks like ">name|start-stop...".
        rec = cluster_rec.split()[2].rstrip('...').lstrip('>')
        name = rec.split('|')[0]
        start, stop = map(int, rec.split('|')[1].split('-'))
        return mapping[(name, start, stop)]

    if self.rolled is None:
        raise RequiresAttributeError('rolled')
    if input_path is None:
        input_path = self.dump_intervals()

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from being re-interpreted.
    args = ['cd-hit', '-i', str(input_path), '-o', str(out_path),
            '-c', str(ident), '-T', str(n_threads), '-M', str(max_mem_mb),
            '-l', str(min_seq_len), '-d', '0']
    cmd = ' '.join(args)
    logging.info(f'Will run clustering command {cmd}')
    try:
        # Output is captured on the single run so that a failure can be
        # reported without executing the clustering a second time.
        res = sp.run(args, check=True, capture_output=True, text=True)
    except sp.CalledProcessError as e:
        raise RuntimeError(f'Command {cmd} failed with an error {e}, '
                           f'stdout {e.stdout} and stderr {e.stderr}') from e
    except OSError as e:
        # Without a shell a missing executable surfaces as OSError; keep
        # reporting it as RuntimeError like every other failure.
        raise RuntimeError(
            f'Command {cmd} failed with an error {e}') from e
    logging.debug(res.stdout)

    mapping_ = {(x.name, x.start, x.stop): x
                for x in chain.from_iterable(self.rolled)}
    with open(f'{out_path}.clstr') as f:
        # Lines starting with '>' are cluster headers; the records between
        # two headers form one cluster.  Empty groups are skipped.
        clusters = [
            [get_interval(record, mapping_) for record in records]
            for records in split_at(f, lambda line: line.startswith('>'))
            if records
        ]
    logging.info(f'Clustered intervals into {len(clusters)} clusters')
    self.clusters = clusters
    return clusters
def get_element_block(xml_string: str, first_name: str, second_name: str = None,
                      include_initial: bool = True,
                      include_final: bool = True) -> str:
    """
    Cut the lines lying between two marker lines out of ``xml_string``.

    The text is split into lines, and the lines are split at every line
    accepted by ``are_in(element_names, or_)`` (presumably: the line contains
    one of the given names -- confirm against that helper).  The lines
    between the first and the second marker are returned, joined with
    newlines; the marker lines themselves are added when
    ``include_initial`` / ``include_final`` are true.

    warning: this line-oriented approach is only reliable when the marker
    names occur a precisely known number of times in the document.  Fewer
    than two marker lines raises IndexError; markers beyond the second are
    ignored.
    """
    element_names = (
        [first_name] if second_name is None else [first_name, second_name]
    )
    # Layout with kept separators:
    #   [before, [first marker], between, [second marker], after, ...]
    pieces = tuple(
        split_at(xml_string.splitlines(), are_in(element_names, or_),
                 keep_separator=True))
    selected = pieces[2]
    if include_initial:
        selected = pieces[1] + selected
    if include_final:
        selected = selected + pieces[3]
    return "\n".join(selected)
def __init__(self, input_, recursive = False):
    """Parse the players and their decks from an iterable of text lines.

    ``input_`` is split into blocks at blank lines.  Inside a block, a line
    starting with "Player" contributes the player's name (second word,
    without the trailing colon); every other line is parsed as an integer
    card.  ``previous_positions`` is only created when ``recursive`` is true.

    :raises ValueError: if a card line is not a valid integer.
    """
    self.players = []
    self.decks = []
    self.winner = None
    self.recursive = recursive
    if recursive:
        self.previous_positions = set()
    for block in split_at(input_, lambda line: not line.strip()):
        if not block:
            # Trailing or repeated blank lines yield empty blocks; skipping
            # them keeps ``decks`` aligned with ``players``.
            continue
        deck = []
        for line in block:
            if line.startswith("Player"):
                self.players.append(line.split()[1].strip(":\n"))
            else:
                deck.append(int(line))
        self.decks.append(deck)
def _get_pulldown(flags: List[dict]) -> Tuple[int, Optional[str]]:
    """
    Get the most commonly used Pulldown cycle and its syntax string.

    Returns tuple (cycle, pulldown), or (0, None) if Pulldown is not used.
    """
    # TODO: Find a safe way to get cycle, i.e. not resort to most common digit.
    #       Previously I would do this code on all progressive rff indexes, but when it entered and
    #       exited interlaced sections the right index vs left index were very far apart, messing up
    #       the accuracy of the cycles. I cannot find out why my test source (Family Guy S01E01 USA
    #       NTSC) is still having random different numbers in each (now progressive only) sections.
    indexed_flags = [dict(flag, i=index) for index, flag in enumerate(flags)]

    # One section per run of progressive frames; keep only the positions of
    # frames that carry both rff and tff, and only runs with 2+ of them.
    sections = []
    for run in split_at(indexed_flags, lambda flag: not flag["progressive_frame"]):
        positions = [flag["i"] for flag in run if flag["rff"] and flag["tff"]]
        if len(positions) > 1:
            sections.append(positions)
    if not sections:
        return 0, None

    # Most common distance inside each section, then most common of those.
    section_distances = []
    for indexes in sections:
        distances = Counter(
            right - left for left, right in zip(indexes[::2], indexes[1::2]))
        section_distances.append(distances.most_common(1)[0][0])
    cycle = Counter(section_distances).most_common(1)[0][0] + 1

    pulldown = ["2"] * math.floor(cycle / 2)
    if cycle % 2:
        # Odd cycles end with a "3" in place of the last "2".
        pulldown.pop()
        pulldown.append("3")
    return cycle, ":".join(pulldown)
def __init__(self, ID, l):
    """Store the identifier and split ``l`` into alternatives at '|'.

    ``val`` is only set when ``l`` is exactly 'a' or 'b'.  ``branches`` is
    the list of pieces of ``l`` found between '|' elements.
    """
    if l == 'a' or l == 'b':
        self.val = l
    self.ID = ID
    # The predicate must compare with ``==``; ``x = '|'`` inside a lambda is
    # a syntax error.
    self.branches = list(split_at(l, lambda x: x == '|'))
def __enter__(self):
    """Open ``self.input_path`` and return its blank-line separated blocks.

    Each block is a list of lines without their trailing newline.  When
    ``self.cons`` is set, a lazy ``map`` applying it to every block is
    returned instead of the plain list.
    """
    self.file = open(self.input_path)
    self.file.__enter__()
    stripped = [line.rstrip('\n') for line in self.file.readlines()]
    blocks = list(split_at(stripped, lambda line: line == ""))
    if self.cons is None:
        return blocks
    return map(self.cons, blocks)
def convert_data():
    """Split the module-level ``input_data`` into rules, own ticket, tickets.

    ``input_data`` must contain exactly three sections separated by empty
    strings.  The first line of the second and third sections is skipped
    (presumably a heading -- confirm against the input format).
    """
    sections = list(mit.split_at(input_data, pred=lambda line: line == ""))
    raw_rules, raw_my_ticket, raw_tickets = sections
    rules = get_rules(raw_rules)
    my_ticket = [int(value) for value in raw_my_ticket[1].split(",")]
    tickets = get_tickets(raw_tickets[1:])
    return rules, my_ticket, tickets
def get_words_from_hagen_dictionary() -> Iterable[Word]:
    """Lazily yield every word found in ``hagen-morph.txt``.

    The file (windows-1251) is read line by line; each line is split at '|'.
    Rows with fewer than four fields separate articles.  Every article goes
    through ``strip_article`` and each resulting row through ``row_to_word``.
    """
    with open('hagen-morph.txt', encoding='windows-1251') as file:
        rows = (line.split('|') for line in file)
        for article in mit.split_at(rows, lambda row: len(row) < 4):
            for row in strip_article(article):
                yield row_to_word(row)
def process_input(input_str):
    """Return a lazy iterator of passport dicts parsed from ``input_str``.

    ``input_str`` is an iterable of lines; empty strings separate passports.
    Every line is split at spaces into ``key:value`` fields, and the field
    whose key is 'cid' is dropped.
    """
    def process_passports(passport):
        fields = flatten(entry.split(' ') for entry in passport)
        pairs = (field.split(':') for field in fields)
        return dict(pair for pair in pairs if pair[0] != 'cid')

    return map(process_passports, split_at(input_str, lambda x: x == ''))
def read_input_files(input_file: str) -> tuple[list[str], list[str]]:
    """
    Read the rules and the messages from ``input_file``.

    The file must consist of exactly two sections separated by one blank
    line; both are returned as lists of whitespace-stripped lines.
    """
    with open(input_file) as input_fobj:
        raw_rules, raw_messages = more_itertools.split_at(
            input_fobj, lambda line: not line.strip())
    stripped_rules = [rule.strip() for rule in raw_rules]
    stripped_messages = [message.strip() for message in raw_messages]
    return stripped_rules, stripped_messages
def read_input_files(input_file: str) -> list[list[str]]:
    """
    Read the blank-line separated surveys from ``input_file``.

    Each survey is a list of its whitespace-stripped response lines.
    """
    surveys = []
    with open(input_file) as input_fobj:
        chunks = more_itertools.split_at(
            input_fobj, lambda line: not line.strip())
        for chunk in chunks:
            surveys.append([response.strip() for response in chunk])
    return surveys
def read_input_files(input_file: str) -> dict[int, Tile]:
    """
    Read the blank-line separated tiles from ``input_file``.

    The first line of every block is handed (stripped) to ``parse_raw_head``
    to obtain the key; the remaining lines go to ``Tile.from_raw_body``.
    """
    tiles = {}
    with open(input_file) as input_fobj:
        blocks = more_itertools.split_at(
            input_fobj, lambda line: not line.strip())
        for head, *body in blocks:
            # Key first, then value: same evaluation order as a dict display.
            tile_id = parse_raw_head(head.strip())
            tiles[tile_id] = Tile.from_raw_body(body)
    return tiles
def read_input_files(input_file: str) -> tuple[DeckInfo, DeckInfo]:
    """
    Read the two starting decks from ``input_file``.

    The file must hold exactly two blocks separated by one blank line; each
    is parsed with ``DeckInfo.from_raw``.  The decks are expected to be
    named 'Player 1' and 'Player 2', in that order.
    """
    with open(input_file) as input_fobj:
        raw_first, raw_second = more_itertools.split_at(
            input_fobj, pred=lambda line: not line.strip())
    first_deck = DeckInfo.from_raw(raw_first)
    second_deck = DeckInfo.from_raw(raw_second)
    assert first_deck.player_name == 'Player 1' and second_deck.player_name == 'Player 2'
    return first_deck, second_deck
def no_of_horizontal_strokes(heights):
    """Return the minimal number of horizontal strokes painting the skyline.

    Let ``heights`` be an iterable of non-negative integers describing the
    heights of uniform-width rectangles placed side by side.  The rectangles
    are painted with horizontal strokes of a brush of height 1; a stroke may
    span several adjacent rectangles but cannot cross a gap.

    A new stroke has to start exactly where the skyline rises above the
    rectangle to its left, so the answer is the sum of all rises (the first
    rectangle rises from height 0).  This gives the same result as
    recursively peeling off the common minimum, but runs in a single pass,
    needs no recursion and returns 0 for an empty input.
    """
    strokes = 0
    previous = 0
    for height in heights:
        if height > previous:
            # Every unit the skyline climbs needs a stroke that starts here.
            strokes += height - previous
        previous = height
    return strokes
def main():
    """Count the messages matching rule "0", before and after two rule edits.

    Input is read through ``fileinput``; it must consist of exactly two
    sections (rules, then messages) separated by one blank line.
    """
    raw_rules, messages = split_at(
        fileinput.input(), lambda line: line.strip() == "")
    rules = Rules(raw_rules)

    def count_matches():
        return sum(rules.check_rule(message.strip(), "0")
                   for message in messages)

    print(count_matches())
    rules.parse_rule("8: 42 | 42 8")
    rules.parse_rule("11: 42 31 | 42 11 31")
    print(count_matches())
def test(test_data, write_path):
    """Run the model over ``test_data``, log predictions and return accuracy.

    Each item of ``test_data`` is indexable: item ``[0]`` is the input
    sequence (it must contain two '+' elements, giving three parts) and item
    ``[1]`` the expected output sequence.  One tab-separated line per item is
    written to ``write_path``: first part, the two remaining parts joined
    with ';', the expected output and the generated output.
    """
    corrects = 0
    with open(write_path, 'w', encoding='utf8') as f:
        for d in test_data:
            dy.renew_cg()
            output = generate(d[0], enc_fwd_lstm, enc_bwd_lstm, dec_lstm)
            columns = list(split_at(d[0], lambda x: x == '+'))
            columns[0] = ''.join(columns[0])
            columns[1] = ';'.join(columns[1])
            columns[2] = ';'.join(columns[2])
            expected = ''.join(d[1])
            f.write('\t'.join(columns + [expected, output]) + '\n')
            if output == expected:
                corrects += 1
    accuracy = corrects / len(test_data)
    print(accuracy)
    print(write_path, 'written')
    return accuracy
def from_bytes(index_bytes: bytearray):
    """Rebuild an ``EliasFano`` index from its serialized form.

    Layout read here: one uvarint index type (must be 0), five uvarints
    (n, lower bits, upper bits, byte count of the inferiors, byte count of
    the superiors), then the two little-endian bit blocks.

    Raises ``Exception`` when the index type is not 0.
    """
    type_header = uvarint.cut(1, index_bytes)
    _index_type, index_bytes = type_header.integers[0], type_header.rest
    if _index_type != 0:
        raise Exception("This is not an EliasFano index!")

    size_header = uvarint.cut(5, index_bytes)
    (_n, _lower_bits, _upper_bits,
     inferiors_byte_count, superiors_byte_count) = size_header.integers
    bytes_iter = iter(size_header.rest)

    _inferiors = []
    if inferiors_byte_count:
        packed = int.from_bytes(
            take(inferiors_byte_count, bytes_iter), 'little', signed=False)
        # Zero-padded binary text, cut into groups of ``_lower_bits`` digits.
        inferiors = ("{0:0%db}" % (_n * _lower_bits)).format(packed)
        _inferiors = [
            int("".join(bits), 2)
            for bits in windowed(iter(inferiors), _lower_bits, step=_lower_bits)
        ]

    _superiors = []
    _superiors_prefixSums = []
    if superiors_byte_count:
        packed = int.from_bytes(
            take(superiors_byte_count, bytes_iter), 'little', signed=False)
        # superiors contains exactly '2**(upper_bits)' 0s and exactly 'n' 1s
        superiors = ("{0:0%db}" % (_n + 2 ** _upper_bits)).format(packed)
        # Length of every run of 1s between 0s; the run after the last 0 is
        # dropped.
        runs = split_at(iter(superiors), lambda v: v == '0', keep_separator=False)
        _superiors = [len(run) for run in runs][0:-1]
        _superiors_prefixSums = list(accumulate(_superiors))

    # TODO: implement appropriate constructor
    ef_index = EliasFano([0])
    ef_index._n = _n
    ef_index._u = 2 ** max(1, _lower_bits + _upper_bits)
    ef_index._lower_bits = _lower_bits
    ef_index._upper_bits = _upper_bits
    ef_index._inferiors = _inferiors
    ef_index._superiors = _superiors
    ef_index._superiors_prefixSums = _superiors_prefixSums
    return ef_index
def md_to_dict(cls, lines):
    """Convert the lines of a markdown file into module / learning-unit data.

    ``lines`` is split into units at every line equal to ``DATA_SEP``
    (ignoring newlines); empty units are skipped.  For each unit the
    metadata and the display content are extracted; the metadata's ``type``
    decides where the unit goes:

    * 'lesson' / 'quiz' -- appended to ``learning_units`` together with its
      ``order`` (taken out of the unit data, default 0);
    * 'module' -- parsed with ``ModuleData.md_to_dict``.

    Returns ``{'module': ..., 'learning_units': [...]}``.
    """
    all_unit_data = list(
        more_itertools.split_at(
            lines, lambda x: x.strip('\n') == DATA_SEP
        )
    )
    learning_units = []
    module_data = None
    for unit_lines in all_unit_data:
        if not unit_lines:
            continue
        metadata = _metadata_to_dict(unit_lines)
        display_content = _display_content_to_dict(unit_lines)
        clean_display_content = display_content.strip('\n')
        try:
            meta = metadata.get('meta', {})
            unit_data = meta.get('data', {})
            order = unit_data.pop('order', 0)
            unit_type = meta.get('type', None)
            unit_cls = LessonData if unit_type == 'lesson' else QuizData
        except KeyError:
            print("[ERROR] Data not found. Please check the MD file")
            # Without the unit's data there is nothing to record; carrying on
            # would reuse the previous unit's values or hit unbound names.
            continue
        unit_data.update(
            {unit_cls.display_content_field: clean_display_content}
        )
        if unit_type in ('lesson', 'quiz'):
            is_lesson = unit_type == 'lesson'
            learning_units.append({
                "order": order,
                "type": unit_type,
                "quiz": None if is_lesson else unit_data,
                "lesson": unit_data if is_lesson else None,
            })
        elif unit_type == 'module':
            module_data = ModuleData.md_to_dict(unit_lines)
    return {'module': module_data, 'learning_units': learning_units}
def run() -> None:
    """Print two totals computed over blank-line separated groups of lines.

    Part 1 sums, per group, the number of distinct characters used by any
    line; part 2 sums the number of characters common to every line.
    """
    with open_input(__file__) as file:
        lines = file.read().splitlines()
    groups: List[List[str]] = list(split_at(lines, lambda line: line == ""))

    # part 1
    anyone_counts = [
        pipe(chain.from_iterable(group), set, len)  # type: ignore
        for group in groups
    ]
    print(sum(anyone_counts))

    # part 2
    everyone_counts = [
        len(reduce(set.intersection, map(set, group)))  # type: ignore
        for group in groups
    ]
    print(sum(everyone_counts))
def main(input, part):
    """Print the total number of "yes" answers over all groups.

    ``input`` is a file-like object whose blank lines separate groups.  For
    ``part == "1"`` a group counts every character present in any of its
    lines (union); for ``part == "2"`` only characters present in all of its
    lines (intersection).

    :raises ValueError: if ``part`` is neither "1" nor "2".
    """
    if part == "1":
        # Set of any yes (union)
        set_combiner = set.union
    elif part == "2":
        # Set of all yes (intersection)
        set_combiner = set.intersection
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(f"part must be '1' or '2', got {part!r}")

    # Iterator of lines
    lines = map(lambda x: x.strip(), input.readlines())
    # Iterator of lists of strings
    blocks = split_at(lines, lambda x: x == "")
    # Iterator of lists of sets of chars
    sets = map(lambda block: map(set, block), blocks)
    # Iterator of sets of chars (according to set_combiner function)
    sets = map(apply(set_combiner), sets)
    # Iterator of ints (counts of sets)
    count_yes_answers = map(len, sets)
    # Summation of all ints (total count)
    count_yes_answers = sum(count_yes_answers)
    print(count_yes_answers)
def _topology_builder(
    self,
    coordinates: List[Tuple[float, float]],
    points_intersections: Set[Tuple[float, float]],
):
    """Split a line's coordinates at the intersection points it crosses.

    Only interior coordinates are considered, so the first and last points
    are never touched.  For every intersection found in the interior,
    ``_insert_value`` is called twice: once with the point itself and once
    with the split separator placed "after" it.  The rebuilt list is then
    cut at the "_" markers.  Without any interior intersection the original
    coordinates are returned as a single segment.
    """
    first_value, *middle_values, last_value = coordinates
    is_rebuild = False

    for raw_point in points_intersections:
        point = tuple(raw_point)
        if point not in middle_values:
            continue
        middle_values = self._insert_value(
            middle_values,
            point,
            (point,),
        )
        middle_values = self._insert_value(
            middle_values,
            point,
            self.__ITEM_LIST_SEPARATOR_TO_SPLIT_LINE,
            "after",
        )
        coordinates = [first_value] + middle_values + [last_value]
        is_rebuild = True

    if is_rebuild:
        return list(split_at(coordinates, lambda x: x == "_"))
    return [coordinates]
def read_input_files(
        input_file: str) -> tuple[list[Rule], Ticket, list[Ticket]]:
    """
    Read the rules, the own ticket and the nearby tickets from a file.

    The file must hold exactly three sections separated by blank lines.  The
    second section is the heading "your ticket:" plus one ticket line; the
    third is the heading "nearby tickets:" plus at least one ticket line.
    Every ticket must have as many attributes as there are rules.
    """
    with open(input_file) as input_fobj:
        stripped_input = (line.strip() for line in input_fobj)
        raw_rules, raw_my_ticket, raw_nearby = more_itertools.split_at(
            stripped_input, pred=lambda line: not line)

    rules = [Rule.from_raw(raw_rule) for raw_rule in raw_rules]
    num_attrs = len(rules)

    assert len(raw_my_ticket) == 2 and raw_my_ticket[0].strip() == "your ticket:"
    my_ticket = Ticket.from_raw(raw_my_ticket[1])
    assert len(my_ticket.attrs) == num_attrs

    assert len(raw_nearby) >= 2 and raw_nearby[0].strip() == "nearby tickets:"
    nearby_tickets = [Ticket.from_raw(raw) for raw in raw_nearby[1:]]
    assert all(len(ticket.attrs) == num_attrs for ticket in nearby_tickets)

    return rules, my_ticket, nearby_tickets
def main():
    """Solve the tile grid read through ``fileinput`` and print two results.

    Blank lines separate the tiles.  The grid side is the integer part of
    the square root of the tile count.  When a solution exists, the product
    of its corners is printed, followed by the number of '#' left in the
    assembled image after discounting the '#' of every ``MONSTER`` found.
    Nothing is printed when there is no solution.
    """
    tiles = []
    for raw_tile in split_at(fileinput.input(), lambda line: not line.strip()):
        tiles.append(Tile(raw_tile))

    grid = Grid(int(sqrt(len(tiles))))
    solution = grid.solve(tiles)
    if not solution:
        return

    print(reduce(mul, solution.corners(), 1))
    image = solution.assemble(tiles)
    monsters = image.find(MONSTER)
    total_hashes = "".join(image.grid).count("#")
    roughness = total_hashes - monsters * MONSTER.count("#")
    print(roughness)
def eval_adv(expr: List):
    """Evaluate a token list in which "+" binds tighter than "*".

    * A list made only of ints is multiplied together.
    * Nested lists (sub-expressions) are evaluated first.
    * Otherwise the tokens are split at "*"; every factor containing "+" is
      replaced by the sum of its operands, and the resulting flat list of
      numbers is evaluated again (i.e. multiplied).
    """
    if all(isinstance(token, int) for token in expr):
        return reduce(mul, expr, 1)

    if any(isinstance(token, list) for token in expr):
        flattened = [
            eval_adv(token) if isinstance(token, list) else token
            for token in expr
        ]
        return eval_adv(flattened)

    factors = []
    for factor in split_at(expr, lambda x: x == "*"):
        if "+" in factor:
            addends = lfilter(lambda x: x != "+", factor)
            factors.append([sum(lmap(int, addends))])
        else:
            factors.append(lmap(int, factor))
    return eval_adv(list(concat(factors)))
def main(input, part):
    """Print how many passports in ``input`` pass the validity filters.

    ``input`` is a file-like object; blank lines separate passports and each
    passport is a set of space-separated ``key:value`` entries.  Every
    passport must satisfy ``filter_required_keys``; for ``part == "2"`` the
    seven field-level filters are applied as well.
    """
    # Lines -> "key:value" strings -> [key, value] pairs.  A blank line ends
    # up as the pair [""] and serves as the passport separator.
    lines = (line.strip() for line in input.readlines())
    entries = flatten(line.split(" ") for line in lines)
    pairs = (entry.split(":") for entry in entries)
    blocks = split_at(pairs, lambda x: x == [""])
    dicts = map(dict, blocks)

    # Stack the filters lazily, then count what is left.
    dicts = filter(filter_required_keys, dicts)
    if part == "2":
        field_filters = (
            filter_birth_year,
            filter_issue_year,
            filter_expire_year,
            filter_height,
            filter_hair_color,
            filter_eye_color,
            filter_passport_id,
        )
        for field_filter in field_filters:
            dicts = filter(field_filter, dicts)
    print(ilen(dicts))
# Example: cut a list of strings into groups at divider lines.
import more_itertools

# Sample data: three groups of text separated by dash-only divider lines.
lines = [
    "erhgedrgh",
    "erhgedrghed",
    "esdrhesdresr",
    "ktguygkyuk",
    "-------------",
    "srdthsrdt",
    "waefawef",
    "ryjrtyfj",
    "-------------",
    "edthedt",
    "awefawe",
]

# Any line containing the dash run is treated as a separator and dropped.
# The result is not stored; the comment below shows the value it produces.
list(more_itertools.split_at(lines, lambda x: '-------------' in x))
# [['erhgedrgh', 'erhgedrghed', 'esdrhesdresr', 'ktguygkyuk'], ['srdthsrdt', 'waefawef', 'ryjrtyfj'], ['edthedt', 'awefawe']]
def get_next_concept(self, lexed):
    """Return the highest-precedence concept of the block after the cursor.

    The block consists of the items of ``lexed`` following
    ``self.position``, up to (not including) the next ``None`` item.
    """
    remaining = lexed[self.position+1:]
    blocks = list(split_at(remaining, lambda c: c is None))
    return find_highest_precedence_concept(blocks[0])