def get_lines_till_section(lines_in):
    lines = list(lines_in)
    lines_till_section = itertools.takewhile(
        lambda line: not line.startswith("#"), lines)
    remaining = itertools.dropwhile(
        lambda line: not line.startswith("#"), lines)
    return (peekable(lines_till_section), peekable(remaining))
def __init__(self, vid_read, skel_read: ShotSegmentedReader, skel_draw, **kwargs):
    self.skel_read = skel_read
    self.skel_draw = skel_draw
    self.shot_iter = peekable(iter(self.skel_read))
    self.skel_iter = peekable(iter(self.shot_iter.peek()))
    super().__init__(vid_read, **kwargs)
def syntax_highlight_lines(self, terminal):
    if not terminal.does_styling:
        yield str(self)
        return
    has_items = False
    iter_self = more_itertools.peekable(self)
    for item in iter_self:
        if not has_items:
            yield terminal.bold(terminal.color(15)('['))
        try:
            iter_self.peek()
        except StopIteration:
            for line in item.syntax_highlight_lines(terminal):
                yield ' ' * 2 + line
        else:
            iter_item = more_itertools.peekable(item.syntax_highlight_lines(terminal))
            for line in iter_item:
                try:
                    iter_item.peek()
                except StopIteration:
                    yield ' ' * 2 + line + terminal.color(15)(',')
                else:
                    yield ' ' * 2 + line
        has_items = True
    if has_items:
        yield terminal.bold(terminal.color(15)(']'))
    else:
        yield terminal.bold(terminal.color(15)('[]'))
def next_skel(self):
    try:
        next(self.skel_iter)
        self.skel_iter.peek()
    except StopIteration:
        try:
            next(self.shot_iter)
            self.skel_iter = peekable(iter(self.shot_iter.peek()))
        except StopIteration:
            self.skel_iter = peekable(repeat([]))
def get_section(lines_in):
    lines = list(lines_in)
    if not lines[0].startswith("#"):
        return lines_in
    header_prefix = lines[0].split()[0] + " "
    title = get_section_title(lines[0])
    lines_in_section = []
    remaining = []
    if len(lines) > 1:
        lines_in_section = itertools.takewhile(
            lambda line: not line.startswith(header_prefix), lines[1:])
        remaining = itertools.dropwhile(
            lambda line: not line.startswith(header_prefix), lines[1:])
    return (title, peekable(lines_in_section), peekable(remaining))
def _get_offsets(self, row, expect_keys):
    cells_i = more_itertools.peekable(enumerate(row))
    expect_keys_i = more_itertools.peekable(expect_keys)
    offsets = dict()
    end = object()
    while cells_i.peek(end) != end:
        i, cell = next(cells_i)
        if expect_keys_i.peek(end) == cell.value:
            expect_key = next(expect_keys_i)
            offsets[expect_key] = i
    return offsets
def syntax_highlight_lines(self, terminal):
    if not terminal.does_styling:
        yield str(self)
        return
    has_items = False
    iter_self = more_itertools.peekable(sorted(list(self.keys())))
    for item in iter_self:
        if not has_items:
            yield terminal.bold(terminal.color(15)('{'))
        iter_key = more_itertools.peekable(item.syntax_highlight_lines(terminal))
        for line in iter_key:
            try:
                iter_key.peek()
            except StopIteration:
                last_key_line = line
            else:
                yield ' ' * 2 + line
        try:
            iter_self.peek()
        except StopIteration:
            for line in self[item].syntax_highlight_lines(terminal):
                if last_key_line is None:
                    yield ' ' * 2 + line
                else:
                    yield ' ' * 2 + last_key_line + terminal.color(15)(': ') + line
                last_key_line = None
        else:
            iter_item = more_itertools.peekable(self[item].syntax_highlight_lines(terminal))
            for line in iter_item:
                try:
                    iter_item.peek()
                except StopIteration:
                    if last_key_line is None:
                        yield ' ' * 2 + line + terminal.color(15)(',')
                    else:
                        yield ' ' * 2 + last_key_line + terminal.color(15)(': ') + line + terminal.color(15)(',')
                    last_key_line = None
                else:
                    if last_key_line is None:
                        yield ' ' * 2 + line
                    else:
                        yield ' ' * 2 + last_key_line + terminal.color(15)(': ') + line
                    last_key_line = None
        has_items = True
    if has_items:
        yield terminal.bold(terminal.color(15)('}'))
    else:
        yield terminal.bold(terminal.color(15)('{}'))
def dedupleft(iterable, marker):
    """Deduplicates the marker on the left of an iterable object."""
    iterator = peekable(iterable)
    for x in iterator:
        if iterator.peek(None) != marker:
            break
    return itertools.chain([marker], iterator)
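# Usage sketch (illustrative, not from the original sources): assuming dedupleft()
# above is in scope together with its itertools and peekable imports, a run of
# leading markers collapses to a single marker while the rest passes through.
print(list(dedupleft([0, 0, 0, 1, 2, 0], marker=0)))  # -> [0, 1, 2, 0]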
def test_detection_func(lines, expected):
    lines = more_itertools.peekable(
        enumerate(more_itertools.always_iterable(lines), start=1)
    )
    actual = doctest.detection_func(lines)
    assert actual == expected
def collate(*iterables, **kwargs):
    """
    A slightly faster version of more_itertools.collate
    """
    key = kwargs.get("key", lambda x: x)
    peekables = [peekable(it) for it in iterables]
    peekables = [pee for pee in peekables if pee]  # remove empties
    vals = [key(pee.peek()) for pee in peekables]
    while len(peekables) > 0:
        min_i = 0
        min_val = vals[0]
        for i, val in enumerate(vals):
            if val < min_val:
                min_i = i
                min_val = val
        yield next(peekables[min_i])
        if not peekables[min_i]:
            peekables = [pee for pee in peekables if pee]  # remove empties
            vals = [key(pee.peek()) for pee in peekables]
        else:
            vals[min_i] = key(peekables[min_i].peek())
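# Usage sketch (illustrative, not from the original sources): assuming collate()
# above is in scope, it merges several individually sorted iterables into one
# sorted stream, much like heapq.merge or more_itertools.collate.
print(list(collate([1, 4, 7], [2, 5, 8], [3, 6, 9])))  # -> [1, 2, 3, 4, 5, 6, 7, 8, 9]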
def generate_parsed_tokens(token_str):
    """
    Generate parsed tokens.
    """
    char_stream = more_itertools.peekable(token_str)
    token_text = ''
    token_category = ''
    bucket = 'text'
    while char_stream:
        char = next(char_stream)
        next_char = char_stream.peek(None)
        if char == '\\' and next_char:
            # It's an escaped char; add it to the set.
            token_text += next_char
            next(char_stream)
        elif char == ':' and next_char:
            bucket = 'category'
        elif char == '/' and next_char:
            # We have completed a token!
            yield (token_category, token_text, [], [], token_text)
            # Reset everything.
            bucket = 'text'
            token_text = ''
            token_category = ''
        else:
            # We can simply add the character to the string.
            if bucket == 'text':
                token_text += char
            else:
                token_category += char
    if token_text or token_category:
        yield (token_category, token_text, [], [], token_text)
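# Usage sketch (illustrative, not from the original sources): assuming
# generate_parsed_tokens() above is in scope, "text:category/" pairs are split
# into (category, text, ...) tuples; ':' switches buckets and '/' closes a token.
print(list(generate_parsed_tokens("dog:NN/cat:VB")))
# -> [('NN', 'dog', [], [], 'dog'), ('VB', 'cat', [], [], 'cat')]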
def redirects(source: str, language: str) -> Iterator[CaptureResult[Redirect]]:
    """Return the redirects found in the document."""
    assert (language in redirect_magicwords), \
        'Language {} not in allowed choices.'.format(language)
    redirect_re = redirect_res[language]
    redirect_matches = peekable(redirect_re.finditer(source, concurrent=True))
    for match in redirect_matches:
        target = match.group('link') or ''
        target = target.strip()
        anchor = match.group('anchor') or target
        # newlines in anchor are visualized as spaces.
        anchor = anchor.replace('\n', ' ').strip()
        # split on '#' (link to section)
        tosection = ''
        if '#' in target:
            splittarget = target.split('#', 1)
            target = splittarget[0]
            tosection = splittarget[1]
        # For some reason if wikilink has no pipe, e.g. [[apple]] the regex
        # above captures everything in the anchor group, so we need to set
        # the link to the same page.
        if (anchor and not target):
            target = anchor
        redirect = Redirect(target=target, tosection=tosection)
        yield CaptureResult(redirect, Span(match.start(), match.end()))
def analyze_revisions(page: mwxml.Page, stats: Mapping,
                      only_last_revision: bool) -> None:
    """Analyze revisions."""
    revisions = more_itertools.peekable(page)
    section_names_stats = stats["section_names_per_revision"]
    sections_stats = stats["sections_per_revision"]
    for mw_revision in revisions:
        utils.dot()
        is_last_revision = not utils.has_next(revisions)
        if only_last_revision and not is_last_revision:
            continue
        text = utils.remove_comments(mw_revision.text or "")
        section_names = [section.name.strip().lower()
                         for section, _ in extractors.sections(text)]
        sections_count = len(section_names)
        for section_name in section_names:
            section_names_stats["global"][section_name] += 1
            if is_last_revision:
                section_names_stats["last_revision"][section_name] += 1
        sections_stats["global"][sections_count] += 1
        if is_last_revision:
            sections_stats["last_revision"][sections_count] += 1
        stats["revisions"]["global"] += 1
        if is_last_revision:
            stats["revisions"]["last_revision"] += 1
        stats["performance"]["revisions_analyzed"] += 1
def unified_test_dev_split(inf, ingoldf, keyin, goldkeyin, outf, keyout):
    gold_sent_iter = peekable(iter_sentences(ingoldf))
    rm_inst_ids = []

    def sent_rm_gold(sent):
        gold_sent = gold_sent_iter.peek(None)
        if gold_sent is not None and gold_sent.attrib["id"] == sent.attrib["id"]:
            for instance in sent.xpath("./instance"):
                rm_inst_ids.append(instance.attrib["id"])
            next(gold_sent_iter)
            return BYPASS

    transform_sentences(inf, sent_rm_gold, outf)

    def next_rm():
        try:
            return rm_inst_ids.pop(0)
        except IndexError:
            return None

    rm_id = next_rm()
    for line in keyin:
        if rm_id == line.split()[0]:
            rm_id = next_rm()
            continue
        keyout.write(line)
    assert len(rm_inst_ids) == 0 and rm_id is None
def without_trailing(it, *, trailing, _exhausted=object()):
    """yield all elements of it, except for the last one, if the last one == trailing."""
    it = more_itertools.peekable(it)
    for x in it:
        if it.peek(_exhausted) is _exhausted and x == trailing:
            return
        yield x
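# Usage sketch (illustrative, not from the original sources): assuming
# without_trailing() above is in scope, only a final element equal to `trailing`
# is dropped; everything else passes through unchanged.
print(list(without_trailing([1, 2, 3, 0], trailing=0)))  # -> [1, 2, 3]
print(list(without_trailing([1, 2, 3], trailing=0)))     # -> [1, 2, 3]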
def split_to_sections(lines_in):
    remaining = peekable(lines_in)
    sections = []
    while remaining:
        (title, lines_in_section, remaining) = get_section(remaining)
        sections.append((title, lines_in_section))
    return sections
def test_policy_theoretical_optimal_strategy(symbol='AAPL', sd=dt.datetime(2010, 1, 1),
                                             ed=dt.datetime(2011, 12, 31), st=100000,
                                             shares_contraint=1000):
    df_trades = create_trades_df(start_date=sd, end_date=ed)
    df_trades.set_index('Date', inplace=True)
    df_trades['Symbol'] = symbol
    df_trades['Order'] = 'BUY'
    df_prices = get_data([symbol], pd.date_range(sd, ed), False)
    df_price_filter = df_prices.dropna(subset=[symbol])
    iterator = more_itertools.peekable(df_price_filter.iterrows())
    ltd_shares = 0.0
    for index, row in iterator:
        current_price = row[0]
        next_day_price = iterator.peek((np.NaN, [np.NAN]))[1][0]
        if np.isnan(next_day_price):
            break
        delta = current_price - next_day_price
        if delta > 0:
            # We want to sell, next day is going to be cheaper
            add_sell_order(df_trades, index, shares_contraint, ltd_shares)
        else:
            # We want to buy, next day the price will go up
            add_buy_order(df_trades, index=index, shares_constraint=shares_contraint,
                          ltd_shares=ltd_shares)
        # update_life_to_date_shares
        ltd_shares = update_life_to_date_shares(ltd_shares, df_trades, index, shares_contraint)
    return df_trades.reset_index()
def sections(source: str, include_preamble: bool=False) \
        -> Iterator[CaptureResult[Section]]:
    """Return the sections found in the document."""
    section_header_matches = peekable(section_header_re.finditer(source))
    if include_preamble:
        try:
            body_end = section_header_matches.peek().start()
            body_end -= 1  # Don't include the newline before the next section
        except StopIteration:
            body_end = len(source)
        preamble = Section(
            name='',
            level=0,
            body=source[:body_end],
        )
        yield CaptureResult(preamble, Span(0, body_end))
    for match in section_header_matches:
        name = match.group('section_name')
        level = len(match.group('equals'))
        body_begin = match.end() + 1  # Don't include the newline after
        try:
            body_end = section_header_matches.peek().start()
            body_end -= 1  # Don't include the newline before the next section
        except StopIteration:
            body_end = len(source)
        section = Section(
            name=name,
            level=level,
            body=source[body_begin:body_end],
        )
        yield CaptureResult(section, Span(match.start(), body_end))
def sort_line_chars(
        chars: Sequence[PDFChar],
        interpreter: PDFPageInterpreter) -> Sequence[PDFChar]:
    chars = (normalize_char(char, interpreter) for char in chars)
    chars = sorted(chars, key=lambda char: char["x0"])
    main_chars, combining_chars = partition(
        lambda char: char["text"] and unicodedata.combining(char["text"]),
        chars)
    combining_chars_iter = peekable(iter(combining_chars))
    for main_char in main_chars:
        yield main_char
        while combining_chars_iter:
            combining_char = combining_chars_iter.peek()
            overlap = max(
                min(main_char["x1"], combining_char["x1"])
                - max(main_char["x0"], combining_char["x0"]),
                0)
            if overlap < main_char["width"] * Decimal("0.5"):
                break
            yield combining_char
            next(combining_chars_iter, None)
    assert (next(combining_chars_iter, None) is None)
    return
    yield
def __init__(self, vals):
    inits = []
    rvals = peekable(vals)
    for index in range(0, len(vals) - 1):
        inits.extend([next(rvals), rvals.peek()])
    self.left = [x for x in inits[0:int(len(inits) / 2)]]
    self.right = [x for x in inits[int(len(inits) / 2):]]
def extract_revisions(mw_page: mwxml.Page, stats: Mapping, only_last_revision: bool,
                      debug: bool) -> Iterator[Revision]:
    """Extract the internal links (wikilinks) from the revisions."""
    revisions = more_itertools.peekable(mw_page)
    for mw_revision in revisions:
        utils.dot()
        is_last_revision = not utils.has_next(revisions)
        if only_last_revision and not is_last_revision:
            continue
        text = utils.remove_comments(mw_revision.text or '')
        wikilinks = (wikilink for wikilink, _ in extractors.wikilinks(
            page_title=mw_page.title,
            source=text,
            sections=extractors.sections(text),
            debug=debug,
        ))
        yield Revision(id=mw_revision.id,
                       parent_id=mw_revision.parent_id,
                       user=mw_revision.user,
                       minor=mw_revision.minor,
                       comment=mw_revision.comment,
                       model=mw_revision.model,
                       format=mw_revision.format,
                       timestamp=mw_revision.timestamp.to_json(),
                       text=text,
                       wikilinks=wikilinks)
        stats['performance']['revisions_analyzed'] += 1
def parse_orgmode(f: IO, subprovider: str) -> Iterator[Item]:
    current_datetime: Optional[datetime.datetime] = None
    current_paragraph: List[str] = []
    lines = peekable(f)
    for line in lines:
        line_clean = line.strip()
        if line_clean:
            m = regex_heading.match(line_clean)
            # Title line
            if m:
                if m.group('todo'):
                    current_datetime = None
                else:
                    current_datetime = datetime.datetime.strptime(
                        m.group('date'), '%Y-%m-%d %a')
            # Paragraph line but not before first heading
            elif current_datetime:
                current_paragraph.append(line_clean)
        # Empty line after paragraph or last line of file
        if not line_clean or not lines:
            if current_datetime and current_paragraph:
                yield Item.normalized(
                    datetime_=current_datetime,
                    text='\n'.join(current_paragraph),
                    provider=provider,
                    subprovider=subprovider,
                    all_day=True,
                )
            current_paragraph.clear()
def document_dependency_graphs(self, document):
    document = peekable(iter(document))
    while document:
        sentence = list(takewhile(lambda l: l != '</s>', document))
        if not sentence:
            # It might happen because of the snippets like this:
            #
            #     plates plate NNS 119 116 PMOD
            #     </text>
            #     </s>
            #     <text id="ukwac:http://www.learning-connections.co.uk/curric/cur_pri/artists/links.html">
            #     <s>
            #     Ideas Ideas NP 1 14 DEP
            #
            # where </text> is before </s>.
            continue
        try:
            dg = DependencyGraph(
                sentence,
                cell_extractor=ukwac_cell_extractor,
                cell_separator='\t',
            )
        except DependencyGraphError:
            logger.exception("Couldn't instantiate a dependency graph.")
        else:
            for node in dg.nodes.values():
                if self.lowercase_stem and node['lemma']:
                    node['lemma'] = node['lemma'].lower()
            yield dg
def parse(self, fh): """Generate tap.line.Line objects, given a file-like object `fh`. `fh` may be any object that implements both the iterator and context management protocol (i.e. it can be used in both a "with" statement and a "for...in" statement.) Trailing whitespace and newline characters will be automatically stripped from the input lines. """ with fh: try: first_line = next(fh) except StopIteration: return first_parsed = self.parse_line(first_line.rstrip()) fh_new = itertools.chain([first_line], fh) if first_parsed.category == 'version' and \ first_parsed.version >= 13: if ENABLE_VERSION_13: fh_new = peekable(itertools.chain([first_line], fh)) self._try_peeking = True else: # pragma no cover print(""" WARNING: Optional imports not found, TAP 13 output will be ignored. To parse yaml, see requirements in docs: https://tappy.readthedocs.io/en/latest/consumers.html#tap-version-13""") for line in fh_new: yield self.parse_line(line.rstrip(), fh_new)
def documents(self, path):
    file_pass, path = path
    with gzip.open(path, 'rt', encoding='ISO-8859-1') as f:
        lines = (l.rstrip() for l in f)
        lines = peekable(
            l for l in lines
            if not l.startswith('<text') and l != '<s>'
        )
        c = 0
        while lines:
            if (c % (10 ** 4)) == 0:
                logger.debug(
                    '%s text elements are read, every %s is processed. '
                    'It\'s about %.2f of the file.',
                    c,
                    self.file_passes,
                    c / 550000,  # An approximate number of texts in a file.
                )
            if (self.limit is not None) and (c > self.limit):
                logger.info('Limit of sentences is reached.')
                break
            document = list(takewhile(lambda l: l != '</text>', lines))
            if (c % self.file_passes) == file_pass:
                yield document
            c += 1
def __init__(
    self,
    iterable: Iterable[Value],
    key: Callable[[Value], Key],
) -> None:
    """Initialize"""
    self._groups = peekable(groupby(iterable, key))
def extract_revisions( mw_page: mwxml.Page, language: str, stats: Mapping, only_last_revision: bool) -> Iterator[Revision]: """Extract the internall links (wikilinks) from the revisions.""" revisions = more_itertools.peekable(mw_page) for mw_revision in revisions: utils.dot() is_last_revision = not utils.has_next(revisions) if only_last_revision and not is_last_revision: continue text = utils.remove_comments(mw_revision.text or '') wikilinks = (wikilink for wikilink, _ in extractors.wikilinks(text, extractors.sections(text))) yield Revision( id=mw_revision.id, parent_id=mw_revision.parent_id, user=mw_revision.user, minor=mw_revision.minor, comment=mw_revision.comment, model=mw_revision.model, format=mw_revision.format, timestamp=mw_revision.timestamp.to_json(), text=text, wikilinks=wikilinks )
def add_dummy_entries(entries):
    entries = peekable(entries)
    prev_entry = next(entries)
    yield prev_entry
    output_month, output_year = prev_entry["date"].month, prev_entry["date"].year
    while entries:
        entry = next(entries)
        while output_month != entry["date"].month or output_year != entry["date"].year:
            output_month = output_month - 1
            if output_month < 1:
                output_month = 12
                output_year = output_year - 1
            if output_month != entry["date"].month or output_year != entry["date"].year:
                yield {
                    "type": "dummy",
                    "subtype": "dummy",
                    "instance": None,
                    "date": datetime(year=output_year, month=output_month, day=1),
                }
        yield entry
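# Usage sketch (illustrative, not from the original sources): assuming
# add_dummy_entries() above is in scope and entries are dicts sorted by "date"
# in descending order, gaps between months are filled with dummy entries.
from datetime import datetime

entries = [
    {"type": "post", "subtype": "blog", "instance": None, "date": datetime(2021, 5, 1)},
    {"type": "post", "subtype": "blog", "instance": None, "date": datetime(2021, 2, 1)},
]
for e in add_dummy_entries(entries):
    print(e["type"], e["date"].strftime("%Y-%m"))
# post 2021-05, dummy 2021-04, dummy 2021-03, post 2021-02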
def augment_timeline(entries):
    last_year = None
    last_month = None
    entries = peekable(add_dummy_entries(entries))
    while entries:
        entry = next(entries)
        nentry = entries.peek(None)
        tl = {
            "year_first": False,
            "year_last": False,
            "month_first": False,
            "month_last": False,
            "entry_id": "{}:{}:{}".format(
                entry["type"], entry["subtype"], entry["instance"].pk
            ) if entry["instance"] else None,
        }
        if last_year != entry["date"].year:
            tl["year_first"] = True
        if last_month != entry["date"].month:
            tl["month_first"] = True
        if not nentry or nentry["date"].year != entry["date"].year:
            tl["year_last"] = True
        if not nentry or nentry["date"].month != entry["date"].month:
            tl["month_last"] = True
        yield dict(entry, tl=tl)
        last_year = entry["date"].year
        last_month = entry["date"].month
def parse(self, fh): """Generate tap.line.Line objects, given a file-like object `fh`. `fh` may be any object that implements both the iterator and context management protocol (i.e. it can be used in both a "with" statement and a "for...in" statement.) Trailing whitespace and newline characters will be automatically stripped from the input lines. """ with fh: try: first_line = next(fh) except StopIteration: return first_parsed = self.parse_line(first_line.rstrip()) fh_new = itertools.chain([first_line], fh) if first_parsed.category == "version" and first_parsed.version >= 13: if ENABLE_VERSION_13: fh_new = peekable(itertools.chain([first_line], fh)) self._try_peeking = True else: # pragma no cover print( """ WARNING: Optional imports not found, TAP 13 output will be ignored. To parse yaml, see requirements in docs: https://tappy.readthedocs.io/en/latest/consumers.html#tap-version-13""" ) for line in fh_new: yield self.parse_line(line.rstrip(), fh_new)
def __init__(
    self,
    server_list,
    nickname,
    realname,
    reconnection_interval=missing,
    recon=ExponentialBackoff(),
    **connect_params,
):
    super(SingleServerIRCBot, self).__init__()
    self.__connect_params = connect_params
    self.channels = IRCDict()
    specs = map(ServerSpec.ensure, server_list)
    self.servers = more_itertools.peekable(itertools.cycle(specs))
    self.recon = recon
    # for compatibility
    if reconnection_interval is not missing:
        warnings.warn(
            "reconnection_interval is deprecated; "
            "pass a ReconnectStrategy object instead"
        )
        self.recon = ExponentialBackoff(min_interval=reconnection_interval)

    self._nickname = nickname
    self._realname = realname
    for i in [
        "disconnect",
        "join",
        "kick",
        "mode",
        "namreply",
        "nick",
        "part",
        "quit",
    ]:
        self.connection.add_global_handler(i, getattr(self, "_on_" + i), -20)
def _split_headings(self, lines, hprefix):
    lines = peekable(lines)
    intro = []
    while lines and not lines.peek().startswith(hprefix):
        intro.append(next(lines))
    sections = []
    current = None
    for line in lines:
        if line.startswith(hprefix):
            if current:
                sections.append(current)
            current = {
                "title": line[len(hprefix):].strip(),
                "lines": [],
            }
        else:
            current["lines"].append(line)
    if current:
        sections.append(current)
    for section in sections:
        if not section["lines"] or section["lines"][-1] != "":
            section["lines"].append("")
    return intro, sections
def user(username):
    # query database for list of user's records (6 in a row)
    records = []
    row = []
    record = {}
    db = get_db()
    cursor = db.cursor(buffered=True)
    cursor.execute(
        "SELECT release_title, artist, discogs_uri, image_url "
        "FROM record LEFT JOIN user ON user_id = user.id "
        "WHERE username = %s",
        (username,)
    )
    i = 1
    p = peekable(cursor)
    for result in p:
        record['release_title'] = result[0]
        record['artist'] = result[1]
        record['uri'] = result[2]
        record['image_url'] = result[3]
        row.append(record)
        record = {}
        if i % 6 == 0 or p.peek(None) is None:
            records.append(row)
            row = []
        i += 1
    n_items = i - 1
    cursor.close()
    return render_template('friend/user.html', records=records,
                           n_items=n_items, username=username)
def test_policy(symbol='AAPL', sd=dt.datetime(2010, 1, 1), ed=dt.datetime(2012, 12, 31),
                st=100000, shares_contraint=1000, look_back_period=14):
    df_trades = create_trades_df(start_date=sd, end_date=ed)
    df_trades.set_index('Date', inplace=True)
    df_trades['Symbol'] = symbol
    df_trades['Order'] = 'NOTHING'
    df_prices_sym = get_data([symbol], pd.date_range(sd, ed), False)
    df_prices_idx = get_data(['SPY'], pd.date_range(sd, ed), False, dropNonTradingSPY=False)
    df_price_filter_sym = df_prices_sym.dropna(subset=[symbol])
    df_price_filter_idx = df_prices_idx.dropna(subset=['SPY'])
    iterator = more_itertools.peekable(df_price_filter_sym.iloc[look_back_period:].iterrows())
    ltd_shares = 0.0
    prev_sym_price_over_sma = indicator.get_price_over_sma(df_price_filter_sym.iloc[:look_back_period])
    for index, row in iterator:
        # get current price to determine when we should close the position.
        df_prices_historical_sym = df_price_filter_sym.loc[:index][-look_back_period:]
        # Todo: Can we do this in one shot?
        df_prices_historical_idx = df_price_filter_idx.loc[:index][-look_back_period:]
        sym_price_over_sma = indicator.get_price_over_sma(df_prices_historical_sym)
        sym_bollinger_band_percent = indicator.get_bollinger_band_percent(df_prices_historical_sym)
        sym_rsi = indicator.get_rsi(df_prices_historical_sym, look_back_period)
        idx_price_over_sma = indicator.get_price_over_sma(df_prices_historical_idx)
        idx_bollinger_band_percent = indicator.get_bollinger_band_percent(df_prices_historical_idx)
        idx_rsi = indicator.get_rsi(df_prices_historical_idx, look_back_period)
        signal = get_signal(sym_price_over_sma, sym_bollinger_band_percent, sym_rsi,
                            idx_price_over_sma, idx_bollinger_band_percent, idx_rsi,
                            prev_sym_price_over_sma)
        print(signal)
        process_signal(df_trades, index, signal, ltd_shares, shares_contraint)
        ltd_shares = update_life_to_date_shares(ltd_shares, df_trades, index, shares_contraint)
        prev_sym_price_over_sma = sym_price_over_sma
    return df_trades.reset_index()
def merge_styles(self, offline: bool) -> Iterator[Fuss]:
    """Merge one or multiple style files."""
    config = self.read_configuration()

    # pylint: disable=import-outside-toplevel
    from nitpick.style import StyleManager

    style = StyleManager(self, offline, config.cache)
    base = config.file.expanduser().resolve().as_uri() if config.file else None
    style_errors = list(
        style.find_initial_styles(peekable(always_iterable(config.styles)), base))
    if style_errors:
        raise QuitComplainingError(style_errors)

    self.style_dict = style.merge_toml_dict()

    from nitpick.flake8 import NitpickFlake8Extension

    minimum_version = search_json(self.style_dict, NITPICK_MINIMUM_VERSION_JMEX, None)
    logger.debug(f"Minimum version: {minimum_version}")
    if minimum_version and version_to_tuple(
            NitpickFlake8Extension.version) < version_to_tuple(minimum_version):
        yield Reporter().make_fuss(
            ProjectViolations.MINIMUM_VERSION,
            project=PROJECT_NAME,
            expected=minimum_version,
            actual=NitpickFlake8Extension.version,
        )

    self.nitpick_section = self.style_dict.get("nitpick", {})
    self.nitpick_files_section = self.nitpick_section.get("files", {})
def create_partition_buffers(stream):
    bucketed_stream = more_itertools.bucket(stream, key=attrgetter("partition"))
    partition_buffers: Dict[int, Iterator[StreamEvent]] = {
        p: more_itertools.peekable(iter(bucketed_stream[p]))
        for p in range(partition_count)
    }
    global_event_buffer = bucketed_stream[StreamEvent.ALL_PARTITIONS]
    return partition_buffers, global_event_buffer
def auto_map_cols(source, cols):
    src_cols = peekable(source).peek().keys()
    col_map = dict(zip(src_cols, cols))

    def _transform(row):
        return map_cols(col_map)

    return _transform
def _read():
    with open(path, **fileparams) as csvfile:
        csvreader = csv.reader(csvfile, **csvparams)
        if headers:
            cols = next(csvreader)
        else:
            cols = range(len(peekable(csvreader).peek()))
        for row in csvreader:
            yield OrderedDict(zip(cols, row))
def _read():
    with open(path, **fileparams) as fwfile:
        fwreader = parse_fw_file(fwfile, fieldwidths)
        if headers:
            cols = next(fwreader)
        else:
            cols = range(len(peekable(fwreader).peek()))
        for row in fwreader:
            yield OrderedDict(zip(cols, row))
def _write(source):
    with open(path, mode) as fwfile:
        vals = peekable(source)
        if headers:
            padded_cols = pad_row(vals.peek().keys(), fieldwidths)
            fwfile.write(''.join(padded_cols))
        for row in vals:
            padded_row = pad_row(row.values(), fieldwidths, **params)
            fwfile.write(''.join(padded_row))
def sum_folder(channel):
    import pickle
    import logging

    from more_itertools import peekable
    import pandas as pd

    from fowler.corpora.execnet import initialize_channel

    _, data = initialize_channel(channel)

    logger = logging.getLogger('execnet.fum_folder')

    kwargs = data.get('kwargs', {})
    instance = data['instance']
    folder_name = data['folder_name']
    folder = getattr(instance, folder_name)

    result = None
    for item in channel:
        if item == ('message', 'terminate'):
            if result is not None:
                logger.debug('Sending the final result, size: %s', len(result))
                channel.send(('result', pickle.dumps(result)))
            break

        type_, data = item
        if type_ == 'task':
            intermediate_results = peekable(enumerate(folder(data, **kwargs)))

            if intermediate_results:
                if result is None:
                    _, result = next(intermediate_results)

                # TODO: It would be nice to catch any exception here
                # (especially one that happens inside of the folder() call)
                # and report it to the master.
                # Same applies to the next() call above.
                for i, r in intermediate_results:
                    logger.debug('Iteration: %s, result size: %s', i, len(result))
                    result = pd.concat(
                        [result, r],
                        copy=False,
                    ).groupby(level=result.index.names).sum()

                    if (i % 10) == 9:
                        result.sort(ascending=False, inplace=True)
                        half = len(result) // 2
                        logger.debug('Sending a result. Result size: %s', half)
                        channel.send(('result', pickle.dumps(result.tail(half))))
                        result = result.head(-half)

        channel.send(('message', 'send_next'))
def _write(source):
    with open(path, mode) as csvfile:
        vals = peekable(source)
        cols = vals.peek().keys()
        csv_writer = csv.DictWriter(csvfile, fieldnames=cols, **csvparams)
        if headers:
            csv_writer.writeheader()
        for row in vals:
            csv_writer.writerow(row)
def extract_island(text):
    tokens = tokenize_finditer(text, LEXICON)
    tokens = peekable(tokens)
    while tokens.peek(None) is not None:
        if tokens.peek()[0] == 'doi_start':
            yield ('doi', read_doi(tokens))
        next(tokens)
def openCsvReader(filename):
    """
    Open a csv reader on the given filename.
    Then use like 'for row in reader:'
    Wraps the reader iterator in peekable - see http://stackoverflow.com/a/27698681/243392
    Then can say reader.peek() to just look at the current record.
    """
    f = open(filename, 'rt')
    f = dataLines(f)  # ignore comments, blank lines and header row
    reader = more_itertools.peekable(csv.reader(f))
    return reader, f
def __init__(self, items, f_map, f_reduce, starting_level, mandatory_levels,
             mandatory_levels_all):
    self.mandatory_levels_max = max(mandatory_levels) if mandatory_levels else None
    self.iter = peekable(with_levels(
        items,
        starting_level=starting_level,
        mandatory_levels=(sorted(mandatory_levels) if not mandatory_levels_all else None),
        mandatory_levels_all=mandatory_levels_all,
    ))
    self.f_map = (self.simple_struct_from_node if f_map is SIMPLE_MAP
                  else (lambda x: x) if f_map is None
                  else f_map)
    self.f_reduce = f_reduce or _REDUCE_DEFAULT
    self.reduce_of_no_children = self.f_reduce([])
    self.ni_active = None
def extract_search(text, lexicon=LEXICON):
    last_end = 0
    for match in DOI_START_RE.finditer(text):
        if match.span()[0] > last_end:
            tokens = tokenize_search(text, match.span()[0], lexicon=lexicon)
            tokens = peekable(tokens)
            doi = read_doi(tokens)
            last_end = match.span()[0] + len(doi)
            yield Identifier('doi', doi)
        else:
            last_end = max(match.span()[1], last_end)
def extract(self):
    with open(self.path, **self.fileparams) as f:
        reader = self._get_reader(f)
        if self.headers:
            cols = next(reader)
        else:
            reader = peekable(reader)
            cols = range(len(reader.peek()))
        for row in reader:
            yield OrderedDict(zip(cols, row))
def wikilinks(source: str, sections: Iterator[CaptureResult[Section]]) \
        -> Iterator[CaptureResult[Wikilink]]:
    """Return the wikilinks found in the document."""
    wikilink_matches = peekable(wikilink_re.finditer(source, concurrent=True))

    sections_limits = [SectionLimits(name=section.name,
                                     level=section.level,
                                     number=idx,
                                     begin=span.begin,
                                     end=span.end)
                       for idx, (section, span) in enumerate(sections, 1)]

    last_section_seen = 0
    for match in wikilink_matches:
        link = match.group('link') or ''
        link = link.strip()
        anchor = match.group('anchor') or link
        # newlines in anchor are visualized as spaces.
        anchor = anchor.replace('\n', ' ').strip()

        link_start = match.start()

        link_section_number = 0
        link_section_name = '---~--- incipit ---~---'
        link_section_level = 0
        for section in sections_limits[last_section_seen:]:
            if section.begin <= link_start <= section.end:
                link_section_number = section.number
                link_section_name = section.name
                link_section_level = section.level
                last_section_seen = (link_section_number - 1) \
                    if link_section_number > 0 else 0
                break

        # For some reason if wikilink has no pipe, e.g. [[apple]] the regex
        # above captures everything in the anchor group, so we need to set
        # the link to the same page.
        if (anchor and not link):
            link = anchor

        wikilink = Wikilink(
            link=link,
            anchor=anchor,
            section_name=link_section_name,
            section_level=link_section_level,
            section_number=link_section_number
        )

        yield CaptureResult(wikilink, Span(link_start, match.end()))
def __init__(self, mutations, mutation_data_factory):
    """
    Initialize a new queue with a MutationData iterator

    :param mutations: any MutationData producing Iterator
    :param mutation_data_factory: a MutationDataFactory to be used to produce new mutations for the ONPs
    """
    self.mutations = more_itertools.peekable(mutations)
    self.sns = SampleNameSelector(self.mutations.peek())
    self.queue = collections.defaultdict(list)
    self.indel_queue = []
    self.last = 0
    self.logger = logging.getLogger(__name__)
    self.warned_about_order = False
    self._mutation_data_factory = mutation_data_factory
def pre_process(self):
    super(P, self).pre_process()
    # Compute four inter vectors generators for each pattern note, with four turning point types
    # tp_types 0, 1 iterate through a source sorted by onset (attack)
    # while types 2, 3 iterate through a source sorted by offset (release)
    for note in self.patternPointSet:
        note.source_ptrs = [
            peekable((lambda p: (InterNoteVector(p, self.patternPointSet, s, self.sourcePointSet,
                                                 self.settings['interval_func'], tp_type=0)
                                 for s in self.sourcePointSet))(note)),
            peekable((lambda p: (InterNoteVector(p, self.patternPointSet, s, self.sourcePointSet,
                                                 self.settings['interval_func'], tp_type=1)
                                 for s in self.sourcePointSet))(note)),
            peekable((lambda p: (InterNoteVector(p, self.patternPointSet, s, self.sourcePointSet_offsetSort,
                                                 self.settings['interval_func'], tp_type=2)
                                 for s in self.sourcePointSet_offsetSort))(note)),
            peekable((lambda p: (InterNoteVector(p, self.patternPointSet, s, self.sourcePointSet_offsetSort,
                                                 self.settings['interval_func'], tp_type=3)
                                 for s in self.sourcePointSet_offsetSort))(note)),
        ]
def _mapquotes(pagetags):
    items = []
    category = ""
    pagetagitr = more_itertools.peekable(pagetags)
    for tag in pagetagitr:
        if tag.name == "h2":
            category = tag.text
            continue
        if tag.name == "p" and pagetagitr.peek(tag).name == "p":
            matchresult = re.match(r'(“.+”)\s(—|–)\s(.+)',
                                   tag.text + " " + pagetagitr.peek(tag).text)
            if matchresult:
                items.append(Quote(category, matchresult.group(1), matchresult.group(3)))
                next(pagetagitr)
            continue
    return items
def extract_search(text: str) -> Iterator[CaptureResult[Identifier]]:
    last_end = 0
    for match in DOI_START_RE.finditer(text):
        begin_pos = match.start()
        if begin_pos > last_end:
            tokens = tokenize_search(text, begin_pos)
            tokens = peekable(tokens)
            identifier = read_doi(tokens)
            end_pos = begin_pos + len(identifier.id)
            yield CaptureResult(identifier, Span(begin_pos, end_pos))
            last_end = end_pos
        else:
            last_end = max(match.end(), last_end)
def test_broken_diffs():
    revision_docs = [
        {'id': 2, 'text': "Apples are blue.", 'page': {'title': "Foo"},
         'diff': {'last_id': 3, 'ops': []}},
        {'id': 3, 'text': "Apples are red.", 'page': {'title': "Foo"},
         'diff': {'last_id': 1, 'ops': []}},
        {'id': 4, 'text': "Apples are a red fruit", 'page': {'title': "Foo"},
         'diff': {'last_id': 2, 'ops': []}},
        {'id': 5, 'text': "Apples are a lame fruit", 'page': {'title': "Foo"},
         'diff': {'last_id': 4, 'ops': []}}
    ]
    revision_docs = peekable(revision_docs)
    broken_docs = list(read_broken_docs(revision_docs))
    print([d['id'] for d in broken_docs])
    eq_(len(broken_docs), 3)
def parse_ngram_output(ngrams_len, ngram_output):
    lines = peekable(ngram_output.split("\n"))
    res = []
    for i in xrange(ngrams_len):
        line = lines.next()
        while lines.peek().startswith("\t"):
            line = lines.next()
        m = re.findall(r"\] ([^\s]+) \[", line)
        if m:
            last_prob = float(m[0])
        else:
            print >>sys.stderr, "ERROR parsing lastprob: %s" % line
        lines.next()
        lines.next()
        res.append(last_prob)
    lines.next()
    return res
def sequence(*iterables, **kwargs):
    compare = kwargs.get('compare', lambda i1, i2: i1 < i2)
    iterables = [peekable(it) for it in iterables]
    done = False
    while not done:
        next_i = None
        for i, it in enumerate(iterables):
            if it:  # Not empty
                if next_i is None or \
                        compare(it.peek(), iterables[next_i].peek()):
                    next_i = i
        if next_i is None:
            done = True
        else:
            yield next(iterables[next_i])
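# Usage sketch (illustrative, not from the original sources): assuming sequence()
# above is in scope, it interleaves already-sorted iterables according to the
# `compare` predicate (ascending order by default).
print(list(sequence([1, 4, 7], [2, 5, 8], [0, 3])))  # -> [0, 1, 2, 3, 4, 5, 7, 8]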
def extract_revisions( mw_page: mwxml.Page, language: str, stats: Mapping, only_last_revision: bool ) -> Iterator[Revision]: """Extract the sections which are bibliography from the revisions.""" section_names_stats = stats["section_names"] revisions = more_itertools.peekable(mw_page) for mw_revision in revisions: utils.dot() is_last_revision = not utils.has_next(revisions) if only_last_revision and not is_last_revision: continue text = utils.remove_comments(mw_revision.text or "") sections = (section for section, _ in extractors.sections(text)) bibliography_sections = list(section for section in sections if is_bibliography(section.name, language)) for section in bibliography_sections: section_names_stats["global"][section.name] += 1 if is_last_revision: section_names_stats["last_revision"][section.name] += 1 # TODO: use section.fullbody text = "".join(section.full_body for section in bibliography_sections) yield Revision( id=mw_revision.id, parent_id=mw_revision.parent_id, user=mw_revision.user, minor=mw_revision.minor, comment=mw_revision.comment, model=mw_revision.model, format=mw_revision.format, timestamp=mw_revision.timestamp.to_json(), text=text, sections=bibliography_sections, ) stats["performance"]["revisions_analyzed"] += 1