Example #1
def get_lines_till_section(lines_in):
    lines = list(lines_in)
    lines_till_section = itertools.takewhile(
        lambda line: not line.startswith("#"), lines)
    remaining = itertools.dropwhile(lambda line: not line.startswith("#"),
                                    lines)
    return (peekable(lines_till_section), peekable(remaining))
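A minimal standalone sketch of the same takewhile/dropwhile split (the input lines are illustrative):

import itertools
from more_itertools import peekable

lines = ["intro line", "another intro line", "# Section 1", "body line"]
before = list(itertools.takewhile(lambda l: not l.startswith("#"), lines))
after = peekable(itertools.dropwhile(lambda l: not l.startswith("#"), lines))
# before == ["intro line", "another intro line"]; after.peek() == "# Section 1"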
Example #2
 def __init__(self, vid_read, skel_read: ShotSegmentedReader, skel_draw,
              **kwargs):
     self.skel_read = skel_read
     self.skel_draw = skel_draw
     self.shot_iter = peekable(iter(self.skel_read))
     self.skel_iter = peekable(iter(self.shot_iter.peek()))
     super().__init__(vid_read, **kwargs)
Example #3
 def syntax_highlight_lines(self, terminal):
     if not terminal.does_styling:
         yield str(self)
         return
     has_items = False
     iter_self = more_itertools.peekable(self)
     for item in iter_self:
         if not has_items:
             yield terminal.bold(terminal.color(15)('['))
         try:
             iter_self.peek()
         except StopIteration:
             for line in item.syntax_highlight_lines(terminal):
                 yield ' ' * 2 + line
         else:
             iter_item = more_itertools.peekable(item.syntax_highlight_lines(terminal))
             for line in iter_item:
                 try:
                     iter_item.peek()
                 except StopIteration:
                     yield ' ' * 2 + line + terminal.color(15)(',')
                 else:
                     yield ' ' * 2 + line
         has_items = True
     if has_items:
         yield terminal.bold(terminal.color(15)(']'))
     else:
         yield terminal.bold(terminal.color(15)('[]'))
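The core trick above is peeking to detect the final element; a minimal standalone sketch of that pattern (the items are illustrative):

from more_itertools import peekable

items = peekable(["alpha", "beta", "gamma"])
for item in items:
    try:
        items.peek()
        print(item + ",")   # more items follow, so emit a trailing comma
    except StopIteration:
        print(item)         # last item: no trailing comma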
Example #4
 def next_skel(self):
     try:
         next(self.skel_iter)
         self.skel_iter.peek()
     except StopIteration:
         try:
             next(self.shot_iter)
             self.skel_iter = peekable(iter(self.shot_iter.peek()))
         except StopIteration:
             self.skel_iter = peekable(repeat([]))
Example #5
def get_section(lines_in):
  lines = list(lines_in)
  if not lines[0].startswith("#"):
    return lines_in
  header_prefix = lines[0].split()[0] + " "
  title = get_section_title(lines[0])
  lines_in_section = []
  remaining = []
  if len(lines) > 1:
    lines_in_section = itertools.takewhile(lambda line: not line.startswith(header_prefix), lines[1:])
    remaining = itertools.dropwhile(lambda line: not line.startswith(header_prefix), lines[1:])
  return (title, peekable(lines_in_section), peekable(remaining))
Example #6
	def _get_offsets(self, row, expect_keys):
		cells_i = more_itertools.peekable(enumerate(row))
		expect_keys_i = more_itertools.peekable(expect_keys)
		offsets = dict()
		end = object()

		while cells_i.peek(end) != end:
			i, cell = next(cells_i)
			if expect_keys_i.peek(end) == cell.value:
				expect_key = next(expect_keys_i)
				offsets[expect_key] = i

		return offsets
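A standalone sketch of the sentinel-default peek used above, which avoids catching StopIteration (values are illustrative):

from more_itertools import peekable

end = object()                       # unique sentinel, can never equal real data
numbers = peekable(iter([1, 2, 3]))
while numbers.peek(end) is not end:  # peek(default) returns the sentinel when exhausted
    print(next(numbers))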
Example #7
 def syntax_highlight_lines(self, terminal):
     if not terminal.does_styling:
         yield str(self)
         return
     has_items = False
     iter_self = more_itertools.peekable(sorted(list(self.keys())))
     for item in iter_self:
         if not has_items:
             yield terminal.bold(terminal.color(15)('{'))
         iter_key = more_itertools.peekable(item.syntax_highlight_lines(terminal))
         for line in iter_key:
             try:
                 iter_key.peek()
             except StopIteration:
                 last_key_line = line
             else:
                 yield ' ' * 2 + line
         try:
             iter_self.peek()
         except StopIteration:
             for line in self[item].syntax_highlight_lines(terminal):
                 if last_key_line is None:
                     yield ' ' * 2 + line
                 else:
                     yield ' ' * 2 + last_key_line + terminal.color(15)(': ') + line
                     last_key_line = None
         else:
             iter_item = more_itertools.peekable(self[item].syntax_highlight_lines(terminal))
             for line in iter_item:
                 try:
                     iter_item.peek()
                 except StopIteration:
                     if last_key_line is None:
                         yield ' ' * 2 + line + terminal.color(15)(',')
                     else:
                         yield ' ' * 2 + last_key_line + terminal.color(15)(': ') + line + terminal.color(15)(',')
                         last_key_line = None
                 else:
                     if last_key_line is None:
                         yield ' ' * 2 + line
                     else:
                         yield ' ' * 2 + last_key_line + terminal.color(15)(': ') + line
                         last_key_line = None
         has_items = True
     if has_items:
         yield terminal.bold(terminal.color(15)('}'))
     else:
         yield terminal.bold(terminal.color(15)('{}'))
Example #8
def dedupleft(iterable, marker):
    """Deduplicates the marker on the left of an iterable object."""
    iterator = peekable(iterable)
    for x in iterator:
        if iterator.peek(None) != marker:
            break
    return itertools.chain([marker], iterator)
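Assuming the function above is in scope, collapsing repeated leading markers might look like this (illustrative input):

leading = ["#", "#", "#", "alpha", "beta"]
print(list(dedupleft(leading, "#")))   # expected: ['#', 'alpha', 'beta']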
Example #9
def test_detection_func(lines, expected):
    lines = more_itertools.peekable(
        enumerate(more_itertools.always_iterable(lines), start=1)
    )

    actual = doctest.detection_func(lines)
    assert actual == expected
Example #10
def collate(*iterables, **kwargs):
    """
    A slightly faster version of more_itertools.collate
    """
    key = kwargs.get("key", lambda x: x)

    peekables = [peekable(it) for it in iterables]
    peekables = [pee for pee in peekables if pee]  # remove empties
    vals = [key(pee.peek()) for pee in peekables]

    while len(peekables) > 0:

        min_i = 0
        min_val = vals[0]
        for i, val in enumerate(vals):
            if val < min_val:
                min_i = i
                min_val = val

        yield next(peekables[min_i])

        if not peekables[min_i]:
            peekables = [pee for pee in peekables if pee]  # remove empties
            vals = [key(pee.peek()) for pee in peekables]
        else:
            vals[min_i] = key(peekables[min_i].peek())
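For illustration, assuming the function above, merging two already-sorted lists could look like:

print(list(collate([1, 4, 7], [2, 3, 9])))   # expected: [1, 2, 3, 4, 7, 9]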
Example #11
def generate_parsed_tokens(token_str):
    """
    Generate parsed tokens.
    """
    char_stream = more_itertools.peekable(token_str)

    token_text = ''
    token_category = ''
    bucket = 'text'

    while char_stream:
        char = next(char_stream)
        next_char = char_stream.peek(None)
        if char == '\\' and next_char:
            # It's an escaped char; add it to the set.
            token_text += next_char
            next(char_stream)
        elif char == ':' and next_char:
            bucket = 'category'
        elif char == '/' and next_char:
            # We have completed a token!
            yield (token_category, token_text, [], [], token_text)
            # Reset everything.
            bucket = 'text'
            token_text = ''
            token_category = ''
        else:
            # We can simply add the character to the string.
            if bucket == 'text':
                token_text += char
            else:
                token_category += char

    if token_text or token_category:
        yield (token_category, token_text, [], [], token_text)
Example #12
def redirects(source: str, language: str) -> Iterator[CaptureResult[Redirect]]:
    """Return the redirects found in the document."""

    assert (language in redirect_magicwords), \
           'Language {} not in allowed choices.'.format(language)

    redirect_re = redirect_res[language]
    redirect_matches = peekable(redirect_re.finditer(source, concurrent=True))

    for match in redirect_matches:
        target = match.group('link') or ''
        target = target.strip()
        anchor = match.group('anchor') or target
        # newlines in anchor are visualized as spaces.
        anchor = anchor.replace('\n', ' ').strip()

        # split on '#' (link to section)
        tosection = ''
        if '#' in target:
            splittarget = target.split('#', 1)
            target = splittarget[0]
            tosection = splittarget[1]

        # For some reason if wikilink has no pipe, e.g. [[apple]] the regex
        # above captures everything in the anchor group, so we need to set
        # the link to the same page.
        if (anchor and not target):
            target = anchor

        redirect = Redirect(target=target, tosection=tosection)

        yield CaptureResult(redirect, Span(match.start(), match.end()))
Example #14
def analyze_revisions(page: mwxml.Page, stats: Mapping, only_last_revision: bool) -> None:
    """Analyze revisions."""
    revisions = more_itertools.peekable(page)

    section_names_stats = stats["section_names_per_revision"]
    sections_stats = stats["sections_per_revision"]

    for mw_revision in revisions:
        utils.dot()

        is_last_revision = not utils.has_next(revisions)
        if only_last_revision and not is_last_revision:
            continue

        text = utils.remove_comments(mw_revision.text or "")

        section_names = [section.name.strip().lower() for section, _ in extractors.sections(text)]
        sections_count = len(section_names)

        for section_name in section_names:
            section_names_stats["global"][section_name] += 1
            if is_last_revision:
                section_names_stats["last_revision"][section_name] += 1

        sections_stats["global"][sections_count] += 1
        if is_last_revision:
            sections_stats["last_revision"][sections_count] += 1

        stats["revisions"]["global"] += 1
        if is_last_revision:
            stats["revisions"]["last_revision"] += 1

        stats["performance"]["revisions_analyzed"] += 1
Example #15
def unified_test_dev_split(inf, ingoldf, keyin, goldkeyin, outf, keyout):
    gold_sent_iter = peekable(iter_sentences(ingoldf))
    rm_inst_ids = []

    def sent_rm_gold(sent):
        gold_sent = gold_sent_iter.peek(None)
        if gold_sent is not None and gold_sent.attrib["id"] == sent.attrib[
                "id"]:
            for instance in sent.xpath("./instance"):
                rm_inst_ids.append(instance.attrib["id"])
            next(gold_sent_iter)
            return BYPASS

    transform_sentences(inf, sent_rm_gold, outf)

    def next_rm():
        try:
            return rm_inst_ids.pop(0)
        except IndexError:
            return None

    rm_id = next_rm()
    for line in keyin:
        if rm_id == line.split()[0]:
            rm_id = next_rm()
            continue
        keyout.write(line)

    assert len(rm_inst_ids) == 0 and rm_id is None
Example #16
def without_trailing(it, *, trailing, _exhausted=object()):
	"""yield all elements of it, except for the last one, if the last one == trailing."""
	it = more_itertools.peekable(it)
	for x in it:
		if it.peek(_exhausted) is _exhausted and x == trailing:
			return
		yield x
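Assuming the function above, a quick illustration of dropping only a trailing sentinel value:

print(list(without_trailing(["a", "b", ""], trailing="")))   # expected: ['a', 'b']
print(list(without_trailing(["a", "", "b"], trailing="")))   # expected: ['a', '', 'b']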
Example #17
def split_to_sections(lines_in):
  remaining = peekable(lines_in)
  sections = []
  while (remaining):
    (title, lines_in_section, remaining) = get_section(remaining)
    sections.append((title, lines_in_section))
  return sections
Example #18
def test_policy_theoretical_optimal_strategy(symbol='AAPL', sd=dt.datetime(2010, 1, 1), ed=dt.datetime(2011, 12, 31),
                                             st=100000, shares_contraint=1000):
    df_trades = create_trades_df(start_date=sd, end_date=ed)
    df_trades.set_index('Date', inplace=True)
    df_trades['Symbol'] = symbol
    df_trades['Order'] = 'BUY'
    df_prices = get_data([symbol], pd.date_range(sd, ed), False)
    df_price_filter = df_prices.dropna(subset=[symbol])
    iterator = more_itertools.peekable(df_price_filter.iterrows())
    ltd_shares = 0.0
    for index, row in iterator:
        current_price = row[0]
        next_day_price = iterator.peek((np.nan, [np.nan]))[1][0]
        if np.isnan(next_day_price):
            break
        delta = current_price - next_day_price
        if delta > 0:
            # We want to sell, next day is going to be cheaper
            add_sell_order(df_trades, index, shares_contraint, ltd_shares)
        else:
            # We want to buy, next day the price will go up
            add_buy_order(df_trades, index=index, shares_constraint=shares_contraint, ltd_shares=ltd_shares)
        # update_life_to_date_shares
        ltd_shares = update_life_to_date_shares(ltd_shares, df_trades, index, shares_contraint)
    return df_trades.reset_index()
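A minimal standalone sketch of the look-ahead-to-the-next-trading-day pattern above, using a default value so the last row ends the loop (prices are illustrative):

import math
from more_itertools import peekable

prices = [10.0, 11.5, 11.0, 12.25]
it = peekable(enumerate(prices))
for day, price in it:
    _, next_price = it.peek((None, math.nan))  # default when there is no next day
    if math.isnan(next_price):
        break                                  # last day: nothing to trade against
    print(day, "SELL" if price > next_price else "BUY")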
Example #19
def sections(source: str, include_preamble: bool=False) \
        -> Iterator[CaptureResult[Section]]:
    """Return the sections found in the document."""
    section_header_matches = peekable(section_header_re.finditer(source))
    if include_preamble:
        try:
            body_end = section_header_matches.peek().start()
            body_end -= 1  # Don't include the newline before the next section
        except StopIteration:
            body_end = len(source)
        preamble = Section(
            name='',
            level=0,
            body=source[:body_end],
        )
        yield CaptureResult(preamble, Span(0, body_end))

    for match in section_header_matches:
        name = match.group('section_name')
        level = len(match.group('equals'))

        body_begin = match.end() + 1  # Don't include the newline after
        try:
            body_end = section_header_matches.peek().start()
            body_end -= 1  # Don't include the newline before the next section
        except StopIteration:
            body_end = len(source)

        section = Section(
            name=name,
            level=level,
            body=source[body_begin:body_end],
        )

        yield CaptureResult(section, Span(match.start(), body_end))
Example #20
                def sort_line_chars(
                        chars: Sequence[PDFChar],
                        interpreter: PDFPageInterpreter) -> Sequence[PDFChar]:
                    chars = (normalize_char(char, interpreter)
                             for char in chars)
                    chars = sorted(chars, key=lambda char: char["x0"])
                    main_chars, combining_chars = partition(
                        lambda char: char["text"] and unicodedata.combining(
                            char["text"]), chars)
                    combining_chars_iter = peekable(iter(combining_chars))
                    for main_char in main_chars:
                        yield main_char

                        while combining_chars_iter:
                            combining_char = combining_chars_iter.peek()

                            overlap = max(
                                min(main_char["x1"], combining_char["x1"]) -
                                max(main_char["x0"], combining_char["x0"]), 0)
                            if overlap < main_char["width"] * Decimal("0.5"):
                                break

                            yield combining_char
                            next(combining_chars_iter, None)

                    assert (next(combining_chars_iter, None) is None)

                    return
                    yield
Example #21
 def __init__(self, vals):
     inits = []
     rvals = peekable(vals)
     for index in range(0, len(vals) - 1):
         inits.extend([next(rvals), rvals.peek()])
     self.left = [x for x in inits[0:int(len(inits) / 2)]]
     self.right = [x for x in inits[int(len(inits) / 2):]]
Example #22
def extract_revisions(mw_page: mwxml.Page, stats: Mapping,
                      only_last_revision: bool,
                      debug: bool) -> Iterator[Revision]:
    """Extract the internall links (wikilinks) from the revisions."""

    revisions = more_itertools.peekable(mw_page)
    for mw_revision in revisions:
        utils.dot()

        is_last_revision = not utils.has_next(revisions)
        if only_last_revision and not is_last_revision:
            continue

        text = utils.remove_comments(mw_revision.text or '')

        wikilinks = (wikilink for wikilink, _ in extractors.wikilinks(
            page_title=mw_page.title,
            source=text,
            sections=extractors.sections(text),
            debug=debug,
        ))

        yield Revision(id=mw_revision.id,
                       parent_id=mw_revision.parent_id,
                       user=mw_revision.user,
                       minor=mw_revision.minor,
                       comment=mw_revision.comment,
                       model=mw_revision.model,
                       format=mw_revision.format,
                       timestamp=mw_revision.timestamp.to_json(),
                       text=text,
                       wikilinks=wikilinks)
        stats['performance']['revisions_analyzed'] += 1
Example #23
def parse_orgmode(f: IO, subprovider: str) -> Iterator[Item]:
    current_datetime: Optional[datetime.datetime] = None
    current_paragraph: List[str] = []
    lines = peekable(f)
    for line in lines:
        line_clean = line.strip()
        if line_clean:
            m = regex_heading.match(line_clean)
            # Title line
            if m:
                if m.group('todo'):
                    current_datetime = None
                else:
                    current_datetime = datetime.datetime.strptime(
                        m.group('date'), '%Y-%m-%d %a')
            # Paragraph line but not before first heading
            elif current_datetime:
                current_paragraph.append(line_clean)
        # Empty line after paragraph or last line of file
        if not line_clean or not lines:
            if current_datetime and current_paragraph:
                yield Item.normalized(
                    datetime_=current_datetime,
                    text='\n'.join(current_paragraph),
                    provider=provider,
                    subprovider=subprovider,
                    all_day=True,
                )
                current_paragraph.clear()
Example #24
    def document_dependency_graphs(self, document):
        document = peekable(iter(document))

        while document:
            sentence = list(takewhile(lambda l: l != '</s>', document))

            if not sentence:
                # It might happen because of the snippets like this:
                #
                #    plates  plate   NNS     119     116     PMOD
                #    </text>
                #    </s>
                #    <text id="ukwac:http://www.learning-connections.co.uk/curric/cur_pri/artists/links.html">
                #    <s>
                #    Ideas   Ideas   NP      1       14      DEP
                #
                # where </text> is before </s>.
                continue

            try:
                dg = DependencyGraph(
                    sentence,
                    cell_extractor=ukwac_cell_extractor,
                    cell_separator='\t',
                )
            except DependencyGraphError:
                logger.exception("Couldn't instantiate a dependency graph.")
            else:
                for node in dg.nodes.values():

                    if self.lowercase_stem and node['lemma']:
                        node['lemma'] = node['lemma'].lower()

                yield dg
Example #25
    def parse(self, fh):
        """Generate tap.line.Line objects, given a file-like object `fh`.

        `fh` may be any object that implements both the iterator and
        context management protocol (i.e. it can be used in both a
        "with" statement and a "for...in" statement.)

        Trailing whitespace and newline characters will be automatically
        stripped from the input lines.
        """
        with fh:
            try:
                first_line = next(fh)
            except StopIteration:
                return
            first_parsed = self.parse_line(first_line.rstrip())
            fh_new = itertools.chain([first_line], fh)
            if first_parsed.category == 'version' and \
                    first_parsed.version >= 13:
                if ENABLE_VERSION_13:
                    fh_new = peekable(itertools.chain([first_line], fh))
                    self._try_peeking = True
                else:  # pragma no cover
                    print("""
WARNING: Optional imports not found, TAP 13 output will be
    ignored. To parse yaml, see requirements in docs:
    https://tappy.readthedocs.io/en/latest/consumers.html#tap-version-13""")

            for line in fh_new:
                yield self.parse_line(line.rstrip(), fh_new)
Example #26
    def documents(self, path):
        file_pass, path = path

        with gzip.open(path, 'rt', encoding='ISO-8859-1') as f:
            lines = (l.rstrip() for l in f)

            lines = peekable(
                l for l in lines
                if not l.startswith('<text') and l != '<s>'
            )

            c = 0
            while lines:
                if (c % (10 ** 4)) == 0:
                    logger.debug(
                        '%s text elements are read, every %s is processed. '
                        'It\'s about %.2f of the file.',
                        c,
                        self.file_passes,
                        c / 550000,  # An approximate number of texts in a file.
                    )

                if (self.limit is not None) and (c > self.limit):
                    logger.info('Limit of sentences is reached.')
                    break

                document = list(takewhile(lambda l: l != '</text>', lines))

                if (c % self.file_passes) == file_pass:
                    yield document

                c += 1
Example #27
 def __init__(
     self,
     iterable: Iterable[Value],
     key: Callable[[Value], Key],
 ) -> None:
     """Initialize"""
     self._groups = peekable(groupby(iterable, key))
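A standalone sketch of why wrapping groupby() in a peekable is handy: the first group can be inspected without consuming it (the input string is illustrative):

from itertools import groupby
from more_itertools import peekable

groups = peekable(groupby("aaabbc"))
first_key, _ = groups.peek()       # look at the first group's key without advancing
print(first_key)                   # 'a'
for key, group in groups:
    print(key, len(list(group)))   # a 3, b 2, c 1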
Example #28
def extract_revisions(
        mw_page: mwxml.Page,
        language: str,
        stats: Mapping,
        only_last_revision: bool) -> Iterator[Revision]:
    """Extract the internall links (wikilinks) from the revisions."""

    revisions = more_itertools.peekable(mw_page)
    for mw_revision in revisions:
        utils.dot()

        is_last_revision = not utils.has_next(revisions)
        if only_last_revision and not is_last_revision:
            continue

        text = utils.remove_comments(mw_revision.text or '')

        wikilinks = (wikilink
                     for wikilink, _
                     in extractors.wikilinks(text, extractors.sections(text)))

        yield Revision(
            id=mw_revision.id,
            parent_id=mw_revision.parent_id,
            user=mw_revision.user,
            minor=mw_revision.minor,
            comment=mw_revision.comment,
            model=mw_revision.model,
            format=mw_revision.format,
            timestamp=mw_revision.timestamp.to_json(),
            text=text,
            wikilinks=wikilinks
        )
Example #29
def add_dummy_entries(entries):
    entries = peekable(entries)
    prev_entry = next(entries)
    yield prev_entry
    output_month, output_year = prev_entry["date"].month, prev_entry[
        "date"].year

    while entries:
        entry = next(entries)
        while output_month != entry["date"].month or output_year != entry[
                "date"].year:
            output_month = output_month - 1
            if output_month < 1:
                output_month = 12
                output_year = output_year - 1
            if output_month != entry["date"].month or output_year != entry[
                    "date"].year:
                yield {
                    "type": "dummy",
                    "subtype": "dummy",
                    "instance": None,
                    "date": datetime(year=output_year,
                                     month=output_month,
                                     day=1),
                }
        yield entry
Example #30
def augment_timeline(entries):
    last_year = None
    last_month = None

    entries = peekable(add_dummy_entries(entries))

    while entries:
        entry = next(entries)
        nentry = entries.peek(None)
        tl = {
            "year_first": False,
            "year_last": False,
            "month_first": False,
            "month_last": False,
            "entry_id": "{}:{}:{}".format(
                entry["type"], entry["subtype"], entry["instance"].pk
            )
            if entry["instance"]
            else None,
        }
        if last_year != entry["date"].year:
            tl["year_first"] = True
        if last_month != entry["date"].month:
            tl["month_first"] = True
        if not nentry or nentry["date"].year != entry["date"].year:
            tl["year_last"] = True
        if not nentry or nentry["date"].month != entry["date"].month:
            tl["month_last"] = True
        yield dict(entry, tl=tl)
        last_year = entry["date"].year
        last_month = entry["date"].month
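The boundary flags above come from comparing each entry with the peeked next one; a minimal standalone sketch of that idea (months are illustrative):

from more_itertools import peekable

months = peekable(["2021-01", "2021-01", "2021-02", "2021-02"])
for month in months:
    nxt = months.peek(None)
    month_last = nxt is None or nxt != month
    print(month, "last entry of its month" if month_last else "")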
Example #31
    def parse(self, fh):
        """Generate tap.line.Line objects, given a file-like object `fh`.

        `fh` may be any object that implements both the iterator and
        context management protocol (i.e. it can be used in both a
        "with" statement and a "for...in" statement.)

        Trailing whitespace and newline characters will be automatically
        stripped from the input lines.
        """
        with fh:
            try:
                first_line = next(fh)
            except StopIteration:
                return
            first_parsed = self.parse_line(first_line.rstrip())
            fh_new = itertools.chain([first_line], fh)
            if first_parsed.category == "version" and first_parsed.version >= 13:
                if ENABLE_VERSION_13:
                    fh_new = peekable(itertools.chain([first_line], fh))
                    self._try_peeking = True
                else:  # pragma no cover
                    print(
                        """
WARNING: Optional imports not found, TAP 13 output will be
    ignored. To parse yaml, see requirements in docs:
    https://tappy.readthedocs.io/en/latest/consumers.html#tap-version-13"""
                    )

            for line in fh_new:
                yield self.parse_line(line.rstrip(), fh_new)
Example #32
    def __init__(
            self,
            server_list,
            nickname,
            realname,
            reconnection_interval=missing,
            recon=ExponentialBackoff(),
            **connect_params,
    ):
        super(SingleServerIRCBot, self).__init__()
        self.__connect_params = connect_params
        self.channels = IRCDict()
        specs = map(ServerSpec.ensure, server_list)
        self.servers = more_itertools.peekable(itertools.cycle(specs))
        self.recon = recon
        # for compatibility
        if reconnection_interval is not missing:
            warnings.warn("reconnection_interval is deprecated; "
                          "pass a ReconnectStrategy object instead")
            self.recon = ExponentialBackoff(min_interval=reconnection_interval)

        self._nickname = nickname
        self._realname = realname
        for i in [
                "disconnect",
                "join",
                "kick",
                "mode",
                "namreply",
                "nick",
                "part",
                "quit",
        ]:
            self.connection.add_global_handler(i, getattr(self, "_on_" + i),
                                               -20)
Example #34
    def _split_headings(self, lines, hprefix):
        lines = peekable(lines)
        intro = []
        while lines and not lines.peek().startswith(hprefix):
            intro.append(next(lines))

        sections = []
        current = None
        for line in lines:
            if line.startswith(hprefix):
                if current:
                    sections.append(current)

                current = {
                    "title": line[len(hprefix):].strip(),
                    "lines": [],
                }

            else:
                current["lines"].append(line)

        if current:
            sections.append(current)

        for section in sections:
            if not section["lines"] or section["lines"][-1] != "":
                section["lines"].append("")

        return intro, sections
Example #35
def user(username):
    # query database for list of user's records (6 in a row)
    records = []
    row = []
    record = {}
    db = get_db()
    cursor = db.cursor(buffered=True)
    cursor.execute(
        "SELECT release_title, artist, discogs_uri, image_url " 
        "FROM record LEFT JOIN user ON user_id = user.id " 
        "WHERE username = %s",
        (username,)
    )
    i = 1
    p = peekable(cursor)
    for result in p:
        record['release_title'] = result[0]
        record['artist'] = result[1]
        record['uri'] = result[2]
        record['image_url'] = result[3]
        row.append(record)
        record = {}
        if i % 6 == 0 or p.peek(None) is None:
            records.append(row)
            row = []
        i += 1
    n_items = i - 1

    cursor.close()
    return render_template('friend/user.html', records=records,
                            n_items=n_items, username=username)
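A standalone sketch of the "flush every 6 items or at the end" grouping above, using peek(None) to detect the final item (the data is illustrative; more_itertools.chunked gives the same result directly):

from more_itertools import peekable

items = peekable(range(8))
row, rows = [], []
for i, item in enumerate(items, start=1):
    row.append(item)
    if i % 6 == 0 or items.peek(None) is None:
        rows.append(row)
        row = []
print(rows)   # [[0, 1, 2, 3, 4, 5], [6, 7]]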
Example #36
def test_policy(symbol='AAPL', sd=dt.datetime(2010, 1, 1), ed=dt.datetime(2012, 12, 31),
                st=100000, shares_contraint=1000, look_back_period=14):
    df_trades = create_trades_df(start_date=sd, end_date=ed)
    df_trades.set_index('Date', inplace=True)
    df_trades['Symbol'] = symbol
    df_trades['Order'] = 'NOTHING'
    df_prices_sym = get_data([symbol], pd.date_range(sd, ed), False)
    df_prices_idx = get_data(['SPY'], pd.date_range(sd, ed), False, dropNonTradingSPY=False)
    df_price_filter_sym = df_prices_sym.dropna(subset=[symbol])
    df_price_filter_idx = df_prices_idx.dropna(subset=['SPY'])
    iterator = more_itertools.peekable(df_price_filter_sym.iloc[look_back_period:].iterrows())
    ltd_shares = 0.0
    prev_sym_price_over_sma = indicator.get_price_over_sma(df_price_filter_sym.iloc[:look_back_period])
    for index, row in iterator:
        # get current price to determine when we should close the position.
        df_prices_historical_sym = df_price_filter_sym.loc[:index][-look_back_period:] # Todo: Can we do this in one shot?
        df_prices_historical_idx = df_price_filter_idx.loc[:index][-look_back_period:]

        sym_price_over_sma = indicator.get_price_over_sma(df_prices_historical_sym)
        sym_bollinger_band_percent = indicator.get_bollinger_band_percent(df_prices_historical_sym)
        sym_rsi = indicator.get_rsi(df_prices_historical_sym, look_back_period)
        idx_price_over_sma = indicator.get_price_over_sma(df_prices_historical_idx)
        idx_bollinger_band_percent = indicator.get_bollinger_band_percent(df_prices_historical_idx)
        idx_rsi = indicator.get_rsi(df_prices_historical_idx, look_back_period)
        signal = get_signal(sym_price_over_sma, sym_bollinger_band_percent, sym_rsi, idx_price_over_sma,
                           idx_bollinger_band_percent, idx_rsi, prev_sym_price_over_sma)
        print(signal)
        process_signal(df_trades, index, signal, ltd_shares, shares_contraint)
        ltd_shares = update_life_to_date_shares(ltd_shares, df_trades, index, shares_contraint)
        prev_sym_price_over_sma = sym_price_over_sma
    return df_trades.reset_index()
Example #37
    def merge_styles(self, offline: bool) -> Iterator[Fuss]:
        """Merge one or multiple style files."""
        config = self.read_configuration()

        # pylint: disable=import-outside-toplevel
        from nitpick.style import StyleManager

        style = StyleManager(self, offline, config.cache)
        base = config.file.expanduser().resolve().as_uri(
        ) if config.file else None
        style_errors = list(
            style.find_initial_styles(peekable(always_iterable(config.styles)),
                                      base))
        if style_errors:
            raise QuitComplainingError(style_errors)

        self.style_dict = style.merge_toml_dict()

        from nitpick.flake8 import NitpickFlake8Extension

        minimum_version = search_json(self.style_dict,
                                      NITPICK_MINIMUM_VERSION_JMEX, None)
        logger.debug(f"Minimum version: {minimum_version}")
        if minimum_version and version_to_tuple(
                NitpickFlake8Extension.version) < version_to_tuple(
                    minimum_version):
            yield Reporter().make_fuss(
                ProjectViolations.MINIMUM_VERSION,
                project=PROJECT_NAME,
                expected=minimum_version,
                actual=NitpickFlake8Extension.version,
            )

        self.nitpick_section = self.style_dict.get("nitpick", {})
        self.nitpick_files_section = self.nitpick_section.get("files", {})
Example #38
 def create_partition_buffers(stream):
     bucketed_stream = more_itertools.bucket(stream, key=attrgetter("partition"))
     partition_buffers: Dict[int, Iterator[StreamEvent]] = {
         p: more_itertools.peekable(iter(bucketed_stream[p])) for p in range(partition_count)
     }
     global_event_buffer = bucketed_stream[StreamEvent.ALL_PARTITIONS]
     return partition_buffers, global_event_buffer
Example #39
def auto_map_cols(source, cols):
    src_cols = peekable(source).peek().keys()
    col_map = dict(zip(src_cols, cols))

    def _transform(row):
        return map_cols(col_map)

    return _transform
Example #40
 def _read():
     with open(path, **fileparams) as csvfile:
         csvreader = csv.reader(csvfile, **csvparams)
         if headers:
             cols = next(csvreader)
         else:
             csvreader = peekable(csvreader)
             cols = range(len(csvreader.peek()))
         for row in csvreader:
             yield OrderedDict(zip(cols, row))
Example #41
 def _read():
     with open(path, **fileparams) as fwfile:
         fwreader = parse_fw_file(fwfile, fieldwidths)
         if headers:
             cols = next(fwreader)
         else:
             fwreader = peekable(fwreader)
             cols = range(len(fwreader.peek()))
         for row in fwreader:
             yield OrderedDict(zip(cols, row))
Example #42
 def _write(source):
     with open(path, mode) as fwfile:
         vals = peekable(source)
         if headers:
             padded_cols = pad_row(vals.peek().keys(), fieldwidths)
             fwfile.write(''.join(padded_cols))
         for row in vals:
             padded_row = pad_row(row.values(), fieldwidths, **params)
             fwfile.write(''.join(padded_row))
Example #43
def sum_folder(channel):
    import pickle
    import logging

    from more_itertools import peekable
    import pandas as pd

    from fowler.corpora.execnet import initialize_channel

    _, data = initialize_channel(channel)

    logger = logging.getLogger('execnet.sum_folder')

    kwargs = data.get('kwargs', {})
    instance = data['instance']
    folder_name = data['folder_name']
    folder = getattr(instance, folder_name)

    result = None
    for item in channel:

        if item == ('message', 'terminate'):
            if result is not None:
                logger.debug('Sending the final result, size: %s', len(result))
                channel.send(('result', pickle.dumps(result)))
            break

        type_, data = item
        if type_ == 'task':

            intermediate_results = peekable(enumerate(folder(data, **kwargs)))

            if intermediate_results:
                if result is None:
                    _, result = next(intermediate_results)

                # TODO: It would be nice to catch any exception here
                # (especially one that happens inside the folder() call)
                # and report it to the master.
                # Same applies to the next() call above.
                for i, r in intermediate_results:
                    logger.debug('Iteration: %s, result size: %s', i, len(result))

                    result = pd.concat(
                        [result, r],
                        copy=False,
                    ).groupby(level=result.index.names).sum()

                    if (i % 10) == 9:
                        result.sort(ascending=False, inplace=True)

                        half = len(result) // 2
                        logger.debug('Sending a result. Result size: %s', half)
                        channel.send(('result', pickle.dumps(result.tail(half))))
                        result = result.head(-half)

        channel.send(('message', 'send_next'))
Example #44
 def _write(source):
     with open(path, mode) as csvfile:
         vals = peekable(source)
         cols = vals.peek().keys()
         csv_writer = csv.DictWriter(csvfile, fieldnames=cols, **csvparams)
         if headers:
             csv_writer.writeheader()
         for row in vals:
             csv_writer.writerow(row.values())
Example #45
def extract_island(text):
    tokens = tokenize_finditer(text, LEXICON)
    tokens = peekable(tokens)

    while tokens.peek(None) is not None:

        if tokens.peek()[0] == 'doi_start':
            yield ('doi', read_doi(tokens))

        next(tokens)
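A standalone sketch of the peek-then-dispatch loop above, using made-up token tuples:

from more_itertools import peekable

tokens = peekable([("word", "see"), ("doi_start", "10."), ("word", "end")])
while tokens.peek(None) is not None:
    if tokens.peek()[0] == "doi_start":
        print("DOI token found:", tokens.peek()[1])
    next(tokens)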
Example #46
def openCsvReader(filename):
    """
    Open a csv reader on the given filename.
    Then iterate over it with 'for row in reader:'.
    Wraps the reader iterator in peekable - see http://stackoverflow.com/a/27698681/243392 -
    so you can call reader.peek() to look at the next record without consuming it.
    """
    f = open(filename, 'rt')
    f = dataLines(f) # ignore comments, blank lines and header row
    reader = more_itertools.peekable(csv.reader(f))
    return reader, f
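A hedged usage sketch of the helper above; the file name is hypothetical:

reader, f = openCsvReader('records.csv')   # hypothetical file
first_row = reader.peek()                  # look at the next row without consuming it
for row in reader:
    print(row)
f.close()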
Example #47
    def __init__(self, items, f_map, f_reduce, starting_level, mandatory_levels, mandatory_levels_all):
        self.mandatory_levels_max = max(mandatory_levels) if mandatory_levels else None
        self.iter = peekable(with_levels(items, starting_level=starting_level,
            mandatory_levels=(sorted(mandatory_levels) if not mandatory_levels_all else None),
            mandatory_levels_all=mandatory_levels_all,
        ))
        self.f_map = self.simple_struct_from_node if f_map is SIMPLE_MAP else (lambda x: x) if f_map is None else f_map
        self.f_reduce = f_reduce or _REDUCE_DEFAULT

        self.reduce_of_no_children = self.f_reduce([])

        self.ni_active = None
Example #48
def extract_search(text, lexicon=LEXICON):

    last_end = 0
    for match in DOI_START_RE.finditer(text):
        if match.span()[0] > last_end:
            tokens = tokenize_search(text, match.span()[0], lexicon=lexicon)
            tokens = peekable(tokens)
            doi = read_doi(tokens)
            last_end = match.span()[0] + len(doi)
            yield Identifier('doi', doi)
        else:
            last_end = max(match.span()[1], last_end)
Example #49
    def extract(self):

        with open(self.path, **self.fileparams) as f:

            reader = self._get_reader(f)
            if self.headers:
                cols = next(reader)

            else:
                reader = peekable(reader)
                cols = range(len(reader.peek()))

            for row in reader:
                yield OrderedDict(zip(cols, row))
Example #50
def wikilinks(source: str, sections: Iterator[CaptureResult[Section]]) \
        -> Iterator[CaptureResult[Wikilink]]:
    """Return the wikilinks found in the document."""
    wikilink_matches = peekable(wikilink_re.finditer(source, concurrent=True))

    sections_limits = [SectionLimits(name=section.name,
                                     level=section.level,
                                     number=idx,
                                     begin=span.begin,
                                     end=span.end)
                       for idx, (section, span) in enumerate(sections, 1)]

    last_section_seen = 0
    for match in wikilink_matches:
        link = match.group('link') or ''
        link = link.strip()
        anchor = match.group('anchor') or link
        # newlines in anchor are visualized as spaces.
        anchor = anchor.replace('\n', ' ').strip()

        link_start = match.start()

        link_section_number = 0
        link_section_name = '---~--- incipit ---~---'
        link_section_level = 0

        for section in sections_limits[last_section_seen:]:
            if section.begin <= link_start <= section.end:
                link_section_number = section.number
                link_section_name = section.name
                link_section_level = section.level
                last_section_seen = (link_section_number - 1)\
                    if link_section_number > 0 else 0
                break

        # For some reason if wikilink has no pipe, e.g. [[apple]] the regex
        # above captures everything in the anchor group, so we need to set
        # the link to the same page.
        if (anchor and not link):
            link = anchor

        wikilink = Wikilink(
            link=link,
            anchor=anchor,
            section_name=link_section_name,
            section_level=link_section_level,
            section_number=link_section_number
        )

        yield CaptureResult(wikilink, Span(link_start, match.end()))
Example #51
    def __init__(self, mutations, mutation_data_factory):
        """
        Initialize a new queue with a MutationData iterator

        :param mutations: any MutationData producing Iterator
        :param mutation_data_factory: a MutationDataFactory to be used to produce new mutations for the ONPs
        """
        self.mutations = more_itertools.peekable(mutations)
        self.sns = SampleNameSelector(self.mutations.peek())
        self.queue = collections.defaultdict(list)
        self.indel_queue = []
        self.last = 0
        self.logger = logging.getLogger(__name__)
        self.warned_about_order = False
        self._mutation_data_factory = mutation_data_factory
Example #52
    def pre_process(self):
        super(P, self).pre_process()

        # Compute four InterNoteVector generators for each pattern note, one for each turning point type
        # tp_types 0, 1 iterate through a source sorted by onset (attack)
        # while types 2, 3 iterate through a source sorted by offset (release)
        for note in self.patternPointSet:
            note.source_ptrs = [
                peekable((lambda p:
                    (InterNoteVector(p, self.patternPointSet, s, self.sourcePointSet,
                        self.settings['interval_func'], tp_type = 0)
                    for s in self.sourcePointSet))(note)),
                peekable((lambda p:
                    (InterNoteVector(p, self.patternPointSet, s, self.sourcePointSet,
                        self.settings['interval_func'], tp_type = 1)
                    for s in self.sourcePointSet))(note)),
                peekable((lambda p:
                    (InterNoteVector(p, self.patternPointSet, s, self.sourcePointSet_offsetSort,
                        self.settings['interval_func'], tp_type = 2)
                    for s in self.sourcePointSet_offsetSort))(note)),
                peekable((lambda p:
                    (InterNoteVector(p, self.patternPointSet, s, self.sourcePointSet_offsetSort,
                        self.settings['interval_func'], tp_type = 3)
                    for s in self.sourcePointSet_offsetSort))(note))]
Example #53
def _mapquotes(pagetags):
    items = []
    category = ""
    pagetagitr = more_itertools.peekable(pagetags)
    for tag in pagetagitr:
        if tag.name == "h2":
            category = tag.text
            continue

        if tag.name == "p" and pagetagitr.peek(tag).name == "p":
            matchresult = re.match(r'(“.+”)\s(—|–)\s(.+)', tag.text + " " + pagetagitr.peek(tag).text)
            if matchresult:
                items.append(Quote(category, matchresult.group(1), matchresult.group(3)))
                next(pagetagitr)
                continue

    return items
Example #54
def extract_search(text: str) -> Iterator[CaptureResult[Identifier]]:
    last_end = 0
    for match in DOI_START_RE.finditer(text):
        begin_pos = match.start()

        if begin_pos > last_end:
            tokens = tokenize_search(text, begin_pos)
            tokens = peekable(tokens)

            identifier = read_doi(tokens)
            end_pos = begin_pos + len(identifier.id)

            yield CaptureResult(identifier, Span(begin_pos, end_pos))

            last_end = end_pos
        else:
            last_end = max(match.end(), last_end)
Example #55
def test_broken_diffs():
    revision_docs = [
        {'id': 2, 'text': "Apples are blue.", 'page': {'title': "Foo"},
         'diff': {'last_id': 3, 'ops': []}},
        {'id': 3, 'text': "Apples are red.", 'page': {'title': "Foo"},
         'diff': {'last_id': 1, 'ops': []}},
        {'id': 4, 'text': "Apples are a red fruit", 'page': {'title': "Foo"},
         'diff': {'last_id': 2, 'ops': []}},
        {'id': 5, 'text': "Apples are a lame fruit", 'page': {'title': "Foo"},
         'diff': {'last_id': 4, 'ops': []}}
    ]

    revision_docs = peekable(revision_docs)

    broken_docs = list(read_broken_docs(revision_docs))
    print([d['id'] for d in broken_docs])
    eq_(len(broken_docs), 3)
Example #56
def parse_ngram_output(ngrams_len, ngram_output):
    lines = peekable(ngram_output.split("\n"))
    res = []
    for i in range(ngrams_len):
        line = next(lines)
        while lines.peek().startswith("\t"):
            line = next(lines)
        m = re.findall(r"\] ([^\s]+) \[", line)
        if m:
            last_prob = float(m[0])
        else:
            print("ERROR parsing lastprob: %s" % line, file=sys.stderr)
        next(lines)
        next(lines)
        res.append(last_prob)
        next(lines)
    return res
Example #57
def sequence(*iterables, **kwargs):

    compare = kwargs.get('compare', lambda i1, i2: i1 < i2)
    iterables = [peekable(it) for it in iterables]

    done = False
    while not done:

        next_i = None

        for i, it in enumerate(iterables):
            if it:  # Not empty
                if next_i is None or \
                   compare(it.peek(), iterables[next_i].peek()):
                    next_i = i

        if next_i is None:
            done = True
        else:
            yield next(iterables[next_i])
Example #58
def extract_revisions(
    mw_page: mwxml.Page, language: str, stats: Mapping, only_last_revision: bool
) -> Iterator[Revision]:
    """Extract the sections which are bibliography from the revisions."""
    section_names_stats = stats["section_names"]
    revisions = more_itertools.peekable(mw_page)
    for mw_revision in revisions:
        utils.dot()

        is_last_revision = not utils.has_next(revisions)
        if only_last_revision and not is_last_revision:
            continue

        text = utils.remove_comments(mw_revision.text or "")

        sections = (section for section, _ in extractors.sections(text))

        bibliography_sections = list(section for section in sections if is_bibliography(section.name, language))

        for section in bibliography_sections:
            section_names_stats["global"][section.name] += 1
            if is_last_revision:
                section_names_stats["last_revision"][section.name] += 1
        # TODO: use section.fullbody
        text = "".join(section.full_body for section in bibliography_sections)

        yield Revision(
            id=mw_revision.id,
            parent_id=mw_revision.parent_id,
            user=mw_revision.user,
            minor=mw_revision.minor,
            comment=mw_revision.comment,
            model=mw_revision.model,
            format=mw_revision.format,
            timestamp=mw_revision.timestamp.to_json(),
            text=text,
            sections=bibliography_sections,
        )

        stats["performance"]["revisions_analyzed"] += 1