def GetActivities(self, account, start, end):
    response = self.session.post(
        'https://ssl.grsaccess.com/english/member/activity_reports_details.aspx',
        data={
            'MbrPlanId': account.account_id,
            'txtEffStartDate': start.format('%m/%d/%Y'),
            'txtEffEndDate': end.format('%m/%d/%Y'),
            'Submit': 'Submit'
        })
    response.raise_for_status()
    """
    Activities are returned in an html table, with the first few rows being
    headers. Example activity row:

    <TR>
      <TD class='activities-d-lit1'>01-FEB-17</TD>
      <TD class='activities-d-lit2'>New contribution</TD>
      <TD class='activities-d-transamt'>123.45</TD>
      <TD class='activities-d-netunitvalamt'>22.123456</TD>
      <TD class='activities-d-unitnum'>5.58005</TD>
    </TR>
    """
    soup = BeautifulSoup(response.text, 'html.parser')
    tags = soup.find_all(class_=re.compile('activities-d-*'))
    if not tags:
        return []
    for activity_list in split_before(
            tags,
            lambda tag: tag.attrs['class'][0] == tags[0].attrs['class'][0]):
        yield [a.text for a in activity_list]
def lines_to_programs(lines: List[str]) -> List[Tuple[str, List[dict]]]:
    """
    Programs will be represented as two-part tuples. The first part is the
    mask, a string. The second part is a list of registers and values,
    represented by a dict with the keys "r" and "v".
    """
    return lmap(parse_group, split_before(lines, lambda s: s[:4] == "mask"))
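# A minimal stand-alone sketch (not from the original project) of the grouping
# step above: split_before starts a new chunk at every "mask ..." line, so each
# chunk pairs a mask with the memory writes that follow it. The sample lines
# are made up; lmap/parse_group from the original are intentionally omitted.
from more_itertools import split_before

sample = [
    "mask = XXXXXXXXXXXXXXXXXXXXXXXXXXXXX1XXXX0X",
    "mem[8] = 11",
    "mem[7] = 101",
    "mask = 000000000000000000000000000000X1001X",
    "mem[42] = 100",
]
for group in split_before(sample, lambda s: s[:4] == "mask"):
    # each group begins with a mask line, followed by its memory writes
    print(group[0], "->", len(group) - 1, "writes")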
def get_sp1_history(self):
    """
    This function gets the history of speaker 1 (chatbot) from the relevant
    input_ids, by padding the history of speaker 2.

    :return: history of speaker 1
    """
    inp = self.input_ids
    sp1, sp2, pad = self.tokenizer.convert_tokens_to_ids(
        ['<speaker1>', '<speaker2>', '<pad>'])
    flatten = lambda l: [item for sublist in l for item in sublist]

    # get history of speaker1 (so pad all history of speaker2)
    hist1 = torch.zeros(inp.size(), device='cuda', dtype=torch.long)

    # for each training example in batch, get sp1 history
    for index, example in enumerate(inp):
        splitted = list(
            split_before(example.cpu().numpy(),
                         lambda x: x == sp1 or x == sp2))
        for i, z in enumerate(splitted):
            # replace sp1 tag with pad tag
            z = [x if x != sp1 else pad for x in z]
            splitted[i] = z
            # if sp2 or last sp1, replace with pad
            if sp2 in z or i == len(splitted) - 1:
                splitted[i] = [pad] * len(z)
        hist1[index] = torch.tensor(flatten(splitted), dtype=torch.long)

    # also return sp1_ids in last sentence of each batch
    idxs = []
    for example in hist1:
        idxs.append((example != pad).nonzero().view(-1))

    return hist1.view(inp.size()), idxs
def get_one_neighbor(filename):
    with open(filename) as f:
        neighbors = split_before(f, lambda x: "Device ID" in x)
        next(neighbors)  # discard the text before the first neighbor
        for lines in neighbors:
            neighbor = "".join(lines)
            yield neighbor
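# A small illustrative sketch of the same idea with made-up "show cdp neighbors
# detail"-style output: split_before starts a new chunk at every line containing
# "Device ID", and next() drops the header text before the first neighbor.
from more_itertools import split_before

output_lines = [
    "Total cisco devices entries displayed : 2\n",
    "Device ID: SW2\n",
    "  Interface: Eth0/0\n",
    "Device ID: R3\n",
    "  Interface: Eth0/2\n",
]
neighbors = split_before(output_lines, lambda x: "Device ID" in x)
next(neighbors)  # discard the text before the first neighbor
for lines in neighbors:
    print("".join(lines))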
def process_file(f):
    fileobj = open(f, 'r')
    content = fileobj.read().split()
    fileobj.close()
    for pieces in more_itertools.split_before(
            content, lambda x: not x.startswith('##')):
        insert_pieces(pieces)
def headlined_sections(string, headline_pattern):
    """ return sections with headlines matching a pattern """
    lines = string.splitlines()
    join_lines = '\n'.join
    pattern_matcher = apf.matcher(headline_pattern)
    lines = mit.lstrip(lines, pred=lambda line: not pattern_matcher(line))
    sections = list(map(join_lines, mit.split_before(lines, pattern_matcher)))
    return sections
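# A hedged stand-alone sketch of the same splitting idea, using a plain regex
# predicate in place of apf.matcher; the sample text and pattern are made up.
import re
from more_itertools import lstrip, split_before

text = "intro line\n# One\nbody 1\n# Two\nbody 2"
is_headline = re.compile(r"^# ").match
lines = lstrip(text.splitlines(), pred=lambda line: not is_headline(line))
sections = list(map("\n".join, split_before(lines, is_headline)))
print(sections)  # ['# One\nbody 1', '# Two\nbody 2']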
def sections_with_headline(pattern, string):
    """ return sections with headlines matching a pattern """
    lines = string.splitlines()
    join_lines = '\n'.join
    pattern_matcher = matcher(pattern)
    lines = lstrip(lines, pred=lambda line: not pattern_matcher(line))
    sections = list(map(join_lines, split_before(lines, pattern_matcher)))
    return sections
def _get_stanzas(self, config_text: str):
    whitespace_comment = " #\n"
    lines = [
        line.strip(whitespace_comment) for line in config_text.split("\n")
        if line.strip(whitespace_comment)
    ]
    stanzas = list(
        split_before(lines, lambda line: line.lower().startswith("host ")))
    print(f"Stanzas: {stanzas}")
    return stanzas
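# A minimal usage sketch, assuming ssh_config-style input: split_before cuts
# the cleaned lines into one stanza per "Host ..." line. The sample config is
# made up for illustration.
from more_itertools import split_before

config_text = """
Host example
    HostName example.com
    User alice

Host *.internal
    ProxyJump bastion
"""
lines = [ln.strip(" #\n") for ln in config_text.split("\n") if ln.strip(" #\n")]
stanzas = list(split_before(lines, lambda line: line.lower().startswith("host ")))
print(stanzas)
# [['Host example', 'HostName example.com', 'User alice'],
#  ['Host *.internal', 'ProxyJump bastion']]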
def toc_tree(self):
    toc = self.fitz_doc._ebook.get_toc(simple=False)
    sect_count = len(toc)
    root = Section(
        title=self.metadata.title,
        pager=Pager(first=0, last=sect_count - 1),
        level=1,
    )
    stack = TreeStackBuilder(root)
    all_html_files = self.get_html_file_list()
    for (idx, (level, title, __, data)) in enumerate(toc):
        href = data["name"]
        section = stack.push(
            Section(
                title=title,
                pager=Pager(first=idx, last=idx),
                level=level + 1,
                data=dict(href=href),
            ))
        # ----------------
        filename, html_id = (href, None) if "#" not in href else href.split("#")
        self._filename_to_section[filename] = section
        # ----------------
        if html_id is not None:
            self._split_section_anchor_ids.setdefault(filename, []).append(html_id)
        # ----------------
        section_filename = (href if "#" not in href else href.split("#")[0]).strip()
        if section_filename not in all_html_files:
            continue
        all_html_files[all_html_files.index(section_filename)] = section
    # End loop
    additional_html_files = (
        (sect, html_files)
        for (sect, *html_files) in more_itertools.split_before(
            all_html_files, pred=lambda item: isinstance(item, Section))
        if html_files and isinstance(sect, Section))
    for (sect, additional_html_file_list) in additional_html_files:
        sect.data["additional_html_files"] = additional_html_file_list
        for aditional_file in additional_html_file_list:
            self._filename_to_section[aditional_file] = sect
    # ------------
    if sect_count == 0:
        href = (it(self.epub.items).find(
            lambda item: "html" in item.media_type).file_name)
        stack.push(
            Section(
                title=_("Book contents"),
                pager=Pager(first=0, last=0),
                level=2,
                data=dict(href=href),
            ))
        object.__setattr__(root.pager, "last", 0)
    return root
def create_subCells(self):
    '''This function splits all of the cell data into subcell objects.
    To do this I find the length of the list before a fission event occurs,
    then take the length of the list until a fission event occurs and use it
    to slice the original data.'''
    subcell_list = list(split_before(self.daughters, lambda x: x != 0))
    subcell_list_lengths = [
        len(length_subcell) for length_subcell in subcell_list
    ]
    all_subcells = []
    current_position = 0
    subcell_number_tracker = 1
    # Here I slice the original cell data and put it into subcell objects
    for number_of_frames_in_subcell in subcell_list_lengths:
        end_position = current_position + number_of_frames_in_subcell
        subcell_framenbs = np.asarray(
            self.framenbs[current_position:end_position])
        subcell_length = np.asarray(
            self.length[current_position:end_position])
        subcell_width = np.asarray(
            self.width[current_position:end_position])
        subcell_area = np.asarray(self.area[current_position:end_position])
        subcell_fluorescence = np.asarray(
            self.fluorescence[current_position:end_position])
        subcell_fluorescence_by_area = np.asarray(
            self.fluorescence_by_area[current_position:end_position])
        subcell_length_growth = np.asarray(
            self.length_growth[current_position:end_position - 1])
        subcell_width_growth = np.asarray(
            self.width_growth[current_position:end_position - 1])
        subcell_area_growth = np.asarray(
            self.area_growth[current_position:end_position - 1])
        subcell_fluorescence_growth = np.asarray(
            self.fluorescence_growth[current_position:end_position - 1])
        subcell_fluorescence_growth_by_area = np.asarray(
            self.fluorescence_growth_by_area[current_position:end_position - 1])
        current_subcell = SubCell(
            subcell_number_tracker, subcell_framenbs, subcell_length,
            subcell_width, subcell_area, subcell_fluorescence,
            subcell_fluorescence_by_area, subcell_length_growth,
            subcell_width_growth, subcell_area_growth,
            subcell_fluorescence_growth, subcell_fluorescence_growth_by_area)
        current_position = end_position
        all_subcells.append(current_subcell)
        subcell_number_tracker += 1
    self.subcells = all_subcells
    return self.subcells
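# Illustrative sketch of the first step above, under the assumption that
# `daughters` is 0 until a fission event is recorded as a non-zero entry:
# split_before then cuts the track into one chunk of frames per sub-cell.
from more_itertools import split_before

daughters = [0, 0, 0, 7, 0, 0, 12, 0]  # made-up track
chunks = list(split_before(daughters, lambda x: x != 0))
print([len(c) for c in chunks])  # [3, 3, 2] -> frame counts used for slicing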
def get_movies_showtimes(theater, date):
    """Get movie names and times from Showtimes' website

    :theater: str
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://www.showtimes.com/movie-theaters/{}'
    D_THEATERS = {
        'regal fenway': lambda *args: 'regal-fenway-stadium-13-rpx-6269',
        'ua court st': lambda *args: 'ua-court-street-stadium-12-rpx-6608'
    }

    try:
        soup = soup_me(
            BASE_URL.format(
                D_THEATERS.get(theater.lower(), get_theaterpg_showtimes)(
                    theater)))  # fallback for unlisted theater
        # (phrased as functions, so theaterpg scraper won't run until necessary)
        movies = soup('li', class_='movie-info-box')
    except Exception as e:
        print(error_str.format(e))  # error msg only
        movies = []  # no matching theater

    movie_names = [
        ''.join((re.sub('[\r\n].*', '', name.text.strip())
                 for name in m('h2', class_='media-heading'))) for m in movies
    ]

    nested_buttons = [
        # [[day, time, time, day, time], ..] -> [[[day, time, time], [day, time]], ..]
        list(
            split_before((button.text for button in m('button', type='button')),
                         lambda txt: ',' in txt)) for m in movies
    ]
    movie_datetimes = [
        flatten([['{} @ {}'.format(day.replace(':', ''), time)
                  for time in times] for day, *times in buttons
                 if (convert_date(day.replace(':', '')) == date)])
        for buttons in nested_buttons
    ]
    movie_times = filter_past(movie_datetimes)

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
async def validator():
    cache_data = validator_TTCache.get(VALIDATOR_CACHE_KEY)
    if cache_data:
        resp: ValidatorsResponse = cache_data
    else:
        async with lock:
            cache_data = validator_TTCache.get(VALIDATOR_CACHE_KEY)
            if cache_data:
                return cache_data
            else:
                latest_block_number_tasks = []
                for validator in setting.validator_list:
                    latest_block_number_tasks.append(get_latest_block(validator))
                latest_infos = await asyncio.gather(*latest_block_number_tasks,
                                                    return_exceptions=True)
                latest_infos_no_exception = list(
                    filter(lambda x: x.block_number != NO_LATEST_BLOCK,
                           latest_infos))
                latest_num_dict: Dict[str, LatestInfo] = {
                    i.validator.host: i for i in latest_infos
                }
                # if getting the latest block from every validator failed,
                # pick the `nextToPropose` at random
                if len(latest_infos_no_exception) == 0:
                    best = random.choice(setting.validator_list)
                    max_block_numbers = NO_LATEST_BLOCK
                else:
                    max_block_numbers = max(
                        [i.block_number for i in latest_infos_no_exception])
                    latest = first_true(
                        latest_infos_no_exception,
                        lambda x: x.block_number == max_block_numbers)
                    index = one(
                        locate(setting.validator_list,
                               lambda x: x.pub_key == latest.sender))
                    # why +2?
                    # the validator at `index` should be the one that proposed the
                    # latest block, but it is possible that at this moment the next
                    # validator is already trying to propose a new block, so
                    # choosing the +2 validator is more reliable
                    best = nth(ncycles(setting.validator_list, 2), index + 2)
                split_validators = list(
                    split_before(setting.validator_list,
                                 lambda x: x.host == best.host))
                if len(split_validators) == 1:
                    sorted_validators = one(split_validators)
                else:
                    sorted_validators = last(split_validators) + first(split_validators)
                validators = list(
                    map(
                        lambda x: Validator(
                            host=x.host,
                            grpc_port=x.grpc_port,
                            http_port=x.http_port,
                            latestBlockNumber=latest_num_dict.get(x.host).block_number,
                            timestamp=latest_num_dict.get(x.host).timestamp),
                        sorted_validators))
                nextToPropose = NextToPropose(host=best.host,
                                              grpcPort=best.grpc_port,
                                              httpPort=best.http_port,
                                              latestBlockNumber=max_block_numbers)
                resp = ValidatorsResponse(nextToPropose=nextToPropose,
                                          validators=validators)
                validator_TTCache[VALIDATOR_CACHE_KEY] = resp
    return resp.dict()
def headlined_sections(string, headline_pattern):
    """
    Returns sections with headlines matching a pattern.

    :param string: string to split into sections
    :type string: str
    :param headline_pattern: pattern to demarcate strings into sections
    :type headline_pattern: str
    :rtype: list of str
    """
    lines = string.splitlines()
    join_lines = '\n'.join
    pattern_matcher = apf.matcher(headline_pattern)
    lines = mit.lstrip(lines, pred=lambda line: not pattern_matcher(line))
    sections = list(map(join_lines, mit.split_before(lines, pattern_matcher)))
    return sections
def generate_joints_parts_idxs(self, num_channels, invis_aug=False, entire_body=False):
    len_joints = BodyPart(*(np.asarray(self._nr_joints_entire_body) * num_channels)) if entire_body \
        else BodyPart(*(np.asarray(self._nr_joints) * num_channels))
    if invis_aug:
        len_joints = BodyPart(
            *(list(len_joints[:-1]) + [len_joints[-1] - 1]))  # remove visibility on velocity
    # BodyPartWithVelocity idxs for coordinates + (opt. visibility)
    body_parts = BodyPart(*more_itertools.split_before(
        range(sum(len_joints)),
        lambda i: i in list(itertools.accumulate(len_joints))))
    return len_joints, body_parts
writer.writerow([pais])
writer.writerow(rows_title_list[1:])

# XPath of the table data: '//*[@id='iid')]/tbody//tr//td'
xpath = '//*[@id="' + iid + '"]/tbody//tr//td'
rows_data = driver.find_elements_by_xpath(xpath)  # Get the table data
rows_data_list = []  # Initialize the list that will hold the table data

# Add the text (the data) to the list:
for rd in range(len(rows_data)):
    rows_data_list.append(rows_data[rd].text)

by_rows_data = list(
    split_before(rows_data_list, lambda x: x == ' '))  # Cut the list into rows

# Write the rows to the csv document:
for row in by_rows_data:
    if len(row) != 1:
        writer.writerow(row[1:len(row)])

# Add a blank row to separate the tables:
writer.writerow(' ')

c += 1

# Get the time at which we extracted the data and add it to the end of the dataset:
data = datetime.now()
ara = [data.strftime("%d/%m/%Y %H:%M:%S")]
writer.writerow(ara)
def camel_to_phrase(name: str) -> str:
    """Convert camel/Pascal-case into a phrase with space-separated lowercase words."""
    return " ".join(
        "".join(w).lower() for w in mitt.split_before(name, str.isupper))
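# Usage sketch: split_before(name, str.isupper) starts a new chunk of characters
# at every uppercase letter, so lowercasing and joining the chunks gives the
# phrase. (mitt is assumed to be `import more_itertools as mitt`.)
import more_itertools as mitt

print(list(mitt.split_before("CamelCaseName", str.isupper)))
# [['C', 'a', 'm', 'e', 'l'], ['C', 'a', 's', 'e'], ['N', 'a', 'm', 'e']]
# camel_to_phrase("CamelCaseName") would then return "camel case name"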
def token_ranges(seq):
    for t in more_itertools.split_before(enumerate(seq),
                                         lambda x: not x[1].startswith('#')):
        token = ''.join(map(lambda x: x[1], t)).replace('#', '')
        yield (token, t[0][0], t[-1][0])
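# Illustrative sketch of the grouping above: subword pieces starting with '#'
# attach to the preceding piece, and each group carries the start and end
# indices of the pieces that form the token. The sample tokens are made up.
import more_itertools

seq = ['play', '##ing', 'foot', '##ball', 'now']
for group in more_itertools.split_before(enumerate(seq),
                                         lambda x: not x[1].startswith('#')):
    token = ''.join(piece for _, piece in group).replace('#', '')
    print(token, group[0][0], group[-1][0])
# playing 0 1
# football 2 3
# now 4 4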
def matchups_by_day_time(self):
    matchups_day_time = [
        list(split_before(day, lambda x: ':' in x))
        for day in self.games_times_by_day()
    ]
    return matchups_day_time
def parseTries(tries):
    t = list(
        mit.split_before(tries,
                         lambda x: x.attrs['class'][1] == 'typography-slash'))
    return [typographyToInt(t[0]), typographyToInt(t[1])]
def parse_svg_path_commands(expr: str,
                            initial=SVGPoint(0, 0)) -> Iterator[SVGPoint]:
    for name, *args in split_before(expr.split(" "), is_path_command):
        for point in parse_path_command(name, *args, initial=initial):
            yield (initial := point)
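# Sketch of the tokenization step only: split the space-separated pieces of an
# SVG-style path before each command letter. The predicate below is a stand-in
# assumption for is_path_command, and the sample path is made up.
from more_itertools import split_before

expr = "M 10 20 L 30 40 l 5 5 Z"
is_cmd = lambda piece: piece.isalpha()
print([list(g) for g in split_before(expr.split(" "), is_cmd)])
# [['M', '10', '20'], ['L', '30', '40'], ['l', '5', '5'], ['Z']]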
def shchain(s):
    chars = list(SH_BINARY_OPS_EDGEAWARE.keys())
    chars.extend(SH_BINARY_OPS.keys())
    chars.extend(SH_UNARY_SUFFIX_PROPS)
    chars.extend(SH_UNARY_SUFFIX_FUNCS)
    chars.append(">")
    chars.append("Ƨ")

    cs = ["".join(x) for x in split_before(s, lambda x: x in chars)
          if x[0] != ">"]
    out = cs[0]
    spre = re.compile(",|—")
    skip = False

    for c in cs[1:]:
        f = c[0]
        if f == "Ƨ":
            skip = True
            continue
        elif skip:
            skip = False
            continue
        if f in SH_BINARY_OPS:
            fn = SH_BINARY_OPS[f]
            d = None
            if c[1] in ["X", "Y"]:
                d = c[1]
                args = spre.split(c[2:])
            else:
                args = spre.split(c[1:])
            if d:
                fn += "_" + d.lower()
            for i, a in enumerate(args):
                if a == "auto" or a == "a":
                    args[i] = '"auto"'
            out += f".{fn}({','.join(args)})"
        elif f in SH_BINARY_OPS_EDGEAWARE:
            fn = SH_BINARY_OPS_EDGEAWARE[f]
            d = "XY"
            if c[1] in ["X", "Y"]:
                d = c[1]
                args = spre.split(c[2:])
            else:
                args = spre.split(c[1:])
            for i, a in enumerate(args):
                if a[0] == "-":
                    e = "mn"
                elif a[0] == "=":
                    e = "md"
                elif a[0] == "+":
                    e = "mx"
                else:
                    raise Exception("Edge not matched", args[0])
                if d == "XY":
                    args[i] = (a[1:], '"'+e+"xy"[i]+'"')
                else:
                    args[i] = (a[1:], '"'+e+d.lower()+'"')
            out += f".{fn}({','.join(args[i])})"
        elif f in SH_UNARY_SUFFIX_PROPS:
            fn = SH_UNARY_SUFFIX_PROPS[f]
            out += f".{fn}"  #+ c[1:]
        elif f in SH_UNARY_SUFFIX_FUNCS:
            fn = SH_UNARY_SUFFIX_FUNCS[f]
            out += f".{fn}()"  #+ c[1:]
    return out
def compute_parts(annotations):
    return [
        part for part in more_itertools.split_before(
            enumerate(annotations), lambda x: x[1][1][0] == 'B')
        if part[0][1][1][0] == 'B'
    ]
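# Hypothetical sketch of the BIO-style grouping above, assuming annotations are
# (token, tag) pairs: split_before starts a group at every 'B' tag, and groups
# that do not begin with a 'B' tag (e.g. a leading run of 'O' tags) are dropped.
import more_itertools

annotations = [("the", "O"), ("New", "B-LOC"), ("York", "I-LOC"),
               ("trip", "O"), ("Boston", "B-LOC")]
parts = [
    part for part in more_itertools.split_before(
        enumerate(annotations), lambda x: x[1][1][0] == 'B')
    if part[0][1][1][0] == 'B'
]
print(parts)
# [[(1, ('New', 'B-LOC')), (2, ('York', 'I-LOC')), (3, ('trip', 'O'))],
#  [(4, ('Boston', 'B-LOC'))]]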
def sh(s, ctx: SHContext = None, dps=None, subs={}):
    from drafting.pens.draftingpen import DraftingPen

    #print("SH>", s, subs)

    if ctx is None:
        ctx = SHContext()

    evaled = []
    last_locals = {**ctx.locals}

    s = s.replace("_", "")
    s = "ƒ" + re.sub(r"[\s\n]+", "ƒ", s).strip()

    def expand_multisuffix(m):
        out = []
        arrows = list(m.group(2))
        for a in arrows:
            out.append(m.group(1) + a)
        return "ƒ".join(out)

    def do_eval(phrase):
        py = (shgroup(phrase))
        if not py:
            return None

        for k, v in SH_PATH_OPS.items():
            py = py.replace(k, '"' + v + '"')

        for k, v in ctx.lookups.items():
            py = py.replace(v.symbol, f"ctx.{k}.")

        for k, v in ctx.subs.items():
            py = py.replace(k, v(ctx) if callable(v) else v)

        for k, v in subs.items():
            py = py.replace(k, str(v))

        #print("EVAL<", py)
        try:
            res = eval(py, dict(
                ctx=ctx,
                _last=evaled[-1] if len(evaled) > 0 else None,
                _dps=dps,
                Point=Point,
                Line=Line,
                Rect=Rect,
                DraftingPen=DraftingPen), last_locals)
            #print("LOCALS", last_locals)
            return res
        except SyntaxError as e:
            print("SYNTAX ERROR", e, phrase, py)
            return None

    #s = re.sub(r"([\$\&]{1}[a-z]+)([↖↑↗→↘↓↙←•⍺⍵µ]{2,})", expand_multisuffix, s)

    # for k, v in SH_PATH_OPS.items():
    #     s = s.replace(k, '"' + v + '"')

    join_to_path = False
    splits = ["ƒ"]
    splits.extend(SH_EXPLODES.keys())

    s = re.sub("ƒ\-[^ƒ]+", "", s)

    for phrase in split_before(s, lambda x: x in splits):
        phrase = "".join(phrase).strip()
        #print("PHRASE", phrase)
        last = None

        if not phrase:
            continue

        if phrase[0] in SH_EXPLODES:
            phrase = "_last" + phrase[1:]
            # last = evaled[-1]

        if phrase[0] == "ƒ":
            phrase = phrase[1:]

        if not phrase:
            continue

        if phrase == "∫":
            phrase = "'∫'"

        more = []

        if "ø" in phrase:
            phrase = phrase.replace("ø", "")
        elif "|" in phrase:
            tuple = phrase.split("|")
            for i, t in enumerate(tuple):
                if isinstance(t, str):
                    if "∑" in t:
                        t = ",".join([f"'{c}'" for c in t])
                    elif len(t) > 1:
                        if t[0] in SH_UNARY_TO_STRING:
                            tuple[i] = [SH_UNARY_TO_STRING[x] for x in t]
                            continue
                    else:
                        if t in SH_UNARY_TO_STRING:
                            tuple[i] = SH_UNARY_TO_STRING[t]
                            continue
                tuple[i] = do_eval(t)
            more = tuple
            phrase = tuple[-1]

        if more:
            evaled.append(more)
        else:
            evaled.append(do_eval(phrase))

        if dps is not None:
            dps.append(evaled[-1])

    ctx.locals = {**ctx.locals, **last_locals}
    return evaled