Example #1
def segment(inc_regex: Pattern, ex_regex: Pattern, **kwargs):
    """
    Process any files that need to be separated into
    segments, consistent with the include and exclude
    regular expressions. If the keyword argument
    'force' is True, then files that already have
    been segmented are resegmented.
    """
    force = kwargs.get('force', False)
    for dfname in DigFile.all_dig_files():
        # is this name consistent with the patterns?
        if not inc_regex.search(dfname):
            continue
        if ex_regex and ex_regex.search(dfname):
            continue
        df = DigFile(join(DigFile.dig_dir(), dfname))
        if not df.is_segment:
            n = df.has_segments
            if n > 0 and not force:
                continue
            # Now attempt to segment
            fids = Fiducials(df)
            splits = fids.values
            if len(splits):
                fids.split()
                print(
                    f"Split {df.filename} into {len(splits)} segments using Fiducials"
                )
                continue
            # If that didn't work, what else do we want to try?
            dt = kwargs.get('frame_length', 50e-6)  # 50 µs
            splitIntoEvenFrames(df, timeBetweenFrames=dt)
            print(f"Split {df.filename} into even frames")
Example #2
def compare(
    a: Dict[HashableLessThan, FileProperties],
    b: Dict[HashableLessThan, FileProperties],
    hasher: Optional[Hasher],
    left: bool = True,
    right: bool = True,
    both: bool = True,
    ignore: Optional[re.Pattern] = None,
    file: IO[str] = stdout,
) -> None:

    aset = a.keys()
    bset = b.keys()

    # note: the key is usually the `relpath` or the `hash`

    if left:
        print("In left only", file=file)
        for key in sorted(aset - bset):
            if ignore and ignore.match(fspath(key)):
                continue
            print("lo:", key, a[key].relpath, file=file)

    if right:
        print("In right only", file=file)
        for key in sorted(bset - aset):
            if ignore and ignore.match(fspath(key)):
                continue
            print("ro:", key, b[key].relpath, file=file)

    if both:
        print("On both, but different", file=file)
        for key in sorted(aset & bset):
            if ignore and ignore.match(fspath(key)):
                continue

            aprops = a[key]
            bprops = b[key]

            if aprops.isdir != bprops.isdir:
                print("bo:", "one is dir, one is file", key, file=file)
            if not aprops.isdir:
                if aprops.size != bprops.size:
                    print("bo:", "size different", key, aprops.size, bprops.size, file=file)
                elif aprops.size == 0 and bprops.size == 0:
                    pass
                elif hasher is not None:  # same size
                    if (aprops.hash or aprops.abspath) and (bprops.hash or bprops.abspath):
                        if not aprops.hash:
                            aprops.hash = hasher.get(Path(aprops.abspath))  # type: ignore [arg-type]
                        if not bprops.hash:
                            bprops.hash = hasher.get(Path(bprops.abspath))  # type: ignore [arg-type]
                        if aprops.hash != bprops.hash:
                            print("bo:", "hash different", key, aprops.hash, bprops.hash, file=file)
                        # else: pass # same files
                    else:
                        print("bo:", "no hash or abspath for same size files", key, file=file)
Example #3
        def _splitByDate(pattern: Pattern, content: str) -> List[str]:
            '''
                Split the whole *.txt file content using
                the date-extraction regex we just built
            '''

            def _getTimeFormatRegex() -> Pattern:
                '''
                    Returns a regular expression for extracting the AM/PM marker
                    from a chat timestamp, where AM/PM may be preceded by whitespace
                '''
                return reg_compile(r'^(\s?[ap]m)$', flags=IGNORECASE)

            _timeFormatRegex = _getTimeFormatRegex()

            splitted = [v for v in pattern.split(content)
                        if v and not _timeFormatRegex.search(v)]

            index = -1
            for k, v in enumerate(splitted):
                if k != 0 and pattern.search(v):
                    index = k
                    break

            if index == -1:
                return splitted

            return splitted[index:]
Example #4
def is_wanted_based_on_metadata(data: Iterable[Optional[str]],
                                allow_re: Optional[re.Pattern] = None,
                                block_re: Optional[re.Pattern] = None) -> bool:
    """Test each RE against each item in data (title, description...)"""
    if allow_re is None and block_re is None:
        return True
    wanted = True
    blocked = False

    if allow_re is not None:
        wanted = False

    for item in data:
        if not item:
            continue
        if allow_re and allow_re.search(item):
            wanted = True
        if block_re and block_re.search(item):
            blocked = True

    if blocked:
        return False
    return wanted
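A quick usage sketch, assuming the function above is in scope (both patterns are hypothetical). With only an allow pattern, nothing is wanted until a field matches; a block match always wins:

import re

allow = re.compile(r"python", re.IGNORECASE)    # hypothetical allow filter
block = re.compile(r"\bbeta\b", re.IGNORECASE)  # hypothetical block filter

print(is_wanted_based_on_metadata(["Python 3.12 released", None], allow, block))  # True
print(is_wanted_based_on_metadata(["Python 3.13 beta 1"], allow, block))          # False
print(is_wanted_based_on_metadata(["Release notes"], allow_re=allow))             # False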
Example #5
def _get_regex_matches_in_scss_files(
    regex_pattern: re.Pattern,
    exclude_files: Optional[Iterable[str]] = None
) -> Iterable[Tuple[str, Iterable[Tuple[str, str]]]]:
    """Return a generator holding all matches of regex_pattern in scss_files (without exclude_files)
    Returned tuples hold the scss file's path and a list of line and match per match
    E.g.: (
            "git/check_mk/web/htdocs/themes/facelift/_main.scss",
            [
                ("Line 123", "rgb(0, 0, 0)"),
                ("Line 234", "rgb(255, 255, 255)"),
            ]
          )
    """
    for scss_file in scss_files():
        if exclude_files and scss_file.name in exclude_files:
            continue

        with open(scss_file) as f:
            file_matches: List[Tuple[str, str]] = []
            for i, line in enumerate(f, start=1):
                if match := regex_pattern.search(line):
                    file_matches.append((f"Line {i}", match.group()))

            if file_matches:
                yield (str(scss_file), file_matches)
Example #6
 def _minify_dir(name: str, regex: re.Pattern = re.compile(r'^(\W*\w)')) -> str:
     """Shorten a string to the first group that matches regex.
     :param name: the single name from the path that is being shrunk
     :param regex: the pattern used to minify the name (using group 0)
     :return: the minified name if possible, else the whole name
     """
     if match := regex.match(name):
         return cast(str, match[0])
     return name
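Illustrative calls (made-up names); the default pattern keeps any leading non-word characters plus the first word character:

print(_minify_dir("home"))     # 'h'
print(_minify_dir(".config"))  # '.c'
print(_minify_dir("..."))      # '...' (no match, so the whole name comes back)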
Example #7
def display_aggregated_results(
    task_name: str,
    *,
    use_simplified_metric_name: bool = False,
    metrics_names: Optional[List[str]] = None,
    exclude_regex: Optional[Pattern] = None,
    include_regex: Optional[Pattern] = None,
    renames: Optional[List[Tuple[str, str]]] = None,
    n_steps: int,
):
    df = read_csv(get_aggregate_csv_file(task_name))

    if use_simplified_metric_name:
        df["metric"] = df["metric"].map(lambda s: s.replace(
            "/eval_phase/test_stream", "").replace("/Task000", ""))

    if exclude_regex:
        df = df[df["run_algo"].map(lambda s: exclude_regex.match(s) is None)]
    if include_regex:
        df = df[df["run_algo"].map(
            lambda s: include_regex.match(s) is not None)]

    for algo_name, replacement in renames or []:
        df["run_algo"] = df["run_algo"].map(lambda s: replacement
                                            if s == algo_name else s)

    algo_name2score = dict(df[(df["step"] == n_steps)
                              & (df["metric"] == "Top1_Acc_Stream")].groupby(
                                  "run_algo").mean()["value"].iteritems())
    df["run_algo"] = df["run_algo"].map(
        lambda name: f"{name} ({algo_name2score[name]:.1%})")

    all_metrics_names = sorted(set(df["metric"]),
                               key=_get_metric_name_priority)
    print(all_metrics_names)
    metrics_names = metrics_names or all_metrics_names

    g: FacetGrid = relplot(
        data=df,
        kind="line",
        x="step",
        y="value",
        hue="run_algo",
        col="metric",
        col_order=metrics_names,
        col_wrap=min(3, len(metrics_names)),
        facet_kws={
            "sharex": False,
            "sharey": False,
            "legend_out": False
        },
    )
    fig: Figure = g.fig
    fig.suptitle(task_name, fontsize=16)
    fig.tight_layout()
    fig.show()
Example #8
def filter_regex(event: NewMessage.Event, pattern: re.Pattern) -> bool:
    text = event.message.text
    if text and pattern.search(text):
        return True
    if event.message.buttons:
        for button_row in event.message.buttons:
            for button in button_row:
                if button.text and pattern.search(button.text):
                    return True
                if button.url and pattern.search(button.url):
                    return True
    return False
Example #9
def update_version(pattern: re.Pattern, v: str, file_path: str):
    print(f"Replacing {pattern} with {v} in {file_path}")
    with open(file_path, "r+") as f:
        file_content = f.read()
        if not pattern.search(file_content):
            raise Exception(
                f"Pattern {pattern!r} doesn't found in {file_path!r} file")
        new_content = pattern.sub(fr'\g<1>{v}\g<2>', file_content)
        if file_content == new_content:
            return
        f.seek(0)
        f.truncate()
        f.write(new_content)
Example #10
def process(
    path: Path,
    locale: str,
    re_download_link: re.Pattern,
    re_old_versions: re.Pattern,
    re_change_log: re.Pattern,
    change_log: str,
):
    print(f"Processing {path}")

    with open(path, "r") as fi:
        text = fi.read()

    mt = re_download_link.search(text)
    if mt is None:
        print(f"Download link not found in: {path}")
        return

    plugin_name = mt.groups()[0]
    major_version = mt.groups()[1]
    minor_version = mt.groups()[2]
    patch_version = mt.groups()[3]
    download_url = mt.groups()[4]

    # Add old download link to Old Versions section.
    old_version = f"{major_version}.{minor_version}.{patch_version}"
    old_version_link = f"- [{plugin_name} {old_version} - VST 3 (github.com)]({download_url})"
    text = re_old_versions.sub(
        lambda exp: f"{exp.group()}\n{old_version_link}", text, count=1)

    # Update download link.
    new_version = f"{major_version}.{minor_version}.{int(patch_version) + 1}"
    new_download_url = f"https://github.com/ryukau/VSTPlugins/releases/download/{release_name}/{plugin_name}{new_version}.zip"

    new_link = compose_download_link(locale, plugin_name, new_version,
                                     new_download_url)
    if new_link is None:
        return

    text = re_download_link.sub(new_link, text, count=1)

    # Add change log.
    text = re_change_log.sub(
        lambda exp: f"{exp.group()}\n- {new_version}{change_log}",
        text,
        count=1)

    out_dir = Path("out") / Path(path.parts[-2])
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / Path(path.name), "w") as fi:
        fi.write(text)
Example #11
def get_drug_names_by_suffix(drug_name: str, suffixes: List[str],
                             split_chars: re.Pattern,
                             remove_chars: re.Pattern) -> List[str]:
    drug_name_token_list = []
    drug_name = drug_name.lower()
    drug_token = split_chars.split(drug_name)
    drug_token = [remove_chars.sub("", token) for token in drug_token]
    for token in drug_token:
        for suffix in suffixes:
            if token.endswith(suffix):
                drug_name_token_list.append(token)
                break

    return drug_name_token_list
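A hedged usage sketch; both patterns below are made up for illustration (the real ones live at the call site):

import re

split_chars = re.compile(r"[\s/+-]")  # hypothetical token separators
remove_chars = re.compile(r"[^a-z]")  # hypothetical cleanup pattern

print(get_drug_names_by_suffix("Amoxicillin/Clavulanate 500mg",
                               ["cillin", "anate"],
                               split_chars, remove_chars))
# ['amoxicillin', 'clavulanate']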
Example #12
 def passes_filter(self, account: re.Pattern, sender: re.Pattern,
                   conversation: re.Pattern, message: re.Pattern,
                   flags: re.Pattern):
     if account.fullmatch(self.account) is None:
         return False
     if sender.fullmatch(self.sender) is None:
         return False
     if conversation.fullmatch(self.conversation) is None:
         return False
     if message.fullmatch(self.message) is None:
         return False
     if flags.fullmatch(self.flags) is None:
         return False
     return True
Example #13
    def check_for_match(self, pattern: re.Pattern) -> bool:
        match = []
        if self.title:
            match += pattern.findall(self.title.lower())

        if self.text:
            match += pattern.findall(self.text.lower())

        if match:
            logger.info('{0} - {1}: Match!!!'.format(self.id,
                                                     self.source_name))
            self.match_words = tools.delete_duplicates(match)
            return True
        else:
            return False
Example #14
def samp(corpus: TextIO, samp_corpora: List[TextIO],
         samp_size: int, fd_removed: TextIO, valid_pwd: Pattern):
    for samp_corpus in samp_corpora:
        if not samp_corpus.writable():
            print("Training and Testing SHOULD be Writable!", file=sys.stderr)
            sys.exit(-1)
    if len(samp_corpora) < 1:
        print("At least one sample file!", file=sys.stderr)
        sys.exit(-1)
    pwd_set = []
    count_invalid = defaultdict(int)
    for line in corpus:
        line = line.strip("\r\n")
        if valid_pwd.match(line) is None:
            count_invalid[line] += 1
            continue
        pwd_set.append(line)
    samp_size = min(len(pwd_set), samp_size)
    for idx, samp_corpus in enumerate(samp_corpora):
        shuffle(pwd_set)
        for line in pwd_set[:samp_size]:
            samp_corpus.write(f"{line}\n")
        samp_corpus.flush()
        print(f"{idx + 1} sample file saved here: {samp_corpus.name}", file=sys.stderr)
        samp_corpus.close()

    if len(count_invalid) != 0 and fd_removed is not None:
        print(f"Removed invalid passwords saved in {fd_removed.name}", file=sys.stderr)
        for p, n in sorted(count_invalid.items(), key=lambda x: x[1], reverse=True):
            fd_removed.write(f"{p}\t{n}\n")
        fd_removed.close()
    print("Done!", file=sys.stderr)
Example #15
def process_logs(query: re.Pattern, args: argparse.Namespace) -> None:
    with open(args.input_file_name, 'r') as f:
        with open(args.output_file_name, 'a') as o:
            for line in f:

                match = query.match(line)

                if match is None:
                    continue

                named_matches = match.groupdict()
                # these if statements could probably be put into a method and refactored to be better
                # but for now this is okay

                if named_matches['start'] != '-' and args.start is not None and \
                        int(named_matches['start']) < args.start:
                    continue

                if named_matches['start'] != '-' and args.end is not None and \
                        int(named_matches['start']) > args.end:
                    continue

                if named_matches['bytes'] != '-' and args.bytes is not None and \
                        int(named_matches['bytes']) < args.bytes:
                    continue

                if named_matches['packets'] != '-' and args.packets is not None and \
                        int(named_matches['packets']) < args.packets:
                    continue

                #  print(f"[FOUND] {line.rstrip()}")

                o.write(line)
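A minimal call-site sketch, assuming a flow-log pattern with the named groups the filters expect (file names and thresholds are hypothetical):

import argparse
import re

query = re.compile(
    r"(?P<start>\S+) (?P<end>\S+) (?P<bytes>\S+) (?P<packets>\S+)")
args = argparse.Namespace(
    input_file_name="flows.log", output_file_name="filtered.log",
    start=None, end=None, bytes=1000, packets=None)
process_logs(query, args)  # keeps only lines transferring at least 1000 bytes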
Example #16
 def _parse_from_keywords(
     self,
     transaction_type: str,
     description: list[str],
     keywords: re.Pattern,
     *,
     bookdate: date,
     value_date: date,
     amount: Decimal,
 ) -> BaseTransaction:
     d = dict[str, str]()
     current_key = 'transaction_type'
     current_value = transaction_type
     for line in description[1:]:
         m = keywords.match(line)
         if m is None:
             current_value += line
         else:
             d[current_key] = current_value.rstrip()
             current_key = m.group(1)
             current_value = line[m.end():]
     d[current_key] = current_value.rstrip()
     omschrijving = d.get('Omschrijving')
     if omschrijving is None:
         omschrijving = d['Kenmerk']
     return Transaction(account=self.account,
                        description=omschrijving,
                        operation_date=bookdate,
                        value_date=value_date,
                        amount=amount,
                        currency=self.currency,
                        metadata=d)
Example #17
    def read_data_lines(self,
                        dat_file,
                        regex: re.Pattern,
                        start_flag,
                        end_flag=None,
                        split_data=False) -> list:
        """Reads line by line without any spaces to search for strings while disregarding formatting"""
        read_data = False
        results = []
        with open(dat_file, "r") as f:
            for line in f.readlines():
                compact_str = line.replace(" ", "").strip().lower()
                if start_flag in compact_str:
                    read_data = True
                if end_flag is not None and end_flag in compact_str:
                    return results
                if read_data is False:
                    continue
                res = regex.search(line)
                if res is not None:
                    result_data = res.group(1)
                    if split_data:
                        result_data = result_data.split()
                    results.append(result_data)

        return results
Example #18
def search_in_page(regex: re.Pattern, page: Page) -> List[dict]:
    """Search for `text` in `page` and extract meta

    Arguments
      needle: the text to search for
      page: page number (1-based index)
    Returns
      a list of meta
    """
    result = []

    page_meta = page.getTextPage().extractDICT()

    # we are using get(key, []) to bypass any missing key errors
    for blk in page_meta.get('blocks', []):
        for ln in blk.get('lines', []):
            for spn in ln.get('spans', []):
                text = spn.get('text', "")
                # the current search algorithm is very naive and doesn't handle
                # line breaks and more complex layout. might want to take a
                # look at `page.searchFor`, but the current algorithm should be
                # enough for TeX-generated pdf
                if regex.search(text):
                    result.append(spn)
    return result
Example #19
def _finditer_with_line_numbers(
        pattern: re.Pattern,
        string: str) -> ty.Iterator[ty.Tuple[re.Match, int]]:
    """
    A version of 're.finditer' that returns '(match, line_number)' pairs.
    """

    matches = list(pattern.finditer(string))
    if not matches:
        return

    end = matches[-1].start()
    # -1 so a failed 'rfind' maps to the first line.
    newline_table = {-1: 0}
    for i, m in enumerate(re.finditer(r"\n", string), 1):
        # don't find newlines past our last match
        offset = m.start()
        if offset > end:
            break
        newline_table[offset] = i

    # Failing to find the newline is OK, -1 maps to 0.
    for m in matches:
        newline_offset = string.rfind("\n", 0, m.start())
        # + 1 because line numbers are 1-based, not 0-based.
        line_number = newline_table[newline_offset] + 1
        yield m, line_number
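A small self-test with an illustrative string; the first match sits on line 1, the second on line 3:

import re

text = "foo\nbar\nfoo baz\n"
for m, line_number in _finditer_with_line_numbers(re.compile(r"foo"), text):
    print(line_number, m.group())
# 1 foo
# 3 foo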
Example #20
def re_replace(
        items: Iterable[str], regex: re.Pattern,
        subfunc: Callable[[re.Match], str]) -> Generator[str, None, None]:
    for item in items:
        m = regex.search(item)
        if m is not None:
            yield subfunc(m)
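For example, pulling the first match out of each item and transforming it; items without a match are silently dropped:

import re

names = ["img_001.png", "notes.txt", "img_042.png"]
print(list(re_replace(names, re.compile(r"\d+"),
                      lambda m: m.group().lstrip("0"))))
# ['1', '42']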
Example #21
def valid_fc(argument: str, *, _fc: re.Pattern = _friend_code) -> str:
    fc = argument.upper().strip('"')
    m = _fc.match(fc)
    if m is None:
        raise commands.BadArgument("Not a valid friend code!")

    return "{one}-{two}-{three}".format(**m.groupdict())
Example #22
def check_pattern(arg_value: str, pattern: re.Pattern) -> str:
    if not pattern.match(arg_value):
        raise argparse.ArgumentTypeError(
            f"Invalid value provided! Must match regex pattern: {pattern.pattern}"
        )
    else:
        return arg_value
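Typical wiring into argparse (the option name and pattern are illustrative):

import argparse
import functools
import re

parser = argparse.ArgumentParser()
parser.add_argument(
    "--tag",
    type=functools.partial(check_pattern, pattern=re.compile(r"^v\d+\.\d+$")),
)
print(parser.parse_args(["--tag", "v1.2"]).tag)  # v1.2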
Example #23
def password_match(line: str,
                   password_policy_and_password_regex: re.Pattern) -> bool:
    match = password_policy_and_password_regex.match(line)

    if match is None:
        return False

    min_letter_count = int(match.group(1))
    max_letter_count = int(match.group(2))
    letter = match.group(3)
    password = match.group(4)
    occurences_count = password.count(letter)
    result = min_letter_count <= occurences_count <= max_letter_count

    print(
        "{password:<30} {occurences_count:>3}{letter} {belongs} [{min_letter_count}, {max_letter_count}] {result}"
        .format(
            password=password,
            occurences_count=occurences_count,
            letter=letter,
            belongs=("∈" if result else "∉"),
            min_letter_count=min_letter_count,
            max_letter_count=max_letter_count,
            result=('\033[92mOK\033[0m' if result else '\033[91mKO\033[0m')))
    return result
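A sketch with a pattern of the shape the four groups imply (min-max, letter, password), as in the Advent of Code day 2 format:

import re

policy_re = re.compile(r"^(\d+)-(\d+) (\w): (\w+)$")
print(password_match("1-3 a: abcde", policy_re))  # True:  'a' occurs once
print(password_match("1-3 b: cdefg", policy_re))  # False: 'b' never occurs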
Example #24
def get_pr_number_from_commit_message(commit_message: str,
                                      pattern: re.Pattern) -> int:
    """
    コミットメッセージからPR番号を取得

    ※PR番号はコミットメッセージの1行目に含まれることを想定

    Parameters
    ----------
    commit_message : str
        コミットメッセージ
    pattern: re.Pattern
        PR番号を表現する正規表現
        グループマッチの1つ目を使用する

    Returns
    -------
    int
        PR番号
        ※取得できない場合には0を返す
    """
    first_row = commit_message.split("\n")[0]
    m = pattern.search(first_row)
    if not m:
        # the first line of the commit message does not contain a PR number
        return 0
    pr_number = int(m.groups()[0])
    return pr_number
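For example, with GitHub's default squash-merge subject format (the pattern is illustrative):

import re

pr_pattern = re.compile(r"\(#(\d+)\)")
print(get_pr_number_from_commit_message("Fix typo in README (#123)", pr_pattern))  # 123
print(get_pr_number_from_commit_message("Initial commit", pr_pattern))             # 0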
Example #25
 def match(self, regexp: Pattern) -> Optional[ParseResult[str]]:
     match = regexp.match(self.string, self.index)
     if match:
         value = match.group(0)
         source = Source(self.string, self.index + len(value))
         return ParseResult(value, source)
     return None
Example #26
def applyRegex(filename, regex: re.Pattern):
    with open(filename, "r") as infile:
        return [
            ":".join(str(elem) for elem in [filename, lineNumber, line])
            for lineNumber, line in enumerate(infile)
            if regex.search(line) is not None
        ]
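A grep-like usage sketch (file name and pattern are hypothetical); note that line numbers are zero-based and matched lines keep their trailing newlines:

import re

for hit in applyRegex("server.log", re.compile(r"ERROR")):
    print(hit, end="")
# e.g. server.log:41:2024-05-01 12:00:03 ERROR disk full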
Example #27
def _resolve_version(version: str, regex: re.Pattern, value: str) -> str:
    """Extracts the version from the match, used the matched group
    indicated by an string with format: "\\1" or
    "\\1?value_1:value_2" (ternary version) in the \\;version field
    of the regex
    """
    if not version:
        return version

    matches = regex.search(value)

    if not matches:
        return version

    resolved = version
    matches = [matches.group()] + list(matches.groups())
    for index, match in enumerate(matches):
        ternary = re.search("\\\\{}\\?([^:]+):(.*)$".format(index), version)

        if ternary:
            ternary = [ternary.group()] + list(ternary.groups())

            if len(ternary) == 3:
                resolved = version.replace(ternary[0],
                                           ternary[1] if match else ternary[2])

        resolved = resolved.strip().replace("\\{}".format(index), match or "")

    return resolved
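Two illustrative resolutions, assuming the function above: a plain group substitution and the ternary form:

import re

# "\1" is replaced by group 1 of the match.
print(_resolve_version("\\1", re.compile(r"nginx/([\d.]+)"),
                       "Server: nginx/1.25.3"))
# 1.25.3

# "\1?a:b" resolves to 'a' when group 1 participated in the match, else 'b'.
print(_resolve_version("\\1?extra:plain", re.compile(r"nginx(-extra)?"), "nginx"))
# plain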
Example #28
def _verify_public_instance_jwt(
    cache: 'cg_cache.inter_request.Backend[str]',
    signature: str,
    allowed_hosts: re.Pattern,
) -> str:
    # First get the url from the jwt without verifying, then get the
    # public key and do the verification.
    unsafe_decoded = jwt.decode(signature, verify=False)
    if allowed_hosts.match(unsafe_decoded.get('url', None)) is None:
        raise PermissionException(401)

    try:
        decoded = cache.cached_call(
            key=unsafe_decoded['url'],
            get_value=lambda: _download_public_key(
                unsafe_decoded['url'],
                unsafe_decoded['id'],
            ),
            callback=lambda public_key: jwt.decode(
                signature,
                key=public_key,
                algorithms='RS256',
                verify=True,
            )
        )
        assert decoded == unsafe_decoded
    except BaseException as exc:  # pylint: disable=broad-except
        logger.error('Got unauthorized broker request', exc_info=True)
        raise PermissionException(401) from exc
    else:
        return decoded['url']
Example #29
    def consume(self, size: int = 1, regex: Optional[re.Pattern] = None, text: Optional[str] = None, regex_group: int = 0):
        at = self.pos

        if regex:
            if not isinstance(regex, re.Pattern):
                print("uncompiled regex passed to peek!")
                regex = re.compile(regex)
            match = regex.match(self.content[at:])
            if match is None:
                return None

            if regex_group != 0 and not match.group(0).startswith(match.group(regex_group)):
                print("Cannot consume regex group that does not start at match start!")
                return None
            self.pos += len(match.group(regex_group))
            return match.group(regex_group)

        if text:
            if self.content[at:].startswith(text):
                self.pos += len(text)
                return text
            return None

        self.pos += size
        return self.content[at:at + size]
Example #30
def get_api_key(base_url: str, regex: re.Pattern) -> str:
    response = requests.get(base_url)
    match = regex.search(response.text)
    if match is None:
        raise ValueError("Could not find the api key you were looking for.")

    return match.group()[7:]
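A hedged call-site sketch; the URL and pattern are made up, and the [7:] slice assumes the match carries a fixed 7-character prefix such as 'apikey=':

import re

# Hypothetical: the page embeds something like `apikey=0123abcd...`.
key = get_api_key("https://example.com/config.js",
                  re.compile(r"apikey=\w+"))
print(key)  # the token with the 7-character 'apikey=' prefix stripped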