Code example #1
def is_wanted_based_on_metadata(data: Iterable[Optional[str]],
                                allow_re: Optional[re.Pattern] = None,
                                block_re: Optional[re.Pattern] = None) -> bool:
    """Test each RE against each item in data (title, description...)"""
    if allow_re is None and block_re is None:
        return True
    # With an allow pattern, items start out unwanted until one matches.
    wanted = allow_re is None
    # Blocking must only happen when the block pattern actually matches;
    # starting at True would reject everything whenever block_re is set.
    blocked = False

    for item in data:
        if not item:
            continue
        if allow_re and allow_re.search(item):
            wanted = True
        if block_re and block_re.search(item):
            blocked = True

    if blocked:
        return False
    return wanted
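
A minimal usage sketch, assuming the function above is in scope; the patterns and metadata below are invented for illustration:

import re

allow = re.compile(r"python", re.IGNORECASE)
block = re.compile(r"clickbait", re.IGNORECASE)

print(is_wanted_based_on_metadata(["Python tips", None], allow, block))  # True
print(is_wanted_based_on_metadata(["Python clickbait"], allow, block))   # False: blocked
print(is_wanted_based_on_metadata(["Cooking show"], allow, block))       # False: no allow match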
Code example #2
def segment(inc_regex: Pattern, ex_regex: Pattern, **kwargs):
    """
    Process any files that need to be separated into
    segments, consistent with the include and exclude
    regular expressions. If the keyword argument
    'force' is True, then files that already have
    been segmented are resegmented.
    """
    force = kwargs.get('force', False)
    for dfname in DigFile.all_dig_files():
        # is this name consistent with the patterns?
        if not inc_regex.search(dfname):
            continue
        if ex_regex and ex_regex.search(dfname):
            continue
        df = DigFile(join(DigFile.dig_dir(), dfname))
        if not df.is_segment:
            n = df.has_segments
            if n > 0 and not force:
                continue
            # Now attempt to segment
            fids = Fiducials(df)
            splits = fids.values
            if len(splits):
                fids.split()
                print(
                    f"Split {df.filename} into {len(splits)} segments using Fiducials"
                )
                continue
            # If that didn't work, what else do we want to try?
            dt = kwargs.get('frame_length', 50e-6)  # 50 µs
            splitIntoEvenFrames(df, timeBetweenFrames=dt)
            print(f"Split {df.filename} into even frames")
Code example #3
def _get_regex_matches_in_scss_files(
    regex_pattern: re.Pattern,
    exclude_files: Optional[Iterable[str]] = None
) -> Iterable[Tuple[str, Iterable[Tuple[str, str]]]]:
    """Return a generator holding all matches of regex_pattern in scss_files (without exclude_files)
    Returned tuples hold the scss file's path and a list of line and match per match
    E.g.: (
            "git/check_mk/web/htdocs/themes/facelift/_main.scss",
            [
                ("Line 123", "rgb(0, 0, 0)"),
                ("Line 234", "rgb(255, 255, 255)"),
            ]
          )
    """
    for scss_file in scss_files():
        if exclude_files and scss_file.name in exclude_files:
            continue

        with open(scss_file) as f:
            file_matches: List[Tuple[str, str]] = []
            for i, line in enumerate(f, start=1):  # 1-based, matching the docstring example
                if match := regex_pattern.search(line):
                    file_matches.append((f"Line {i}", match.group()))

            if file_matches:
                yield (str(scss_file), file_matches)
Code example #4
def filter_regex(event: NewMessage.Event, pattern: re.Pattern) -> bool:
    text = event.message.text
    if text and pattern.search(text):
        return True
    if event.message.buttons:
        for button_row in event.message.buttons:
            for button in button_row:
                if button.text and pattern.search(button.text):
                    return True
                if button.url and pattern.search(button.url):
                    return True
    return False
Code example #5
def get_components_by_name_with_regex(client: todoist.TodoistAPI,
                                      component: str, pattern: re.Pattern):
    log = logging.getLogger(__name__)

    # Make sure the caller is asking for something we know how to fetch
    if component not in ['labels', 'projects', 'items', 'sections']:
        _e = "Don't know how to query todoist for component:{}".format(
            component)
        log.error(_e)
        raise TDTException(_e)

    # Ok, sanity confirmed, do the work
    _matches = []

    log.debug("Will look for '{}' matching '{}'...".format(component, pattern))
    # The Todoist API has a Mixin for all()
    for thing in getattr(client, component).all():
        # Most things in todoist have a 'name'. Except tasks (called items) which have a 'content'
        _property = None
        if component == 'items':
            _property = 'content'
        else:
            _property = 'name'

        # Now that we have property figured out, do the matches
        _match = pattern.search(thing[_property])
        if _match:
            _matches.append(thing)
    log.debug("returning {} _matches".format(len(_matches)))
    return _matches
Code example #6
File: pdfxmeta.py Project: Krasjet/pdf.tocgen
def search_in_page(regex: re.Pattern, page: Page) -> List[dict]:
    """Search for `text` in `page` and extract meta

    Arguments
      needle: the text to search for
      page: page number (1-based index)
    Returns
      a list of meta
    """
    result = []

    page_meta = page.getTextPage().extractDICT()

    # we are using get(key, []) to bypass any missing key errors
    for blk in page_meta.get('blocks', []):
        for ln in blk.get('lines', []):
            for spn in ln.get('spans', []):
                text = spn.get('text', "")
                # the current search algorithm is very naive and doesn't handle
                # line breaks and more complex layout. might want to take a
                # look at `page.searchFor`, but the current algorithm should be
                # enough for TeX-generated pdf
                if regex.search(text):
                    result.append(spn)
    return result
Code example #7
def applyRegex(filename, regex: re.Pattern):
    # A context manager ensures the file handle is closed after reading
    with open(filename, "r") as infile:
        return [
            ":".join(str(elem) for elem in [filename, lineNumber, line])
            for lineNumber, line in enumerate(infile)
            if regex.search(line) is not None
        ]
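
A quick sketch of calling applyRegex on a throwaway file; the temp file and pattern are invented for illustration:

import re
import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as tmp:
    tmp.write("alpha\nbeta\ngamma\n")

print(applyRegex(tmp.name, re.compile(r"^b")))  # ['<tmpdir>/....txt:1:beta\n']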
Code example #8
def process_education(stream: str, edu_pattern: re.Pattern):
    res = [[], []]
    stream = stream.strip('||')
    if edu_pattern.search(stream) is None:
        l = stream.split('||')
        if l[0] == 'degree':
            s = 1
            try:
                e_pop = l.index('Most Popular')
            except ValueError:
                e_pop = len(l)
            try:
                e_pub = l.index('Publications')
            except ValueError:
                e_pub = len(l)
            try:
                e_vmore = l.index('View More')
            except ValueError:
                e_vmore = len(l)
            e = min(e_pop, e_pub, e_vmore)
            # print(s, e)
            l = l[s + 1:e]
            for i, string in enumerate(l):
                if i % 2 == 0:
                    res[0].append(string)
                else:
                    res[1].append(string)
    maxlen = max(1, *[len(r) for r in res])
    for r in res:
        while len(r) < maxlen:
            r.append(None)
    return res
Code example #9
File: chat.py Project: itzmeanjan/chanalyze
        def _splitByDate(pattern: Pattern, content: str) -> List[str]:
            '''
                Splitting whole *.txt file content using
                date extraction regex we just built
            '''

            def _getTimeFormatRegex() -> Pattern:
                '''
                    Returns regular expression for extracting AM/PM pattern
                    from chat timestamp, where AM/PM could be prefixed with "\s" -> whitespace
                '''
                # [ap] rather than [a|p]: a class containing "|" would also match a literal pipe
                return reg_compile(r'^(\s?[ap]m)$', flags=IGNORECASE)

            _timeFormatRegex = _getTimeFormatRegex()

            splitted = list(filter(lambda v: v and not _timeFormatRegex.search(v),
                                   pattern.split(content)))

            index = -1
            for k, v in enumerate(splitted):
                if k != 0 and pattern.search(v):
                    index = k
                    break

            if index == -1:
                return splitted

            return splitted[index:]
Code example #10
File: utils.py Project: Krande/adapy
    def read_data_lines(self,
                        dat_file,
                        regex: re.Pattern,
                        start_flag,
                        end_flag=None,
                        split_data=False) -> list:
        """Reads line by line without any spaces to search for strings while disregarding formatting"""
        read_data = False
        results = []
        with open(dat_file, "r") as f:
            for line in f.readlines():
                compact_str = line.replace(" ", "").strip().lower()
                if start_flag in compact_str:
                    read_data = True
                if end_flag is not None and end_flag in compact_str:
                    return results
                if read_data is False:
                    continue
                res = regex.search(line)
                if res is not None:
                    result_data = res.group(1)
                    if split_data:
                        result_data = result_data.split()
                    results.append(result_data)

        return results
Code example #11
def re_replace(
        items: Iterable[str], regex: re.Pattern,
        subfunc: Callable[[re.Match], str]) -> Generator[str, None, None]:
    for item in items:
        m = regex.search(item)
        if m is not None:
            yield subfunc(m)
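
A minimal sketch with invented inputs: items that match are transformed through subfunc, non-matching items are dropped:

import re

names = ["img_001.png", "notes.txt", "img_042.png"]
pat = re.compile(r"img_(\d+)\.png")
print(list(re_replace(names, pat, lambda m: m.group(1))))  # ['001', '042']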
Code example #12
def _resolve_version(version: str, regex: re.Pattern, value: str) -> str:
    """Extracts the version from the match, used the matched group
    indicated by an string with format: "\\1" or
    "\\1?value_1:value_2" (ternary version) in the \\;version field
    of the regex
    """
    if not version:
        return version

    matches = regex.search(value)

    if not matches:
        return version

    resolved = version
    matches = [matches.group()] + list(matches.groups())
    for index, match in enumerate(matches):
        ternary = re.search("\\\\{}\\?([^:]+):(.*)$".format(index), version)

        if ternary:
            ternary = [ternary.group()] + list(ternary.groups())

            if len(ternary) == 3:
                resolved = version.replace(ternary[0],
                                           ternary[1] if match else ternary[2])

        resolved = resolved.strip().replace("\\{}".format(index), match or "")

    return resolved
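
A sketch of the two substitution forms the function handles; the version strings and pattern below are invented:

import re

pat = re.compile(r"nginx/(\d+\.\d+)")

# Plain backreference: "\1" is replaced by the first captured group.
print(_resolve_version("\\1", pat, "Server: nginx/1.19"))        # 1.19

# Ternary form: "\1?a:b" resolves to "a" when group 1 matched, else "b".
print(_resolve_version("\\1?modern:legacy", pat, "nginx/1.19"))  # modern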
Code example #13
def get_api_key(base_url: str, regex: re.Pattern) -> str:
    response = requests.get(base_url)
    match = regex.search(response.text)
    if not match:
        raise ValueError("Could not find the api key you were looking for.")
    # Presumably strips a fixed 7-character prefix (e.g. 'apikey=') from the match
    return match.group()[7:]
Code example #14
def get_pr_number_from_commit_message(commit_message: str,
                                      pattern: re.Pattern) -> int:
    """
    コミットメッセージからPR番号を取得

    ※PR番号はコミットメッセージの1行目に含まれることを想定

    Parameters
    ----------
    commit_message : str
        コミットメッセージ
    pattern: re.Pattern
        PR番号を表現する正規表現
        グループマッチの1つ目を使用する

    Returns
    -------
    int
        PR番号
        ※取得できない場合には0を返す
    """
    first_row = commit_message.split("\n")[0]
    m = pattern.search(first_row)
    if not m:
        # The first line of the commit message does not contain a PR number
        return 0
    pr_number = int(m.groups()[0])
    return pr_number
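
A usage sketch; the pattern below is an assumption about how PR numbers appear (e.g. "(#123)" appended by a squash merge):

import re

pr_pattern = re.compile(r"#(\d+)")
print(get_pr_number_from_commit_message("Fix typo (#123)\n\ndetails", pr_pattern))  # 123
print(get_pr_number_from_commit_message("No PR reference here", pr_pattern))        # 0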
Code example #15
def _parse_line3d(
    data: Iterable[str], criterion: re.Pattern
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray]:
    """Parse the provided lines of data to get the start and end
    coordinates of the lines of sight. This works when the data is in the
    "line3d" format.

    Parameters
    ----------
    data
        An iterable returning lines from the SURF file which should be parsed
        to get line-of-sight data.
    criterion
        A regular expression against which to evaluate the name of each
        channel. The channel will only be included in result if the regular
        expression matches the channel name.

    Returns
    -------
    Rstart
        Major radius for the start of the line of sight for each channel.
    Rend
        Major radius for the end of the line of sight for each channel.
    Zstart
        Vertical position for the start of the line of sight for each channel.
    Zend
        Vertical position for the end of the line of sight for each channel.
    Tstart
        Toroidal offset of start of the line of sight for each channel.
    Tend
        Toroidal offset of the end of the line of sight for each channel.

    """
    rstart = []
    rend = []
    zstart = []
    zend = []
    Tstart = []
    Tend = []
    for line in data:
        # "r_e" avoids shadowing the re module imported at module level
        label, rs, Ts, zs, r_e, Te, ze = _DIVIDER.split(line[:-1])
        if criterion.search(label[1:-1]):
            rstart.append(float(rs))
            rend.append(float(r_e))
            zstart.append(float(zs))
            zend.append(float(ze))
            Tstart.append(float(Ts))
            Tend.append(float(Te))
    return (
        np.array(rstart),
        np.array(rend),
        np.array(zstart),
        np.array(zend),
        np.array(Tstart),
        np.array(Tend),
    )
Code example #16
    def get_asl_workload(analysis_directory, parms: dict, workload_translator: dict, incl_regex: re.Pattern,
                         conditions: List[Tuple[str, bool]] = None):
        path_key = "MyPath"
        aslmod_dict = {}
        status_files = []
        glob_dictionary = {"ASL": "*ASL*.nii*", "FLAIR": "*FLAIR.nii*", "M0": "*M0.nii*"}
        # OLD EXPECTATION
        if is_earlier_version(parms[path_key], threshold_higher=140, higher_eq=False):
            workload = {"020_RealignASL.status", "030_RegisterASL.status", "040_ResampleASL.status",
                        "050_PreparePV.status", "060_ProcessM0.status", "070_Quantification.status",
                        "080_CreateAnalysisMask.status", "090_VisualQC_ASL.status", "999_ready.status"}
        # NEW EXPECTATION
        else:
            workload = {"020_RealignASL.status", "030_RegisterASL.status", "040_ResampleASL.status",
                        "050_PreparePV.status", "060_ProcessM0.status", "070_CreateAnalysisMask.status",
                        "080_Quantification.status", "090_VisualQC_ASL.status", "999_ready.status"}

        # The "060_ProcessM0.status" file is never generated if an integer or float is the value for the M0 parameter
        if isinstance(parms["M0"], (int, float)):
            workload.remove("060_ProcessM0.status")

        # conditions is a list of tuples whose first element is a workload filename that may be impacted and whose
        # second element is a boolean that defines whether to remove it or not
        if conditions is None:
            conditions = []
        for condition in conditions:
            filename, to_remove = condition
            if to_remove:
                workload.remove(filename)

        # Must iterate through both the subject level listing AND the session level (ASL_1, ASL_2, etc.) listing
        for subject_path in analysis_directory.iterdir():
            # Disregard files, standard directories, subjects that fail regex and subjects that are to be excluded
            if any([subject_path.is_file(), subject_path.name in ["Population", "lock", "Logs"],
                    subject_path.name in parms["dataset"]["exclusion"], not incl_regex.search(subject_path.name)]):
                continue
            aslmod_dict[subject_path.name] = {}
            for run_path in subject_path.iterdir():
                if run_path.is_file():  # This is kept separate since many files are expected
                    continue
                if not is_valid_for_analysis(path=run_path, parms=parms, glob_dict=glob_dictionary):
                    continue

                # Deduce the lock dir path and make it if it doesn't exist
                lock_dir: Path = analysis_directory / "lock" / "xASL_module_ASL" / subject_path.name / \
                                 f"xASL_module_ASL_{run_path.name}"
                if not lock_dir.exists():
                    lock_dir.mkdir(parents=True)

                # Filter out any anticipated status files that are already present in the lock dirs
                filtered_workload = [lock_dir / name for name in workload if not (lock_dir / name).exists()]
                status_files.extend(filtered_workload)
                # Calculate the numerical representation of the STATUS files workload
                num_repr = sum([workload_translator[stat_file.name] for stat_file in filtered_workload])
                aslmod_dict[subject_path.name][run_path.name] = num_repr

        return aslmod_dict, status_files
Code example #17
File: dump.py Project: kojiishi/east_asian_spacing
def _has_diff(lines: Iterable[str], ignore: re.Pattern) -> bool:
    for line in lines:
        if ignore.search(line):
            continue
        ch0 = line[0]
        if ch0 == '-' or ch0 == '+':
            return True
        assert ch0 == ' ' or ch0 == '@'
    return False
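
A sketch on an invented unified diff: the ignore pattern skips the "---"/"+++" header lines, so only real hunk changes count as a difference:

import re

diff_lines = [
    "--- a/font.txt",
    "+++ b/font.txt",
    "@@ -1,2 +1,2 @@",
    " unchanged",
    "-old glyph",
    "+new glyph",
]
print(_has_diff(diff_lines, re.compile(r"^(---|\+\+\+)")))  # True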
Code example #18
def regex_search(
    regex: re.Pattern,
    test_string: str,
    group_no: Union[int, str] = 1,
    should_raise: bool = True,
) -> Optional[str]:
    match = regex.search(test_string)
    if not match:
        if should_raise:
            raise RegexMatchError
        return None
    return match.group(group_no)
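
A usage sketch with invented inputs; RegexMatchError is assumed to be defined elsewhere in the module:

import re

print(regex_search(re.compile(r"v(\d+)"), "build v42"))                      # 42
print(regex_search(re.compile(r"x(\d+)"), "build v42", should_raise=False))  # None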
Code example #19
def is_word_fit_to_pattern(word: str, pattern: re.Pattern) -> bool:
    """
    Проверяет - подходит ли слово в паттерн.
    :param word: слово
    :param pattern: паттерн
    :return:
    """

    # todo: вернуть индексы
    return bool(pattern.search(word))
Code example #20
def _extract_label_keyword_func(exp_name: str,
                                extract_pattern: re.Pattern) -> str:
    """
    Args:
        exp_name (str): 輸入的實驗名稱
        extract_pattern(re.Pattern) : 擷取標籤的正則表達式
    Returns:
        str: 輸出的histroy plot label
    """
    return extract_pattern.search(exp_name).group()
Code example #21
    def _match_single(self, filter: re.Pattern, key: str, value: Any) -> bool:
        if value is None:
            return False

        if callable(getattr(value, 'items', None)):
            for sub_key, sub_value in value.items():
                if self._match_single(filter, sub_key, sub_value):
                    return True

            return False

        if key == 'primary':
            return value and filter.search(str(key))

        return filter.search(str(value)) is not None
Code example #22
File: word.py Project: deadshot465/owoify-py
    def replace_with_func_single(self, search_value: re.Pattern, func: Callable[[], str], replace_replaced_words: bool = False):
        replace_value = func()
        if not replace_replaced_words and self.search_value_contains_replaced_words(search_value, replace_value):
            return self

        replacing_word = self.word
        match = search_value.search(self.word)
        if match is not None:
            replacing_word = self.word.replace(match.group(), replace_value)
        collection = search_value.findall(self.word)
        replaced_words: List[str]
        if len(collection) > 1:
            replaced_words = list(map(lambda s: s.replace(s, replace_value), collection))
        else:
            replaced_words = []
        if replacing_word != self.word:
            for word in replaced_words:
                self.replaced_words.add(word)
            self.word = replacing_word
        return self
Code example #23
File: helpers.py Project: ThibTrip/npdoc_to_md
def _list_files_recursive(folder: str, pattern: re.Pattern) -> List[str]:
    """
    Lists files whose names match `pattern` in a folder and its subfolders
    """
    filepaths = []
    for root, dirs, files in os.walk(folder):
        for f in files:
            # search pattern inside of file name
            if pattern.search(Path(f).name):
                fullpath = os.path.join(root, f)
                filepaths.append(fullpath)
    return filepaths
Code example #24
File: pusher.py Project: vlaghe/hackthebox-api
    def match(self, pattern: re.Pattern) -> list:
        '''Find pattern in pusher events.

        :param re.Pattern pattern: compiled regular expression.
        :rtype: list
        '''

        for event in self.events:
            match = pattern.search(event)
            if match:
                return match.groups()
        return []
Code example #25
def safe_append_text(content, text, pattern: re.Pattern):
    """
    Append a string to a wikitext string, but before any category
    @param content: the wikitext to append to
    @param text: the string to append
    @param pattern: compiled regex locating the first category link
    @return: the combined wikitext
    """
    content = str(content)

    search = pattern.search(content)
    index = search.start() if search else len(content)
    return content[:index] + text + content[index:]
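
A sketch with invented wikitext: the appended string lands before the first category link, or at the end if no category is found:

import re

page = "Some article text.\n[[Category:Examples]]\n"
cat_re = re.compile(r"\[\[Category:")
print(safe_append_text(page, "Appended line.\n", cat_re))
# Some article text.
# Appended line.
# [[Category:Examples]]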
Code example #26
File: day07.py Project: andypymont/adventofcode
def findall(pattern: re.Pattern, text: str) -> Iterable[Sequence[Any]]:
    """
    In the style of regex.findall(text), but allowing for overlapping groups.
    """
    reported = set()
    pos = 0
    while pos < len(text):
        match = pattern.search(text, pos)
        if match:
            if match.start() not in reported:
                yield match.groups()
                reported.add(match.start())
        pos += 1
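
A sketch contrasting this generator with re.findall, which never reports overlapping matches:

import re

pat = re.compile(r"(\w)(\w)")
print(re.findall(r"(\w)(\w)", "abcd"))  # [('a', 'b'), ('c', 'd')]
print(list(findall(pat, "abcd")))       # [('a', 'b'), ('b', 'c'), ('c', 'd')]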
Code example #27
def update_version(pattern: re.Pattern, v: str, file_path: str):
    print(f"Replacing {pattern.pattern!r} with {v} in {file_path}")
    with open(file_path, "r+") as f:
        file_content = f.read()
        if not pattern.search(file_content):
            raise Exception(
                f"Pattern {pattern!r} not found in {file_path!r} file")
        new_content = pattern.sub(fr'\g<1>{v}\g<2>', file_content)
        if file_content == new_content:
            return
        f.seek(0)
        f.truncate()
        f.write(new_content)
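
A sketch with an invented version-bump pattern containing the two groups that the "\g<1>{v}\g<2>" substitution expects:

import re
import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".toml", delete=False) as tmp:
    tmp.write('version = "1.0.0"\n')

pat = re.compile(r'(version = ")[^"]+(")')
update_version(pat, "2.0.0", tmp.name)
print(open(tmp.name).read())  # version = "2.0.0"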
Code example #28
File: CmdInteractif.py Project: Boumg/BuildRadStudio
    def expect_re(self, reg: re.Pattern):
        while not reg.search(self.output):
            c = self.process.stdout.read(1)
            if c == "":
                break
            self.output += c
            print(c, end="")

        # Now we're at a prompt; clear the output buffer and return its contents
        tmp = self.output
        self.output = ""

        return tmp
Code example #29
File: word.py Project: deadshot465/owoify-py
    def replace_with_func_multiple(self, search_value: re.Pattern, func: Callable[[str, str], str], replace_replaced_words: bool = False):
        captures = search_value.search(self.word)
        if captures is None:
            return self
        replace_value = func(captures.group(1), captures.group(2))
        if not replace_replaced_words and self.search_value_contains_replaced_words(search_value, replace_value):
            return self
        replacing_word = self.word.replace(captures.group(0), replace_value)
        collection = search_value.findall(self.word)
        collection = list(flatten(collection))
        replaced_words: List[str]
        if len(collection) > 1:
            replaced_words = list(map(lambda s: s.replace(s, replace_value), collection))
        else:
            replaced_words = []

        if replacing_word != self.word:
            for word in replaced_words:
                self.replaced_words.add(word)
            self.word = replacing_word
        return self
Code example #30
File: rewrite.py Project: Noise-Labs/VSTPlugins
def process(
    path: Path,
    locale: str,
    re_download_link: re.Pattern,
    re_old_versions: re.Pattern,
    re_change_log: re.Pattern,
    change_log: str,
):
    print(f"Processing {path}")

    with open(path, "r") as fi:
        text = fi.read()

    mt = re_download_link.search(text)
    if mt is None:
        print(f"Download link not found in: {path}")
        return

    plugin_name = mt.groups()[0]
    major_version = mt.groups()[1]
    minor_version = mt.groups()[2]
    patch_version = mt.groups()[3]
    download_url = mt.groups()[4]

    # Add old download link to Old Versions section.
    old_version = f"{major_version}.{minor_version}.{patch_version}"
    old_version_link = f"- [{plugin_name} {old_version} - VST 3 (github.com)]({download_url})"
    text = re_old_versions.sub(
        lambda exp: f"{exp.group()}\n{old_version_link}", text, count=1)

    # Update download link.
    new_version = f"{major_version}.{minor_version}.{int(patch_version) + 1}"
    new_download_url = f"https://github.com/ryukau/VSTPlugins/releases/download/{release_name}/{plugin_name}{new_version}.zip"

    new_link = compose_download_link(locale, plugin_name, new_version,
                                     new_download_url)
    if new_link is None:
        return

    text = re_download_link.sub(new_link, text, count=1)

    # Add change log.
    text = re_change_log.sub(
        lambda exp: f"{exp.group()}\n- {new_version}{change_log}",
        text,
        count=1)

    out_dir = Path("out") / Path(path.parts[-2])
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / Path(path.name), "w") as fi:
        fi.write(text)