def is_wanted_based_on_metadata(data: Iterable[Optional[str]],
                                allow_re: Optional[re.Pattern] = None,
                                block_re: Optional[re.Pattern] = None) -> bool:
    """Test each RE against each item in data (title, description...).

    Returns True when the metadata passes the filters:
    - with no filters, everything is wanted;
    - with allow_re, at least one non-empty item must match it;
    - with block_re, no item may match it (blocking wins over allowing).
    """
    if allow_re is None and block_re is None:
        return True
    # With an allow filter present, nothing is wanted until a match is found.
    wanted = allow_re is None
    # Bug fix: previously `blocked` was initialized to True whenever block_re
    # was supplied, so any block filter rejected everything unconditionally.
    blocked = False
    for item in data:
        if not item:
            continue
        if allow_re and allow_re.search(item):
            wanted = True
        if block_re and block_re.search(item):
            blocked = True
    if blocked:
        return False
    return wanted
def segment(inc_regex: Pattern, ex_regex: Pattern, **kwargs):
    """
    Process any files that need to be separated into segments, consistent
    with the include and exclude regular expressions. If the keyword
    argument 'force' is True, then files that already have been segmented
    are resegmented.

    Keyword arguments:
        force: re-segment files that already have segments (default False)
        frame_length: frame duration used by the even-frame fallback
                      (default 50e-6, i.e. 50 microseconds)
    """
    force = kwargs.get('force', False)
    for dfname in DigFile.all_dig_files():
        # is this name consistent with the patterns?
        if not inc_regex.search(dfname):
            continue
        if ex_regex and ex_regex.search(dfname):
            continue
        df = DigFile(join(DigFile.dig_dir(), dfname))
        # Only files that are not themselves segments are candidates.
        # NOTE(review): nesting inferred — segmenting a segment makes no
        # sense, so the whole remainder of the loop is scoped to this guard.
        if not df.is_segment:
            n = df.has_segments
            # Already segmented and not forced -> nothing to do.
            if n > 0 and not force:
                continue
            # Now attempt to segment using fiducial markers first.
            fids = Fiducials(df)
            splits = fids.values
            if len(splits):
                fids.split()
                print(
                    f"Split {df.filename} into {len(splits)} segments using Fiducials"
                )
                continue
            # If that didn't work, what else do we want to try?
            # Fall back to fixed-length frames.
            dt = kwargs.get('frame_length', 50e-6)  # 50 µs
            splitIntoEvenFrames(df, timeBetweenFrames=dt)
            print(f"Split {df.filename} into even frames")
def _get_regex_matches_in_scss_files(
        regex_pattern: re.Pattern, exclude_files: Optional[Iterable[str]] = None
) -> Iterable[Tuple[str, Iterable[Tuple[str, str]]]]:
    """Return a generator holding all matches of regex_pattern in scss_files
    (without exclude_files)

    Returned tuples hold the scss file's path and a list of line and match per match
    E.g.:
        (
            "git/check_mk/web/htdocs/themes/facelift/_main.scss",
            [
                ("Line 123", "rgb(0, 0, 0)"),
                ("Line 234", "rgb(255, 255, 255)"),
            ]
        )
    """
    for scss_file in scss_files():
        if exclude_files and scss_file.name in exclude_files:
            continue
        with open(scss_file) as f:
            file_matches: List[Tuple[str, str]] = []
            # Bug fix: enumerate from 1 so reported numbers match editor
            # line numbers (previously 0-based, off by one).
            for line_no, line in enumerate(f, start=1):
                if match := regex_pattern.search(line):
                    file_matches.append((f"Line: {line_no}", match.group()))
        if file_matches:
            yield (str(scss_file), file_matches)
def filter_regex(event: NewMessage.Event, pattern: re.Pattern) -> bool:
    """Return True when `pattern` matches the message text, or the label
    or URL of any inline button; False otherwise."""
    body = event.message.text
    if body and pattern.search(body):
        return True
    rows = event.message.buttons
    if rows:
        for row in rows:
            for btn in row:
                if btn.text and pattern.search(btn.text):
                    return True
                if btn.url and pattern.search(btn.url):
                    return True
    return False
def get_components_by_name_with_regex(client: todoist.TodoistAPI, component: str, pattern: re.Pattern):
    """Fetch every `component` object from todoist and return those whose
    name (or content, for items) matches `pattern`.

    Raises TDTException when `component` is not a supported kind."""
    log = logging.getLogger(__name__)
    # Make sure the caller is asking for something we know how to fetch
    if component not in ('labels', 'projects', 'items', 'sections'):
        _e = "Don't know how to query todoist for component:{}".format(
            component)
        log.error(_e)
        raise TDTException(_e)
    # Ok, sanity confirmed, do the work
    log.debug("Will look for '{}' matching '{}'...".format(component, pattern))
    # Most things in todoist have a 'name'. Except tasks (called items)
    # which have a 'content' field instead.
    field = 'content' if component == 'items' else 'name'
    # The Todoist API has a Mixin for all()
    _matches = [thing for thing in getattr(client, component).all()
                if pattern.search(thing[field])]
    log.debug("returning {} _matches".format(len(_matches)))
    return _matches
def search_in_page(regex: re.Pattern, page: Page) -> List[dict]:
    """Search for `text` in `page` and extract meta

    Arguments
        needle: the text to search for
        page: page number (1-based index)

    Returns a list of meta
    """
    hits = []
    page_meta = page.getTextPage().extractDICT()
    # get(key, []) guards against any missing keys in the extracted dict
    for block in page_meta.get('blocks', []):
        for line in block.get('lines', []):
            # Naive span-local matching: no line-break or complex-layout
            # handling (see page.searchFor for more), but good enough for
            # TeX-generated pdfs.
            hits.extend(
                span for span in line.get('spans', [])
                if regex.search(span.get('text', ""))
            )
    return hits
def applyRegex(filename, regex: re.Pattern):
    """Return "<filename>:<lineNumber>:<line>" for every line of `filename`
    matching `regex`.

    Line numbers are 0-based (enumerate default), preserving the original
    output format; the line text keeps its trailing newline.
    """
    # Bug fix: use a context manager so the file handle is closed
    # (the original never closed the open handle).
    with open(filename, "r") as infile:
        return [
            ":".join(str(elem) for elem in [filename, lineNumber, line])
            for lineNumber, line in enumerate(infile)
            if regex.search(line) is not None
        ]
def process_education(stream: str, edu_pattern: re.Pattern):
    """Parse a '||'-separated education stream into two parallel lists.

    When `edu_pattern` does NOT match and the stream starts with 'degree',
    tokens between the header and the first terminator heading ('Most
    Popular' / 'Publications' / 'View More') are alternated into the two
    result columns.  Both columns are padded with None to equal length
    (at least 1).

    Returns:
        [names, values] — two lists of equal length.
    """
    res = [[], []]
    stream = stream.strip('||')
    # Idiom fix: `is None` instead of `== None`.
    if edu_pattern.search(stream) is None:
        l = stream.split('||')
        if l[0] == 'degree':
            s = 1
            # Find the earliest terminator heading; fall back to end of list.
            # (Unused `as e` bindings removed — they shadowed the later `e`.)
            try:
                e_pop = l.index('Most Popular')
            except ValueError:
                e_pop = len(l)
            try:
                e_pub = l.index('Publications')
            except ValueError:
                e_pub = len(l)
            try:
                e_vmore = l.index('View More')
            except ValueError:
                e_vmore = len(l)
            e = min(e_pop, e_pub, e_vmore)
            # print(s, e)
            l = l[s + 1:e]
            # Alternate tokens into the two result columns.
            for i, string in enumerate(l):
                if i % 2 == 0:
                    res[0].append(string)
                else:
                    res[1].append(string)
    # Pad both columns with None to the same (>= 1) length.
    maxlen = max(1, *[len(r) for r in res])
    for r in res:
        while len(r) < maxlen:
            r.append(None)
    return res
def _splitByDate(pattern: Pattern, content: str) -> List[str]: ''' Splitting whole *.txt file content using date extraction regex we just built ''' def _getTimeFormatRegex() -> Pattern: ''' Returns regular expression for extracting AM/PM pattern from chat timestamp, where AM/PM could be prefixed with "\s" -> whitespace ''' return reg_compile(r'^(\s?[a|p]m)$', flags=IGNORECASE) _timeFormatRegex = _getTimeFormatRegex() splitted = list(filter(lambda v: not _timeFormatRegex.search(v), filter(lambda v: len(v) != 0, filter(lambda v: v, pattern.split(content))))) index = -1 for k, v in enumerate(splitted): if k != 0 and pattern.search(v): index = k break if index == -1: return splitted return splitted[index:]
def read_data_lines(self, dat_file, regex: re.Pattern, start_flag, end_flag=None, split_data=False) -> list:
    """Reads line by line without any spaces to search for strings while disregarding formatting"""
    capturing = False
    found = []
    with open(dat_file, "r") as handle:
        for raw_line in handle.readlines():
            # Normalize: drop spaces, trim, lowercase — flags match loosely.
            normalized = raw_line.replace(" ", "").strip().lower()
            if start_flag in normalized:
                capturing = True
            if end_flag is not None and end_flag in normalized:
                return found
            if not capturing:
                continue
            # Match against the ORIGINAL (unnormalized) line.
            hit = regex.search(raw_line)
            if hit is None:
                continue
            payload = hit.group(1)
            if split_data:
                payload = payload.split()
            found.append(payload)
    return found
def re_replace(
        items: Iterable[str],
        regex: re.Pattern,
        subfunc: Callable[[re.Match], str]) -> Generator[str, None, None]:
    """Yield subfunc(match) for each item where `regex` finds a match;
    items without a match are silently skipped."""
    yield from (
        subfunc(found)
        for found in (regex.search(entry) for entry in items)
        if found is not None
    )
def _resolve_version(version: str, regex: re.Pattern, value: str) -> str:
    """Extracts the version from the match, used the matched group indicated
    by an string with format: "\\1" or "\\1?value_1:value_2" (ternary version)
    in the \\;version field of the regex
    """
    if not version:
        return version
    matches = regex.search(value)
    if not matches:
        return version
    resolved = version
    # Group 0 (the whole match) plus each captured group, so that "\\0",
    # "\\1", ... in the version template index into this list directly.
    matches = [matches.group()] + list(matches.groups())
    for index, match in enumerate(matches):
        # Look for a ternary placeholder "\\N?value_if_match:value_if_not"
        # for this group index inside the version template.
        ternary = re.search("\\\\{}\\?([^:]+):(.*)$".format(index), version)
        if ternary:
            ternary = [ternary.group()] + list(ternary.groups())
            if len(ternary) == 3:
                # Substitute the whole ternary expression with one branch,
                # chosen by whether this group actually matched.
                resolved = version.replace(ternary[0],
                                           ternary[1] if match else ternary[2])
        # Plain "\\N" placeholders become the group text ("" when the
        # group did not participate in the match).
        resolved = resolved.strip().replace("\\{}".format(index), match or "")
    return resolved
def get_api_key(base_url: str, regex: re.Pattern) -> str:
    """Fetch `base_url` and extract the API key via `regex`.

    Returns the matched text with its first 7 characters stripped
    (presumably a fixed prefix in the matched text — confirm against the
    pattern callers pass).

    Raises ValueError when the pattern does not match the response body.
    """
    response = requests.get(base_url)
    match = regex.search(response.text)
    # Bug fix: the original called .group() on a possibly-None match, which
    # raised AttributeError before the intended ValueError could ever fire
    # (and the old `if not re_return` check could never trigger).
    if match is None:
        raise ValueError("Could not find the api key you were looking for.")
    return match.group()[7:]
def get_pr_number_from_commit_message(commit_message: str, pattern: re.Pattern) -> int:
    """Extract the PR number from a commit message.

    The PR number is expected to appear on the first line of the message.

    Parameters
    ----------
    commit_message : str
        The commit message.
    pattern : re.Pattern
        Regular expression describing the PR number; the first captured
        group is used.

    Returns
    -------
    int
        The PR number, or 0 when it cannot be extracted.
    """
    head = commit_message.split("\n")[0]
    found = pattern.search(head)
    if found:
        return int(found.groups()[0])
    # First line carries no PR number.
    return 0
def _parse_line3d(
    data: Iterable[str], criterion: re.Pattern
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Parse the provided lines of data to get the start and end coordinates
    of the lines of sight. This works when the data is in the "line3d"
    format.

    Parameters
    ----------
    data
        An iterable returning lines from the SURF file which should be
        parsed to get line-of-sight data.
    criterion
        A regular expression against which to evaluate the name of each
        channel. The channel will only be included in result if the regular
        expression matches the channel name.

    Returns
    -------
    Rstart
        Major radius for the start of the line of sight for each channel.
    Rend
        Major radius for the end of the line of sight for each channel.
    Zstart
        Vertical position for the start of the line of sight for each channel.
    Zend
        Vertical position for the end of the line of sight for each channel.
    Tstart
        Toroidal offset of start of the line of sight for each channel.
    Tend
        Toroidal offset of the end of the line of sight for each channel.
    """
    rstart = []
    rend = []
    zstart = []
    zend = []
    Tstart = []
    Tend = []
    for line in data:
        # Bug fix: the original unpacked the end radius into a local named
        # `re`, shadowing the `re` module inside the loop; renamed.
        label, r_s, T_s, z_s, r_e, T_e, z_e = _DIVIDER.split(line[:-1])
        # label[1:-1] strips the surrounding quote characters before testing.
        if criterion.search(label[1:-1]):
            rstart.append(float(r_s))
            rend.append(float(r_e))
            zstart.append(float(z_s))
            zend.append(float(z_e))
            Tstart.append(float(T_s))
            Tend.append(float(T_e))
    return (
        np.array(rstart),
        np.array(rend),
        np.array(zstart),
        np.array(zend),
        np.array(Tstart),
        np.array(Tend),
    )
def get_asl_workload(analysis_directory, parms: dict, workload_translator: dict,
                     incl_regex: re.Pattern,
                     conditions: List[Tuple[str, bool]] = None):
    """Compute the expected ASL-module workload of STATUS files per subject/run.

    Returns a tuple of:
      - aslmod_dict: {subject_name: {run_name: numeric workload}} where the
        numeric value is the sum of workload_translator values for the
        STATUS files still missing in that run's lock directory;
      - status_files: flat list of all missing STATUS file Paths.

    NOTE(review): `analysis_directory` is presumably a pathlib.Path (it is
    iterated with .iterdir() and joined with /) — confirm with callers.
    """
    path_key = "MyPath"
    aslmod_dict = {}
    status_files = []
    glob_dictionary = {"ASL": "*ASL*.nii*", "FLAIR": "*FLAIR.nii*", "M0": "*M0.nii*"}
    # OLD EXPECTATION: versions earlier than 140 ordered the mask/quantify
    # steps differently (070 = Quantification, 080 = CreateAnalysisMask).
    if is_earlier_version(parms[path_key], threshold_higher=140, higher_eq=False):
        workload = {"020_RealignASL.status", "030_RegisterASL.status", "040_ResampleASL.status",
                    "050_PreparePV.status", "060_ProcessM0.status", "070_Quantification.status",
                    "080_CreateAnalysisMask.status", "090_VisualQC_ASL.status", "999_ready.status"}
    # NEW EXPECTATION: 070 = CreateAnalysisMask, 080 = Quantification.
    else:
        workload = {"020_RealignASL.status", "030_RegisterASL.status", "040_ResampleASL.status",
                    "050_PreparePV.status", "060_ProcessM0.status", "070_CreateAnalysisMask.status",
                    "080_Quantification.status", "090_VisualQC_ASL.status", "999_ready.status"}
    # The "060_ProcessM0.status" file is never generated if an integer or
    # float is the value for the M0 parameter.
    if isinstance(parms["M0"], (int, float)):
        workload.remove("060_ProcessM0.status")
    # conditions is a list of tuples whose first element is a workload
    # filename that may be impacted and whose second element is a boolean
    # that defines whether to remove it or not.
    if conditions is None:
        conditions = []
    for condition in conditions:
        filename, to_remove = condition
        if to_remove:
            workload.remove(filename)
    # Must iterate through both the subject level listing AND the session
    # level (ASL_1, ASL_2, etc.) listing.
    for subject_path in analysis_directory.iterdir():
        # Disregard files, standard directories, subjects that fail regex
        # and subjects that are to be excluded.
        if any([subject_path.is_file(),
                subject_path.name in ["Population", "lock", "Logs"],
                subject_path.name in parms["dataset"]["exclusion"],
                not incl_regex.search(subject_path.name)]):
            continue
        aslmod_dict[subject_path.name] = {}
        for run_path in subject_path.iterdir():
            if run_path.is_file():  # This is kept separate since many files are expected
                continue
            if not is_valid_for_analysis(path=run_path, parms=parms, glob_dict=glob_dictionary):
                continue
            # Deduce the lock dir path and make it if it doesn't exist.
            lock_dir: Path = analysis_directory / "lock" / "xASL_module_ASL" / subject_path.name / \
                f"xASL_module_ASL_{run_path.name}"
            if not lock_dir.exists():
                lock_dir.mkdir(parents=True)
            # Filter out any anticipated status files that are already
            # present in the lock dirs.
            filtered_workload = [lock_dir / name for name in workload
                                 if not (lock_dir / name).exists()]
            status_files.extend(filtered_workload)
            # Calculate the numerical representation of the STATUS files workload.
            num_repr = sum([workload_translator[stat_file.name]
                            for stat_file in filtered_workload])
            aslmod_dict[subject_path.name][run_path.name] = num_repr
    return aslmod_dict, status_files
def _has_diff(lines: Iterable[str], ignore: re.Pattern) -> bool: for line in lines: if ignore.search(line): continue ch0 = line[0] if ch0 == '-' or ch0 == '+': return True assert ch0 == ' ' or ch0 == '@' return False
def regex_search(
    regex: re.Pattern,
    test_string: str,
    group_no: Union[int, str] = 1,
    should_raise: bool = True,
) -> Optional[str]:
    """Search `test_string` with `regex` and return the requested group.

    Raises RegexMatchError when `should_raise` is True and either no match
    is found or the matched group is empty/None; otherwise returns the
    group text (or None on no match).
    """
    match = regex.search(test_string)
    # Bug fix: the original called .group() on a possibly-None match, which
    # crashed with AttributeError instead of the intended RegexMatchError.
    if match is None:
        if should_raise:
            raise RegexMatchError
        return None
    result = match.group(group_no)
    if not result and should_raise:
        raise RegexMatchError
    return result
def is_word_fit_to_pattern(word: str, pattern: re.Pattern) -> bool:
    """
    Check whether the word fits the pattern.

    :param word: the word to test
    :param pattern: compiled regular expression
    :return: True when the pattern occurs anywhere in the word
    """
    # todo: also return the match indices
    found = pattern.search(word)
    return found is not None
def _extract_label_keyword_func(exp_name: str, extract_pattern: re.Pattern) -> str: """ Args: exp_name (str): 輸入的實驗名稱 extract_pattern(re.Pattern) : 擷取標籤的正則表達式 Returns: str: 輸出的histroy plot label """ return extract_pattern.search(exp_name).group()
def _match_single(self, filter : re.Pattern, key : str, value : Any) -> bool: if value is None: return False if callable(getattr(value, 'items', None)): for sub_key, sub_value in value.items(): if self._match_single(filter, sub_key, sub_value): return True if callable(getattr(value, 'items', None)): for sub_key, sub_value in value.items(): if self._match_single(filter, sub_key, sub_value): return True return False if key == 'primary': return value and filter.search(str(key)) return filter.search(str(value)) is not None
def replace_with_func_single(self, search_value: re.Pattern, func: Callable[[], str], replace_replaced_words: bool = False):
    """Replace the first occurrence of `search_value` in self.word with the
    value produced by `func()`, recording the replaced fragments in
    self.replaced_words. Returns self for chaining.

    When replace_replaced_words is False, the replacement is skipped if the
    produced value would touch previously replaced words (delegated to
    self.search_value_contains_replaced_words).
    """
    replace_value = func()
    # Guard: do not replace into text that was itself produced by an
    # earlier replacement, unless explicitly allowed.
    if not replace_replaced_words and self.search_value_contains_replaced_words(search_value, replace_value):
        return self
    replacing_word = self.word
    if search_value.search(self.word) is not None:
        # Only the first occurrence's text is taken, but str.replace then
        # substitutes EVERY occurrence of that exact text in the word.
        match = search_value.search(self.word).group()
        replacing_word = self.word.replace(match, replace_value)
    collection = search_value.findall(self.word)
    replaced_words: List[str]
    if len(collection) > 1:
        # NOTE(review): s.replace(s, replace_value) always yields
        # replace_value, so this records one copy per original match —
        # presumably intentional, but worth confirming.
        replaced_words = list(map(lambda s: s.replace(s, replace_value), collection))
    else:
        replaced_words = []
    if replacing_word != self.word:
        for word in replaced_words:
            self.replaced_words.add(word)
        self.word = replacing_word
    return self
def _list_files_recursive(folder:str, pattern:re.Pattern) -> List[str]: """ Lists files whose name match `pattern` in a folder and its subfolders """ filepaths = [] for root, dirs, files in os.walk(folder): for f in files: # search pattern inside of file name if pattern.search(Path(f).name): fullpath = os.path.join(root, f) filepaths.append(fullpath) return filepaths
def match(self, pattern: re.Pattern) -> list:
    '''Find pattern in pusher events.

    :param re.pattern pattern: regexp.
    :rtype: list
    '''
    # Return the captured groups of the first matching event; [] when
    # nothing matches.
    for entry in self.events:
        hit = pattern.search(entry)
        if hit:
            return hit.groups()
    return []
def safe_append_text(content, text, pattern: re.Pattern):
    """
    Append a string to a wikitext string, but before any category
    @param content:
    @param text:
    @param pattern:
    @return:
    """
    content = str(content)
    found = pattern.search(content)
    # Insert before the first category marker when present, else at the end.
    cut = len(content) if found is None else found.start()
    return content[:cut] + text + content[cut:]
def findall(pattern: re.Pattern, text: str) -> Iterable[Sequence[Any]]:
    """
    In the style of regex.findall(text), but allowing for overlapping
    groups: yields the groups of every distinct match start position.
    """
    reported = set()
    pos = 0
    while pos < len(text):
        match = pattern.search(text, pos)
        if match is None:
            # No match at or after pos means none later either; the
            # original kept re-searching from every remaining position.
            break
        start = match.start()
        if start not in reported:
            yield match.groups()
            reported.add(start)
        # Jump to one past this match's start: searching from any skipped
        # position would return this same (leftmost) match again.
        pos = start + 1
def update_version(pattern: re.Pattern, v: str, file_path: str):
    """Replace the version captured between `pattern`'s two groups in
    `file_path` with `v`, rewriting the file in place.

    Raises Exception when the pattern is absent from the file; does
    nothing when the substitution produces no change.
    """
    # Bug fix: the message referenced an undefined global `version`
    # (NameError at runtime); use the `v` parameter instead.
    print(f"Replacing {pattern} to {v} in {file_path}")
    with open(file_path, "r+") as f:
        file_content = f.read()
        if not pattern.search(file_content):
            raise Exception(
                f"Pattern {pattern!r} doesn't found in {file_path!r} file")
        # Keep group 1 and group 2, swap only the version text between them.
        new_content = pattern.sub(fr'\g<1>{v}\g<2>', file_content)
        if file_content == new_content:
            return
        f.seek(0)
        f.truncate()
        f.write(new_content)
def expect_re(self, reg: re.Pattern):
    """Read self.process stdout one character at a time until `reg`
    matches the accumulated output (or EOF is reached), echoing each
    character as it arrives. Returns the accumulated output and resets
    the buffer.

    NOTE(review): assumes self.process.stdout is a text-mode stream
    (read(1) compared against "") — confirm how the process is spawned.
    """
    while not reg.search(self.output):
        c = self.process.stdout.read(1)
        # Empty read means EOF: stop waiting even without a match.
        if c == "":
            break
        self.output += c
        print(c, end="")
    # Now we're at a prompt; clear the output buffer and return its contents
    tmp = self.output
    self.output = ""
    return tmp
def replace_with_func_multiple(self, search_value: re.Pattern, func: Callable[[str, str], str], replace_replaced_words: bool = False):
    """Replace the first occurrence of `search_value` (a pattern with two
    capture groups) in self.word with func(group1, group2), recording the
    replaced fragments in self.replaced_words. Returns self for chaining.

    When replace_replaced_words is False, the replacement is skipped if the
    produced value would touch previously replaced words (delegated to
    self.search_value_contains_replaced_words).
    """
    if search_value.search(self.word) is None:
        return self
    word = self.word
    captures = search_value.search(word)
    # The replacement is derived from the two captured groups.
    replace_value = func(captures.group(1), captures.group(2))
    # Guard: do not replace into text produced by an earlier replacement,
    # unless explicitly allowed.
    if not replace_replaced_words and self.search_value_contains_replaced_words(search_value, replace_value):
        return self
    # str.replace substitutes EVERY occurrence of the first match's text.
    replacing_word = self.word.replace(captures.group(0), replace_value)
    collection = search_value.findall(self.word)
    # findall returns tuples for multi-group patterns; flatten to strings.
    collection = list(flatten(collection))
    replaced_words: List[str]
    if len(collection) > 1:
        # NOTE(review): s.replace(s, replace_value) always yields
        # replace_value — one copy recorded per flattened match fragment;
        # presumably intentional, but worth confirming.
        replaced_words = list(map(lambda s: s.replace(s, replace_value), collection))
    else:
        replaced_words = []
    if replacing_word != self.word:
        for word in replaced_words:
            self.replaced_words.add(word)
        self.word = replacing_word
    return self
def process(
    path: Path,
    locale: str,
    re_download_link: re.Pattern,
    re_old_versions: re.Pattern,
    re_change_log: re.Pattern,
    change_log: str,
):
    """Bump the patch version of the plugin documented in `path`.

    Moves the current download link into the Old Versions section, writes a
    new download link with patch+1, appends `change_log` under the change
    log heading, and writes the result to out/<locale-dir>/<name>.

    NOTE(review): relies on module-level `release_name` and
    `compose_download_link`; `re_download_link` must capture, in order:
    plugin name, major, minor, patch, download URL.
    """
    print(f"Processing {path}")
    with open(path, "r") as fi:
        text = fi.read()
    mt = re_download_link.search(text)
    if mt is None:
        print(f"Download link not found in: {path}")
        return
    plugin_name = mt.groups()[0]
    major_version = mt.groups()[1]
    minor_version = mt.groups()[2]
    patch_version = mt.groups()[3]
    download_url = mt.groups()[4]
    # Add old download link to Old Versions section.
    old_version = f"{major_version}.{minor_version}.{patch_version}"
    old_version_link = f"- [{plugin_name} {old_version} - VST 3 (github.com)]({download_url})"
    text = re_old_versions.sub(
        lambda exp: f"{exp.group()}\n{old_version_link}", text, count=1)
    # Update download link (patch version is incremented by one).
    new_version = f"{major_version}.{minor_version}.{int(patch_version) + 1}"
    new_downlaod_url = f"https://github.com/ryukau/VSTPlugins/releases/download/{release_name}/{plugin_name}{new_version}.zip"
    new_link = compose_download_link(locale, plugin_name, new_version,
                                     new_downlaod_url)
    if new_link is None:
        return
    text = re_download_link.sub(new_link, text, count=1)
    # Add change log entry under the change-log heading.
    text = re_change_log.sub(
        lambda exp: f"{exp.group()}\n- {new_version}{change_log}", text, count=1)
    # Write to out/<parent-dir-name>/<file-name>, creating dirs as needed.
    out_dir = Path("out") / Path(path.parts[-2])
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / Path(path.name), "w") as fi:
        fi.write(text)