def has_vowel(in_string):
    """Return True if the Devanagari string contains a vowel sound.

    A vowel sound is either an explicit vowel/matra character, or a
    consonant that is not immediately followed by the virama (which
    would suppress its inherent vowel).
    """
    # Independent vowels, vowel signs and related marks (spaces in the
    # literal are only for readability and removed before matching).
    vowel_class = ".*[ऄ-औ ऺ-ऻ ा-ौ ॎ-ॏ ॲ-ॷ].*".replace(" ", "")
    if regex.fullmatch(vowel_class, in_string, flags=regex.UNICODE):
        return True
    # A consonant not followed by virama carries an inherent vowel.
    consonant_class = ".*[क-हक़-य़ॸ-ॿ](?!्).*"
    return bool(regex.fullmatch(consonant_class, in_string, flags=regex.UNICODE))
def base64_search(text: bytes) -> Dict[bytes, bytes]:
    """
    Find all base64 encoded sections in a text.

    Deprecated: use find_base64 instead.

    Args:
        text: The text to search.
    Returns:
        A dictionary with the original base64 encoded sections as keys
        and the corresponding decoded data as values.
    """
    # Fix: the warning message previously misspelled "deprecated".
    warnings.warn("base64_search is deprecated, use find_base64 instead", DeprecationWarning)
    b64_matches = {}
    for b64_match in re.findall(BASE64_RE, text):
        if b64_match in b64_matches:
            continue
        # Strip HTML escapes, line breaks and UTF-16 space artefacts.
        b64_string = re.sub(HTML_ESCAPE_RE, b'', b64_match).replace(b'\n', b'').replace(b'\r', b'') \
            .replace(b'<\x00 \x00', b'')
        if re.fullmatch(HEX_RE, b64_string):
            # Hexadecimal characters are a subset of base64
            # Hashes commonly are hex and have multiple of 4 lengths
            continue
        if re.fullmatch(CAMEL_RE, b64_string):
            # Camel case text can be confused for base64
            # It is common in scripts as names
            continue
        uniq_char = set(b64_string)
        # Require some character diversity and a valid base64 length.
        if len(uniq_char) > MIN_B64_CHARS and len(b64_string) % 4 == 0:
            try:
                b64_result = binascii.a2b_base64(b64_string)
                b64_matches[b64_match] = b64_result
            except binascii.Error:
                # Not actually valid base64; skip silently (best effort).
                pass
    return b64_matches
def remove_line_numbers(line):
    """Strip a leading and/or trailing line number (up to two digits,
    optionally followed by one extra character) from *line*.

    NOTE(review): the patterns r'^\d. ' and r'^\d\d. ' use an unescaped
    '.', which matches ANY character after the digit(s), not just a dot —
    confirm whether a literal dot was intended.
    """
    # Leading number followed by a space; variants checked in order, so the
    # first matching (shortest) pattern wins.
    one_char_numeric_obj = regex.match(r'^\d ', line)
    two_char_numeric_obj = regex.match(r'^\d. ', line)
    three_char_numeric_obj = regex.match(r'^\d\d ', line)
    four_char_numeric_obj = regex.match(r'^\d\d. ', line)
    if one_char_numeric_obj is not None:
        line = line[one_char_numeric_obj.end():]
    elif two_char_numeric_obj is not None:
        line = line[two_char_numeric_obj.end():]
    elif three_char_numeric_obj is not None:
        line = line[three_char_numeric_obj.end():]
    elif four_char_numeric_obj is not None:
        line = line[four_char_numeric_obj.end():]
    # Trailing (whole-remainder) number: same idea with fullmatch.
    # NOTE(review): the four-char case uses regex.match while its siblings
    # use regex.fullmatch — probably an oversight, but left unchanged.
    one_char_numeric_obj = regex.fullmatch(r'^\d', line)
    two_char_numeric_obj = regex.fullmatch(r'^\d.', line)
    three_char_numeric_obj = regex.fullmatch(r'^\d\d', line)
    four_char_numeric_obj = regex.match(r'^\d\d.', line)
    if one_char_numeric_obj is not None:
        line = line[one_char_numeric_obj.end():]
    elif two_char_numeric_obj is not None:
        line = line[two_char_numeric_obj.end():]
    elif three_char_numeric_obj is not None:
        line = line[three_char_numeric_obj.end():]
    elif four_char_numeric_obj is not None:
        line = line[four_char_numeric_obj.end():]
    return line
def main() -> None:
    """
    Calculate and output the solutions based on the real puzzle input.
    """
    # Fetch the Advent of Code 2020 day 19 input (rules + messages).
    data = aocd.get_data(year=2020, day=19)
    rulesdata, messagesdata = data.split("\n\n")
    rules = read_rules(rulesdata)
    messages = read_messages(messagesdata)
    rule = regex_rule(rules["0"], rules)
    part1 = sum(1 for message in messages if regex.fullmatch(rule, message))
    print(f"Part 1: {part1}")
    rules2 = dict(**rules)
    rules2.update(**{"8": "42 | 42 8", "11": "42 31 | 42 11 31"})
    # >> [(number, content) for number, content in rules2.items()
    #     if '8' in content.split(' ') or '11' in content.split(' ')]
    # .. [('0', '8 11'), ('8', '42 | 42 8'), ('11', '42 31 | 42 11 31')]
    # So rules 8 and 11 appear only in themselves and rule 0
    # - Rule 8: rule 42 any number of times
    # - Rule 11: any number of 42s followed by the same number of 31s
    # Rewrite these and run rule 0 and we're there
    fortytwo = regex_rule(rules2["42"], rules2)
    thirtyone = regex_rule(rules2["31"], rules2)
    # Rule 8: one or more repetitions of rule 42.
    eight = "(?:42)+".replace("42", fortytwo)
    # Rule 11: balanced nesting of 42s and 31s, using the regex module's
    # recursive subpattern call (?1) to enforce equal counts.
    eleven = "((?:4231)|(?:42(?1)31))".replace("42", fortytwo).replace(
        "31", thirtyone)
    zero = eight + eleven
    part2 = sum(1 for message in messages if regex.fullmatch(zero, message))
    print(f"Part 2: {part2}")
def find_base64(data: bytes) -> List[Tuple[bytes, int, int]]:
    """
    Find all base64 encoded sections in some data.

    Args:
        data: The data to search.
    Returns:
        A list of decoded base64 sections and the location indexes of the
        section in the original data.
    """
    results = []
    for found in re.finditer(BASE64_RE, data):
        # Normalize the candidate: drop HTML escapes, line breaks, and
        # UTF-16 space artefacts before attempting a decode.
        candidate = (re.sub(HTML_ESCAPE_RE, b'', found.group())
                     .replace(b'\n', b'')
                     .replace(b'\r', b'')
                     .replace(b'<\x00 \x00', b''))
        if len(candidate) % 4 != 0 or len(set(candidate)) <= MIN_B64_CHARS:
            # Invalid base64 length, or not enough character diversity.
            continue
        if re.fullmatch(HEX_RE, candidate):
            # Hexadecimal characters are a subset of base64
            # Hashes commonly are hex and have multiple of 4 lengths
            continue
        if re.fullmatch(CAMEL_RE, candidate):
            # Camel case text can be confused for base64
            # It is common in scripts as names
            continue
        if candidate.count(b'/') / len(candidate) > 3 / 32:
            # If there are a lot of / it as more likely a path
            continue
        try:
            decoded = binascii.a2b_base64(candidate)
        except binascii.Error:
            continue
        results.append((decoded, found.start(), found.end()))
    return results
def checkInput(self, text, textOrEmail):
    """Check the validity of the input.

    Args:
        text: the string to validate.
        textOrEmail: "email" to validate as an email address, "text" to
            validate as plain text.
    Returns:
        The regex match object on success, None on failure or for an
        unknown textOrEmail mode.
    """
    if textOrEmail == "email":
        pattern = regex.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)")
        return regex.fullmatch(pattern, text)
    elif textOrEmail == "text":
        # Fix: the previous class [a-zA-Z0-9.-_+] contained the range '.-_',
        # which accidentally accepted every ASCII char between '.' and '_'
        # (e.g. '/', ':', ';', '@').  The hyphen must be last to be literal.
        pattern = regex.compile(r"(^[a-zA-Z0-9._+-]+$)")
        return regex.fullmatch(pattern, text)
    return None
def json_msg(string: str) -> MessageChain:
    """Rebuild message elements from a string containing
    ``[json_element:<base64>]`` tokens; other text becomes Plain elements.

    NOTE(review): the annotated return type is MessageChain but `result`
    is never returned — confirm whether a return statement was lost.
    """
    result = []
    # Split keeps the delimiters because the pattern is a capture group.
    for match in regex.split(r'(\[json_element:.+?\])', string):
        if element := regex.fullmatch(r'(\[json_element:(.+?)\])', match):
            try:
                # Payload is base64-encoded JSON.
                result.append(json.loads(base64.b64decode(element.group(2))))
            except:  # NOTE(review): bare except also catches KeyboardInterrupt
                result.append({'type': 'Plain', 'text': match})
        elif match:  # drop empty strings produced by split
            result.append({'type': 'Plain', 'text': match})
def pickle_msg(string: str) -> MessageChain:
    """Rebuild message elements from a string containing
    ``[pickle_element:<base64>]`` tokens; other text becomes Plain elements.

    NOTE(review): pickle.loads on externally supplied data is unsafe if the
    string can come from an untrusted source — verify the trust boundary.
    NOTE(review): the annotated return type is MessageChain but `result`
    is never returned — confirm whether a return statement was lost.
    """
    result = []
    # Split keeps the delimiters because the pattern is a capture group.
    for match in regex.split(r'(\[pickle_element:.+?\])', string):
        if element := regex.fullmatch(r'(\[pickle_element:(.+?)\])', match):
            try:
                # Payload is base64-encoded pickled data.
                result.append(pickle.loads(base64.b64decode(element.group(2))))
            except:  # NOTE(review): bare except also catches KeyboardInterrupt
                result.append(Plain(match))
        elif match:  # drop empty strings produced by split
            result.append(Plain(match))
def test_regex_character_class(characters_exclusions: Tuple[str, str]) -> None:
    """Check that regex_character_class() keeps exactly the non-excluded chars."""
    characters, exclusions = characters_exclusions
    # (renamed from `re` to avoid shadowing the stdlib module name)
    pattern = regex.compile(f"[{regex_character_class(characters, exclusions)}]*")
    # A string of all characters which should be matched by the regex.
    kept = "".join(c for c in characters if c not in exclusions)
    assert regex.fullmatch(pattern, kept)
    # If there are exclusions, the regex should not match the original characters.
    if kept != characters:
        assert not regex.fullmatch(pattern, characters)
def getmatchingவிதிகள்(நிலைமொழி, வருமொழி):
    """Return the sandhi rules from the module-level விதிகள் list whose
    patterns fully match both words after vowel-consonant expansion.

    Args (Tamil): நிலைமொழி = leading word, வருமொழி = following word.
    """
    தொடர்மொழி_விதிகள் = []
    # Expand both words (உயிர்மெய்விரி = split compound vowel-consonants).
    நிலைமொழிவிரி = எழுத்து.உயிர்மெய்விரி(நிலைமொழி)
    வருமொழிவிரி = எழுத்து.உயிர்மெய்விரி(வருமொழி)
    for விதி in விதிகள்:
        # A rule applies only if both of its patterns match fully.
        if regex.fullmatch(விதி.நிலைமொழி_regex, நிலைமொழிவிரி) and \
           regex.fullmatch(விதி.வருமொழி_regex, வருமொழிவிரி):
            தொடர்மொழி_விதிகள்.append(விதி)
    # print(தொடர்மொழி_விதிகள்)
    return தொடர்மொழி_விதிகள்
def check_end_for_given_chars_or_for_an_uppercase_character(value):
    """Validate how *value* ends relative to punctuation rules.

    Raises:
        serializers.ValidationError: when the text's ending violates the
        punctuation/uppercase rules described in the messages below.
    Returns:
        The (unchanged) value when valid.
    """
    # Fix: the stdlib `re` module does not support \p{Lu}; these patterns
    # require the third-party `regex` module (already used in this file).
    # Patterns are raw strings now, avoiding invalid-escape warnings.
    regex1 = r".* ?[\p{Lu}]$"  # ends with an upper case letter
    regex2 = r".*[?.!]$"       # ends with ? . or !
    regex3 = r".*[,;:] ?$"     # ends with , ; or : then an optional space
    matched1 = regex.fullmatch(regex1, value)
    matched2 = regex.fullmatch(regex2, value)
    matched3 = regex.fullmatch(regex3, value)
    if not bool(matched1) or not bool(matched2) or not bool(matched3):
        if (not bool(matched1) and not bool(matched2)) and not bool(matched3):
            raise serializers.ValidationError("characters ?.! should be end of text or followed by one space and an uppercase character")
        elif (bool(matched1) and bool(matched2)) and not bool(matched3):
            raise serializers.ValidationError("characters ,;: should be end of text or followed by one space")
    return value
def discover_uid(root, entry):
    """Return the id of the first <section>/<hgroup> element whose id is
    entirely non-alphabetic, recording its source line on *entry*.

    Returns None when no such element exists.
    """
    # Both element kinds share identical handling; try them in order.
    for selector in ('section[id]', 'hgroup[id]'):
        node = root.select_one(selector)
        if node:
            uid = node.attrib['id']
            # Only ids with no alphabetic characters qualify.
            if not regex.fullmatch(r'\P{alpha}+', uid):
                entry._lineno = node.sourceline
                return uid
    return None
def interchange(string, form1, form2):
    """Produce all rewrites of *string* obtained by swapping form1/form2
    sub-formulas, recursing into both sides of a compound formula.

    NOTE(review): assumes `general` matches whenever *string* is not a
    plain equality — deep.group() would raise AttributeError otherwise.
    """
    # A bare equality (Quantifiers Term = Term) admits no interchange.
    if isType(Quantifiers([0]) + Term([1]) + "=" + Term([2]), string):
        return []
    else:
        # Whole-string matches of either form, used for a top-level swap.
        one_to_two = regex.fullmatch(Quantifiers([0]) + AB_wfs(form1, True), string)
        # I can be more efficient and remove these with a regex.subf below.
        two_to_one = regex.fullmatch(Quantifiers([0]) + AB_wfs(form2, True), string)
        ## ^^^^^
        # Decompose the compound formula to recurse into each side.
        deep = regex.fullmatch(general, string)
        left = []
        for lstring in interchange(deep.group("wfs_1"), form1, form2):
            left.extend(replace(deep, "{Quantifiers_0}{lbra}" + lstring + "{opperator}{wfs_2}{rbra}"))
        right = []
        for rstring in interchange(deep.group("wfs_2"), form1, form2):
            right.extend(replace(deep, "{Quantifiers_0}{lbra}{wfs_1}{opperator}" + rstring + "{rbra}"))
        # Top-level swaps (replace() tolerates a None match) plus recursions.
        return replace(one_to_two, "{Quantifiers_0}" + AB_wfs(form2, False)) + replace(two_to_one, "{Quantifiers_0}" + AB_wfs(form1, False)) + left + right
def testSlurpy(testCase):
    """Return True if *testCase* is a slurpy: a slimp prefix immediately
    followed by a slump suffix."""
    m = re.match(reSlimp, testCase)
    if not m:
        # No slimp prefix at all: cannot be a slurpy.
        return False
    # The remainder after the slimp prefix must be exactly a slump.
    # Fix: use the identity test `is not None` instead of `!= None`.
    postFix = testCase[len(m.group()):]
    return re.fullmatch(reSlump, postFix) is not None
def __post_init__(self) -> None:
    """Validate name, password and id; derive a default id from the name.

    Raises:
        ValueError: when any field fails its regex validation.
    """
    super().__post_init__()
    if not regex.fullmatch(CustomChannel.NAME_REGEX, self.name):
        raise ValueError(f"Invalid name {self.name!r}")
    if self.password is not None and not regex.fullmatch(
        CustomChannel.PASSWORD_REGEX, self.password
    ):
        raise ValueError(f"Invalid password {self.password!r}")
    if self.id is not None and not regex.fullmatch(CustomChannel.ID_REGEX, self.id):
        # Fix: the message previously interpolated the builtin `id` function
        # instead of the offending value self.id.
        raise ValueError(f"Invalid id {self.id!r}")
    if self.id is None:
        # Default id: "usr." + lowercased (SWTOR rules) channel name.
        self.id = f"usr.{swtor_lower(self.name)}"
def check_discount(self, string):
    """Return True if *string* looks like a negative discount amount,
    e.g. '-12,50' (minus sign(s), digits, comma, two decimals)."""
    # Fix: use isinstance() instead of comparing type objects; also
    # accepts str subclasses, which compare-by-type wrongly rejected.
    if not isinstance(string, str):
        return False
    return regex.fullmatch(r'-+\d+,\d\d', string) is not None
def xml_validator(expr):
    """Validate *expr* against the module-level xml_grammar and print the
    match object, a verdict, and (on success) the captured groups."""
    match = regex.fullmatch(xml_grammar, expr, regex.X | regex.DOTALL)
    print(match)
    if match is None:
        print("Syntax error")
        return
    print("Ok, valid xml")
    print(match.capturesdict())
def part1(start_pattern="^ 0 $"):
    """Count messages that fully match the grammar rule selected by
    *start_pattern* (default: rule 0).

    NOTE(review): `rules`, `massages` (sic — the misspelling is the actual
    module-level global name) and `create_pattern`/`parse_rules` are
    defined elsewhere in the file.
    """
    acc = 0
    pattern = create_pattern(parse_rules(rules), start_pattern)
    for message in massages:
        if regex.fullmatch(pattern, message):
            acc += 1
    return acc
def make_reaction(self, message, unicode):
    """Build a MagicMock of discord.Reaction carrying the emoji *unicode*.

    The mock exposes an async `users()` matching discord.py's API, backed
    by the plain dict `users_for_mock` that tests populate directly.
    """
    logger_.debug(f'make_reaction: {unicode}\n on: {fobj(m=message)}')
    assert isinstance(unicode, str)
    # The argument must be a single emoji character.
    assert regex.fullmatch(r'\p{Emoji}', unicode)
    reaction = self.pytest_mocker.MagicMock(
        spec=discord.Reaction, name='reaction')
    reaction.emoji = self.pytest_mocker.MagicMock(
        spec=discord.PartialEmoji, name='reaction.emoji')
    reaction.emoji.name = unicode
    # str() of both the emoji and the reaction yields the raw emoji.
    reaction.emoji.__str__.return_value = unicode
    reaction.__str__.return_value = unicode
    reaction.count = 0
    reaction.me = False
    reaction.message = message
    # Users who reacted; tests populate this dict directly.
    reaction.users_for_mock = {}

    async def users(limit=None, oldest_first=None):
        # Async-generate up to `limit` reacting users, mirroring
        # discord.Reaction.users(); oldest_first is accepted but unused.
        for i, m in enumerate(reaction.users_for_mock.values()):
            if limit is not None and i >= limit:
                break
            yield m
    reaction.users.side_effect = users
    return reaction
def _find_sea_monsters_in_single_orientation(image, regex_mode):
    """Count sea-monster pattern occurrences in one orientation of *image*.

    regex_mode 'chunked' slides a 3-row x 20-char window and matches each
    pattern row separately; 'full' joins the image and matches one DOTALL
    pattern with overlaps allowed.
    """
    image = [''.join(row) for row in image]
    if regex_mode == 'chunked':
        matches = 0
        # Every 3-row band, scanned with 20-char wide sliding sections.
        for rows in more_itertools.windowed(image, 3):
            window_iters = [
                more_itertools.windowed(row, 20) for row in rows
            ]
            # zip yields vertically aligned 3x20 sections.
            for section in zip(*window_iters):
                section_str = [''.join(line) for line in section]
                pattern_line = zip(JurassicJigsaw.SEA_MONSTER_PATTERN, section_str)
                # A monster is present only if all three rows match.
                if all(
                        regex.fullmatch(pattern, line)
                        for pattern, line in pattern_line):
                    matches += 1
        return matches
    elif regex_mode == 'full':
        image_str = '\n'.join(image)
        len_pattern = len(JurassicJigsaw.SEA_MONSTER_PATTERN[0])
        # Gap between pattern rows: remaining chars of the row plus the
        # newline.  NOTE(review): uses len(image) (the row count) as the
        # row width — assumes a square image; confirm.
        spaces_between_rows = '.{{{}}}'.format(
            len(image) - len_pattern + 1)
        pattern = f'{spaces_between_rows}'.join(
            JurassicJigsaw.SEA_MONSTER_PATTERN)
        # overlapped=True counts monsters that share characters.
        return len(
            regex.findall(pattern, image_str, flags=regex.DOTALL,
                          overlapped=True))
def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False, concurrent=None, **kwargs):
    """Wrapper for fullmatch.

    Pre-processes back-references in *pattern*, then delegates to
    regex.fullmatch with all arguments passed positionally in the order
    regex.fullmatch expects.
    """
    return regex.fullmatch(
        _apply_search_backrefs(pattern, flags), string,
        flags, pos, endpos, partial, concurrent, **kwargs
    )
def __init__(self, fullvalue, *, negated=False, exact=False, dbcog):
    # Extract the single capture group (the subquery) from the token.
    # NOTE(review): assumes RE_MATCH always matches fullvalue.lower();
    # otherwise fullmatch() returns None and .groups() raises
    # AttributeError — confirm callers pre-validate.
    subquery, = re.fullmatch(self.RE_MATCH, fullvalue.lower()).groups()
    super().__init__(fullvalue, subquery, negated=negated, exact=exact, dbcog=dbcog)
def parse_panels(setting: str) -> List[PanelSetting]:
    """Parse a ChatChannels panel setting string into PanelSetting objects.

    The setting is a run of "number.name.bitmask;" entries; the bitmask is
    decoded into the set of enabled channel indexes.

    Raises:
        ValueError: when the string does not match the expected format.
    """
    match = regex.fullmatch(
        r"""
        (?:
            (?P<number>[0-9]+)\.
            (?P<name>[^.;]+)\.
            (?P<channel_bitmask>[0-9]+);
        )+
        """,
        setting,
        regex.VERBOSE,
    )
    if match is None:
        raise ValueError(f"Failed to parse ChatChannels: {setting!r}")
    panels = []
    captured = zip(
        match.captures("number"),
        match.captures("name"),
        match.captures("channel_bitmask"),
    )
    for number_text, name, bitmask_text in captured:
        bitmask = int(bitmask_text)
        # Decode the bitmask into the set of enabled channel indexes.
        channel_ixs = {
            bit for bit in range(bitmask.bit_length()) if (bitmask >> bit) & 1
        }
        panels.append(PanelSetting(int(number_text), name, channel_ixs))
    return panels
def check_charset(value):
    """Reject *value* if it contains characters outside the configured
    charset (Charset record with id 1).

    Raises:
        serializers.ValidationError: when a foreign character is present.
    Returns:
        The (unchanged) value when valid.
    """
    charset = Charset.objects.get(id=1)
    # Build a character-class pattern from the stored charset string.
    # (renamed from `regex` to stop shadowing the regex module)
    pattern = "[" + charset.charset + "]+"
    if not re.fullmatch(pattern, value):
        raise serializers.ValidationError('Characters outside of the Character Set used. Only use characters present in the set.')
    return value
def potentialStems(wordList, f):
    """Write to *f* groups of words fuzzily similar to each other, flagging
    them as potential misspellings; stops after 50 findings.

    NOTE(review): this is O(n^2) over wordList, and the pattern `{i<=10}`
    allows up to ten INSERTIONS only — confirm whether substitutions and
    deletions were meant to be excluded.
    """
    counter = 0
    for i, word in enumerate(wordList):
        if counter == 50:
            # "Found 50 potential errors, stopping processing"
            f.write("Найдено 50 потенциальных ошибок, заканчиваю обработку")
            f.write("\n")
            break
        # Progress indicator: current index / total.
        print(i)
        print("/")
        print(len(wordList))
        if len(word) < 2:
            continue
        wordstem = stemmer.stem(word)
        if len(wordstem) < 2:
            continue
        # Fuzzy pattern: the word itself plus up to 10 inserted characters.
        searchRule = word + "{i<=10}"
        matches = []
        for potentialMatch in wordList:
            if regex.fullmatch(searchRule, potentialMatch):
                if potentialMatch != word:
                    matches.append(potentialMatch)
        if len(matches) > 1:
            f.write("Слово: " + word)  # "Word:"
            f.write("\n")
            f.write("Стемм: " + wordstem)  # "Stem:"
            f.write("\n")
            f.write("Потенциальные ошибки: ")  # "Potential errors:"
            f.write("\n")
            # Deduplicate before writing.
            cleanMatches = set(matches)
            for curr in cleanMatches:
                f.write(curr)
                f.write("\n")
            counter += 1
def __init__(self, fullvalue, *, negated=False, exact=False, dbcog):
    # Parse "lower-upper" bounds from the token.
    # NOTE(review): assumes RE_MATCH always matches fullvalue.lower();
    # a failed match returns None and .groups() raises AttributeError.
    lbound, rbound = re.fullmatch(self.RE_MATCH, fullvalue.lower()).groups()
    self.lower_bound = int(lbound)
    # A missing right bound means effectively unbounded (9e9 sentinel).
    self.upper_bound = int(rbound or 9e9)
    super().__init__(fullvalue, negated=negated, exact=exact, dbcog=dbcog)
def str2size(string: str) -> Size:
    """
    Return a size for a shorthand size.

    Accepts a string defining a size::

        1337 - 1337 bytes
        150K - 150 kilobytes
        2M - 2 megabytes

    @Returns:
        a tuple ``(size, unit)``, where ``size`` is an integer and unit is
        ``'B'`` (bytes) or ``'T'`` (threads).
    """
    # Digits may be grouped in threes with spaces, e.g. "1 000".
    match = re.fullmatch(r'(\d{1,3}(?: \d{3})+|\d+) *([BkKMT]?)', string)
    if match is None:
        raise MalformedConfigError("Couldn't parse size: {}".format(string))
    digits, unit = match.groups()
    value = int(digits.replace(' ', ''))
    # Normalise M -> K -> B by successive *1024 steps; 'T' passes through.
    if unit == 'M':
        value *= 1024
        unit = 'K'
    if unit in ('K', 'k'):
        value *= 1024
    if unit != 'T':
        unit = 'B'
    return value, unit
def FeedSentence(self, words):
    """Accumulate n-gram phrase counts for one sentence of *words*.

    Counts every n-gram of up to self.num_ngrams consecutive words.  For a
    Capitalized sentence-initial word, additionally counts the lowercased
    variants, since that capitalization is likely positional.  Dumps or
    cuts off the in-memory counts when batch thresholds are reached.
    """
    self.num_sentences += 1
    self.num_words += len(words)
    self.num_words_since_cutoff += len(words)
    uniq_phrases = set()
    start_index = 0
    while start_index < len(words):
        end_index = min(start_index + self.num_ngrams, len(words))
        if start_index == 0:
            word = words[0]
            # Capitalized word followed by lowercase/hyphen letters: also
            # record the lowercased n-grams starting at position 0.
            if regex.fullmatch(r"\p{Lu}[-\p{Ll}]+", word):
                tokens = [word.lower()]
                uniq_phrases.add(" ".join(tokens))
                index = 1
                while index < end_index:
                    tokens.append(words[index])
                    uniq_phrases.add(" ".join(tokens))
                    index += 1
        # Record all prefixes of the n-gram window as phrases.
        tokens = []
        index = start_index
        while index < end_index:
            tokens.append(words[index])
            uniq_phrases.add(" ".join(tokens))
            index += 1
        start_index += 1
    for phrase in uniq_phrases:
        self.mem_phrase_count.Increment(phrase, 1)
    # Flush a full batch, or apply a cutoff at the intermediate threshold.
    if self.num_words >= BATCH_MAX_WORDS:
        self.Dump()
        self.Start()
    elif self.num_words_since_cutoff >= BATCH_MAX_WORDS / BATCH_CUTOFF_FREQ:
        self.DoCutOff()
def cli_wrapper(args):
    """CLI entry point: load a JSON DB, run the recommendation pipeline,
    and dump the result as markdown.

    *args* is a docopt-style mapping with keys DB_PATH, --pipe, --base,
    --cost and --output.
    """
    db_path = Path(args["DB_PATH"])
    parent_path = db_path.parent
    # Derive a prefix from DB names like "foo_db.json" / "foo-db.json".
    m = regex.fullmatch(r"(.+)[_-]db\.json", db_path.name)
    prefix = m[1] if m else None
    pipeline_path = Path(args["--pipe"] or parent_path / f"{prefix}_pipe.py")
    if pipeline_path.is_file():
        try:
            # The pipeline file is a Python literal (list of commands).
            commands = literal_eval(pipeline_path.read_text())
        except Exception:  # Too many possible exceptions
            sys.exit(f"The pipeline '{pipeline_path}' is malformed: aborted.")
    elif args["--pipe"]:
        # An explicitly requested pipeline that doesn't exist is fatal;
        # a missing default pipeline just means no commands.
        sys.exit(f"No pipeline at '{pipeline_path}': aborted.")
    else:
        commands = []
    rec = Recommendations(
        commands=commands,
        db=json.loads(db_path.read_text()),
        base_path=Path(args["--base"] or parent_path),
        cost_assessment_strategy=args["--cost"],
    )
    rec.run_pipeline()
    output_path = Path(args["--output"] or parent_path / f"{prefix}_recommendations.md")
    output_path.write_text(rec.get_markdown())
    print(f"Dumped: {output_path.resolve()}.\n")
def import_doc(self, items, meta):
    """Build a Tree.DocImport node from a parsed `import` statement.

    items[0] is the URI; an optional string item gives an explicit
    namespace, otherwise the namespace is inferred from the URI basename.

    Raises:
        Error.SyntaxError: when the namespace is not a valid WDL name or
        collides with a language keyword.
    """
    pos = self._sp(meta)
    uri = items[0]
    if len(items) > 1 and isinstance(items[1], str):
        # NOTE(review): items[1] must be a str subclass exposing .value
        # (e.g. a lark Token) — a plain str would raise AttributeError.
        namespace = items[1].value
    else:
        # infer namespace from filename/URI
        namespace = uri
        try:
            namespace = namespace[namespace.rindex("/") + 1:]
        except ValueError:
            # No '/' in the URI: use it whole.
            pass
        # Drop any query string and file extension: "a/b.wdl?x" -> "b".
        namespace = namespace.split("?")[0].split(".")[0]
    if not regex.fullmatch("[a-zA-Z][a-zA-Z0-9_]*", namespace) or namespace in self._keywords:
        raise Error.SyntaxError(
            pos,
            """declare an import namespace that follows WDL name rules and isn't a language keyword (import "filename" as some_namespace)""",
        )
    # Alias entries are the tuple items following the optional namespace.
    aliases = [p for p in items[1:] if isinstance(p, tuple)]
    return Tree.DocImport(pos=pos, uri=uri, namespace=namespace, aliases=aliases, doc=None)
def get_pattern(cls):
    """Build escaped regex patterns for each of cls.alternatives, appending
    a word boundary when the alternative ends in a word character.

    NOTE(review): there is no return statement — `patterns` is discarded;
    confirm whether a `return` was lost (this chunk may be truncated).
    """
    # Fix: `ur'...'` literals are Python 2 syntax and a SyntaxError in
    # Python 3; plain raw strings behave identically here.
    patterns = []
    for alternative in cls.alternatives:
        pattern = regex.escape(alternative)
        if regex.fullmatch(r'\w', alternative[-1]):
            pattern += r'\b'
        patterns.append(pattern)
def fromSerializationString(cls, string: str) -> "MessageChain":
    """Convert a string using "Mirai codes" for special objects into a
    message chain.

    Returns:
        MessageChain: the converted chain; the contained information may
        be incomplete.
    """
    from .elements.internal import Plain, At, AtAll, Source, FlashImage, Image, Face
    # Element factories keyed by mirai code type; args is the comma-split
    # argument tail of the code.
    PARSE_FUNCTIONS = {
        "atall": lambda args: AtAll(),
        "source": lambda args: Source(id=args[0], time=args[1]),
        "at": lambda args: At(target=args[0], display=args[1]),
        "face": lambda args: Face(faceId=args[0]),
        "image": lambda args: Image(imageId=args[0]),
        "flash": lambda args: FlashImage(imageId=args[0]),
    }
    result = []
    # Split keeps the [mirai:...] delimiters via the capture group.
    for match in regex.split(r"(\[mirai:.+?\])", string):
        # NOTE(review): the `(:(.+?))` group is NOT optional, so an
        # argument-less code like "[mirai:atall]" can never fullmatch —
        # confirm whether `(:(.+?))?` was intended.
        mirai = regex.fullmatch(r"\[mirai:(.+?)(:(.+?))\]", match)
        if mirai:
            # Tolerance: too few arguments fails; extras are ignored.
            args = mirai.group(3).split(",")
            result.append(PARSE_FUNCTIONS[mirai.group(1)](args))
        elif match:
            # "[_" is the escape sequence for a literal "[".
            result.append(Plain(match.replace("[_", "[")))
    return MessageChain.create(result)
def Detachment(string):
    """For *string* of the shape <A-B>, return B once for every known
    theorem whose formula equals A; otherwise return []."""
    shape = "(?P<lbra><)" + wfs([1]) + "(?P<opperator>-)" + wfs([2]) + "(?P<rbra>>)"
    match = regex.fullmatch(shape, string)
    if not match:
        return []
    antecedent = match.group("wfs_1")
    consequent = match.group("wfs_2")
    # One detached consequent per theorem matching the antecedent.
    return [consequent for theorem in theorems if theorem[2] == antecedent]
def fullmatch(pattern, string, *args, **kwargs):
    """Wrapper for `fullmatch`.

    Extracts the flags (needed to pre-process back-references in the
    pattern) before delegating to the underlying regex module.
    """
    # Fix: when flags are passed positionally they are the FIRST extra
    # positional argument — regex.fullmatch(pattern, string, flags=0, ...)
    # — not args[2] (which would be endpos).
    flags = args[0] if args else kwargs.get('flags', 0)
    return _regex.fullmatch(_apply_search_backrefs(pattern, flags), string, *args, **kwargs)
def fuzzy_region_matching(region, compared, error='auto'):
    """Return True if *compared* fuzzily matches *region* within *error*
    total edits; error='auto' allows half the region length (min 1)."""
    # Fix/robustness: plain equality instead of re.match('^auto$', error),
    # which raised TypeError whenever a numeric error budget was passed.
    if error == 'auto':
        error = max(1, int(len(region) * 0.5))
    # {e<=N}: regex-module fuzzy matching with at most N errors.
    return regex.fullmatch('(?:%s){e<=%s}' % (region, str(error)), compared) is not None
def fuzzy_variable_matching(variable, compared, error='auto'):
    """Fuzzily match *compared* against *variable* within *error* total
    edits; error='auto' allows 60% of the variable length (min 1).

    Returns the match object (or None), unlike fuzzy_region_matching which
    returns a bool — kept as-is for backward compatibility.
    """
    # Fix/robustness: plain equality instead of re.match('^auto$', error),
    # which raised TypeError whenever a numeric error budget was passed.
    if error == 'auto':
        error = max(1, int(len(variable) * 0.6))
    # {e<=N}: regex-module fuzzy matching with at most N errors.
    return regex.fullmatch('(?:%s){e<=%s}' % (variable, str(error)), compared)
def isType(exp, string):
    """Return True if *string* fully matches the pattern *exp*."""
    # Idiom: bool(...) instead of `True if ... else False`.
    return bool(regex.fullmatch(exp, string))
def testSlurpy(testCase):
    """Return True when *testCase* is a slimp prefix immediately followed
    by a slump suffix (reSlimp/reSlump are module-level patterns)."""
    m = re.match(reSlimp, testCase)
    if m:
        # The remainder after the slimp prefix must be exactly a slump.
        postFix = testCase[len(m.group()):]
        return re.fullmatch(reSlump, postFix) != None
    else:
        return False


# TestCase — known positive and negative samples for each shape.
testSlumps = "DFG", "EFG", "DFFFFFG", "DFDFDFDFG", "DFEFFFFFG"
testNotSlumps = "DFEFF", "EFAHG", "DEFG", "DG", "EFFFFDG"
testSlimps = "AH", "ABAHC", "ABABAHCC", "ADFGC", "ADFFFFGC", "ABAEFGCC", "ADFDFGC"
testNotSlimps = "ABC", "ABAH", "DFGC", "ABABAHC", "SLIMP", "ADGC"
testSlurpys = "AHDFG", "ADFGCDFFFFFG", "ABAEFGCCDFEFFFFFG"
testNotSlurpys = "AHDFGA", "DFGAH", "ABABCC"

# Self-check: every sample set must classify correctly (prints True).
print("testSlumps :", all([re.fullmatch(reSlump, testCase) != None for testCase in testSlumps]), testSlumps)
print("testNotSlumps :", all([re.fullmatch(reSlump, testCase) == None for testCase in testNotSlumps]), testNotSlumps)
print("testSlimps :", all([re.fullmatch(reSlimp, testCase) != None for testCase in testSlimps]), testSlimps)
print("testNotSlimps :", all([re.fullmatch(reSlimp, testCase) == None for testCase in testNotSlimps]), testNotSlimps)
print("testSlurpys :", all([testSlurpy(testCase) for testCase in testSlurpys]), testSlurpys)
print("testNotSlurpys :", all([not testSlurpy(testCase) for testCase in testNotSlurpys]), testNotSlurpys)
print("-"*50)

if __name__ == "__main__":
    # Interactive mode: classify user-supplied test cases.
    n = input("input test count : ")
    testCases = [input("Test case " + str(i+1) + " : ").upper() for i in range(int(n))]
    print("-"*50)
    print("SLURPYS OUTPUT")
    for testCase in testCases:
        print("YES" if testSlurpy(testCase) else "NO")
    print("END OF OUTPUT")
def Seperation(string):
    """For *string* of the shape Quantifiers<A&B>, derive both conjuncts,
    each prefixed by the quantifiers, and return their concatenation."""
    shape = (Quantifiers([0]) + "(?P<lbra><)" + wfs([1])
             + "(?P<opperator>&)" + wfs([2]) + "(?P<rbra>>)")
    match = regex.fullmatch(shape, string)
    # replace() handles a failed (None) match itself.
    left = replace(match, "{Quantifiers_0}{wfs_1}")
    right = replace(match, "{Quantifiers_0}{wfs_2}")
    return left + right