def __init__(self, pattern):
    """Compile a tag pattern like "<artist>/<album>/<tracknumber> <title>"
    into a regex (self.pattern) with one named group per tag, recording
    the referenced tag names in self.headers.
    """
    self.headers = []
    # Number of path components the pattern spans: separator count + 1.
    self.slashes = len(pattern) - len(pattern.replace(os.path.sep, '')) + 1
    self.pattern = None
    # patterns look like <tagname> non regexy stuff <tagname> ...
    pieces = re.split(r'(<[A-Za-z0-9~_]+>)', pattern)
    # Numeric tags match one or two digits instead of the generic ".+?".
    override = {'<tracknumber>': r'\d\d?', '<discnumber>': r'\d\d??'}
    dummies_found = 0
    for i, piece in enumerate(pieces):
        if not piece:
            continue
        if piece[0] + piece[-1] == '<>':
            piece = piece.lower()   # canonicalize to lowercase tag names
            if "~" in piece:
                # "~" is not valid in a regex group name; substitute a
                # unique throwaway group name instead.
                dummies_found += 1
                piece = "<QUOD_LIBET_DUMMY_%d>" % dummies_found
            # Build a named group, e.g. "(?P<title>.+?)".
            pieces[i] = '(?P%s%s)' % (piece, override.get(piece, '.+?'))
            if "QUOD_LIBET" not in piece:
                # Only real tags (not dummy groups) become headers.
                self.headers.append(piece[1:-1].encode("ascii", "replace"))
        else:
            # Literal text between tags: escape it verbatim.
            pieces[i] = re_escape(piece)
    # some slight magic to anchor searches "nicely"
    # nicely means if it starts with a <tag>, anchor with a /
    # if it ends with a <tag>, anchor with .xxx$
    # but if it's a <tagnumber>, don't bother as \d+ is sufficient
    # and if it's not a tag, trust the user
    if pattern.startswith('<') and not pattern.startswith('<tracknumber>')\
            and not pattern.startswith('<discnumber>'):
        pieces.insert(0, re_escape(os.path.sep))
    if pattern.endswith('>') and not pattern.endswith('<tracknumber>')\
            and not pattern.endswith('<discnumber>'):
        # Anchor on a trailing file extension like ".mp3".
        pieces.append(r'(?:\.[A-Za-z0-9_+]+)$')
    self.pattern = re.compile(''.join(pieces))
def _fixup_literal_list(literals, mapping):
    """Turn a list of codepoints into escaped regex source text, expanding
    characters (or sequences) with entries in *mapping* into alternations
    that also match their mapped variants.
    """
    u = u"".join(map(chr, literals))
    if not mapping:
        # No variants to expand: a plain escape suffices. This also avoids
        # building the degenerate pattern "()" below (empty alternation),
        # matching the guard used by the re.sub-based sibling variant.
        return re_escape(u)
    # longest matches first, we will handle contained ones in the replacement
    # function
    reg = u"(%s)" % u"|".join(
        map(re_escape, sorted(mapping.keys(), key=len, reverse=True)))

    def replace_func(match):
        # Per-character expansion (each char plus its own variants).
        text = match.group(1)
        all_ = u""
        for c in text:
            all_ += _fixup_literal(ord(c), False, mapping)
        if len(text) > 1:
            # Multi-char match: also allow the mapped replacement text
            # as an alternative branch.
            multi = u"".join(mapping[text])
            if len(multi) > 1:
                multi = "[%s]" % re_escape(multi)
            else:
                multi = re_escape(multi)
            return "(?:%s|%s)" % (all_, multi)
        return all_

    # Stitch together escaped plain segments and expanded matches.
    new = u""
    pos = 0
    for match in re.finditer(reg, u):
        new += re_escape(u[pos:match.start()])
        new += replace_func(match)
        pos = match.end()
    new += re_escape(u[pos:])
    return new
def _fixup_literal_list(literals, mapping): u = u"".join(map(unichr, literals)) # longest matches first, we will handle contained ones in the replacement # function reg = u"(%s)" % u"|".join( map(re_escape, sorted(mapping.keys(), key=len, reverse=True))) def replace_func(match): text = match.group(1) all_ = u"" for c in text: all_ += _fixup_literal(ord(c), False, mapping) if len(text) > 1: multi = u"".join(mapping[text]) if len(multi) > 1: multi = "[%s]" % re_escape(multi) else: multi = re_escape(multi) return "(?:%s|%s)" % (all_, multi) return all_ new = u"" pos = 0 for match in re.finditer(reg, u): new += re_escape(u[pos:match.start()]) new += replace_func(match) pos = match.end() new += re_escape(u[pos:]) return new
def _fixup_range(start, end, mapping):
    """Render a regex character-class range, prefixed with the escaped
    mapped variants of any codepoints in the range that appear in
    *mapping*. (Python 2 variant.)
    """
    variants = [re_escape(mapping[unichr(code)])
                for code in xrange(start, end + 1)
                if unichr(code) in mapping]
    lo = re_escape(unichr(start))
    hi = re_escape(unichr(end))
    return u"%s%s-%s" % ("".join(variants), lo, hi)
def replace_func(match):
    # Expand a matched variant sequence into a regex alternation.
    # NOTE(review): `mapping` and `_fixup_literal` are free names here —
    # presumably closed over from an enclosing scope; confirm at call site.
    text = match.group(1)
    all_ = u""
    for c in text:
        # Per-character expansion: each char plus its own variants.
        all_ += _fixup_literal(ord(c), False, mapping)
    if len(text) > 1:
        # Multi-char match: also allow the joined mapped replacement
        # as an alternative branch (char class if longer than one char).
        multi = u"".join(mapping[text])
        if len(multi) > 1:
            multi = "[%s]" % re_escape(multi)
        else:
            multi = re_escape(multi)
        return "(?:%s|%s)" % (all_, multi)
    return all_
def replace_func(match):
    """Expand a matched variant sequence into a regex alternation: the
    per-character expansion, plus (for multi-char matches) the mapped
    replacement as a second, capturing-group branch.
    """
    text = match.group(1)
    expanded = u""
    for ch in text:
        expanded += _fixup_literal(ord(ch), False, mapping)
    if len(text) <= 1:
        return expanded
    multi = mapping[text]
    if len(multi) > 1:
        multi = "[%s]" % re_escape(multi)
    else:
        multi = re_escape(multi)
    return "(%s|%s)" % (expanded, multi)
def str_to_re(self, string):
    """Convert plain string to escaped regexp that can be compiled"""
    if isinstance(string, str):
        string = string.encode('utf-8')
    # Interpret backslash escapes (e.g. "\\n") the way a Python string
    # literal would, then decode the bytes back to text.
    decoded = codecs.escape_decode(string)[0].decode('utf-8')
    return "^%s$" % re_escape(decoded)
def _fixup_literal_list(literals, mapping):
    """Escape a list of codepoints for a regex, expanding characters with
    entries in *mapping* into alternations that also match their variants.
    """
    u = re_escape("".join(map(unichr, literals)))
    if not mapping:
        # Nothing to expand: the plain escaped text is the result.
        return u
    # longest matches first, we will handle contained ones in the replacement
    # function
    reg = u"(%s)" % u"|".join(
        map(re_escape, sorted(mapping.keys(), key=len, reverse=True)))

    def replace_func(match):
        # Per-character expansion (each char plus its own variants).
        text = match.group(1)
        all_ = u""
        for c in text:
            all_ += _fixup_literal(ord(c), False, mapping)
        if len(text) > 1:
            # Multi-char match: also allow the mapped replacement text.
            multi = mapping[text]
            if len(multi) > 1:
                multi = "[%s]" % re_escape(multi)
            else:
                multi = re_escape(multi)
            return "(%s|%s)" % (all_, multi)
        return all_

    return re.sub(reg, replace_func, u)
def Value(self, outer=False):
    """Rule for value. Either a regexp, quoted string, boolean
    combination of values, or free text string"""
    if self.accept('/'):
        # /regex/ with optional trailing mode flags.
        regex = self.expect_re(REGEXP)
        self.expect('/')
        return self.RegexpMods(regex)
    elif self.accept('"'):
        # Double-quoted string: treated as an escaped literal regex.
        regex = self.str_to_re(self.expect_re(DOUBLE_STRING))
        self.expect('"')
        return self.RegexpMods(regex)
    elif self.accept("'"):
        # Single-quoted string: same treatment as double-quoted.
        regex = self.str_to_re(self.expect_re(SINGLE_STRING))
        self.expect("'")
        return self.RegexpMods(regex)
    elif self.accept('!'):
        return self.Negation(self.Value)
    elif self.accept('|'):
        return self.Union(self.Value)
    elif self.accept('&'):
        return self.Intersection(self.Value)
    else:
        if outer:
            # Hack to force plain text parsing for top level free text
            raise ParseError('Free text not allowed at top level of query')
        # Free text: escaped literal regex with the "d" mode flag.
        return match.Regex(re_escape(self.expect_re(TEXT)), u"d")
def str_to_re(self, string):
    """Convert plain string to escaped regexp that can be compiled"""
    # Normalize to UTF-8 bytes so the escape codec below applies (Py2).
    if isinstance(string, unicode):
        string = string.encode('utf-8')
    # Interpret backslash escapes the way a Python string literal would.
    string = string.decode('string_escape')
    string = string.decode('utf-8')
    # Anchor so the whole value must match.
    return "^%s$" % re_escape(string)
def MatchTag(self):
    """Consume a TAG token and compile it into a case-insensitive
    literal-match regex.
    """
    lexeme = self.lookahead.lexeme
    self.match(TAG)
    try:
        return re.compile(re_escape(lexeme), re.IGNORECASE | re.UNICODE)
    except re.error:
        raise ParseError("The regular expression was invalid")
def _fixup_literal(literal, in_seq, mapping):
    """Escape one codepoint for a regex; when it has mapped variants,
    bundle them into a character class unless already inside one.
    (Python 2 variant: mapping values are strings.)
    """
    text = unichr(literal)
    if text in mapping:
        text = text + mapping[text]
    escaped = re_escape(text)
    if len(text) > 1 and not in_seq:
        escaped = u"[%s]" % escaped
    return escaped
def _fixup_literal(literal, in_seq, mapping):
    """Escape one codepoint for a regex; when it has mapped variants,
    bundle them into a character class unless already inside one.
    (Python 3 variant: mapping values are iterables of characters.)
    """
    chars = chr(literal)
    if chars in mapping:
        chars = chars + u"".join(mapping[chars])
    escaped = re_escape(chars)
    if not in_seq and len(chars) > 1:
        escaped = u"[%s]" % escaped
    return escaped
def str_to_re(self, string):
    """Convert plain string to escaped regexp that can be compiled"""
    # Normalize to UTF-8 bytes so escape decoding works on both majors.
    if isinstance(string, text_type):
        string = string.encode('utf-8')
    if PY3:
        # codecs.escape_decode interprets backslash escapes in bytes.
        string = codecs.escape_decode(string)[0]
    else:
        # Python 2 equivalent via the 'string_escape' codec.
        string = string.decode('string_escape')
    string = string.decode('utf-8')
    # Anchor so the whole value must match.
    return "^%s$" % re_escape(string)
def __init__(self, string, star=None, dumb_match_diacritics=True): """Parses the query string and returns a match object. star -- List of tags to look in if none are specified in the query. Defaults to those specified in `STAR`. dumb_match_diacritics -- In case of text queries (QueryType.TEXT) try to match variants with diacritic marks. This parses the query language as well as some tagless shortcuts: "foo bar" -> &(star1,star2=foo,star1,star2=bar) "!foo" -> !star1,star2=foo "&(foo, bar)" -> &(star1,star2=foo, star1,star2=bar) "&(foo, !bar)" -> &(star1,star2=foo, !star1,star2=bar) "|(foo, bar)" -> |(star1,star2=foo, star1,star2=bar) "!&(foo, bar)" -> !&(star1,star2=foo, star1,star2=bar) "!(foo, bar)" -> !star1,star2=(foo, bar) etc... """ # TODO dumb_match_diacritics if star is None: star = self.STAR if not isinstance(string, text_type): assert PY2 string = string.decode('utf-8') self.star = list(star) self.string = string self.type = QueryType.VALID try: self._match = QueryParser(string, star=star).StartQuery() return except self.error: pass if not set("#=").intersection(string): parts = ["/%s/" % re_escape(s) for s in string.split()] if dumb_match_diacritics: parts = [p + "d" for p in parts] string = "&(" + ",".join(parts) + ")" self.string = string try: self.type = QueryType.TEXT self._match = QueryParser(string, star=star).StartQuery() return except self.error: pass raise error('Query is not VALID or TEXT')
def __init__(self, string, star=None, dumb_match_diacritics=True):
    """Parses the query string and returns a match object.

    star -- List of tags to look in if none are specified in the query.
        You can add some by extending Query.START and pass it here.
    dumb_match_diacritics -- In case of text queries (QueryType.TEXT)
        try to match variants with diacritic marks.

    This parses the query language as well as some tagless shortcuts:
        "foo bar" -> &(star1,star2=foo,star1,star2=bar)
        "!foo" -> !star1,star2=foo
        "&(foo, bar)" -> &(star1,star2=foo, star1,star2=bar)
        "&(foo, !bar)" -> &(star1,star2=foo, !star1,star2=bar)
        "|(foo, bar)" -> |(star1,star2=foo, star1,star2=bar)
        "!&(foo, bar)" -> !&(star1,star2=foo, star1,star2=bar)
        "!(foo, bar)" -> !star1,star2=(foo, bar)
        etc...
    """
    if star is None:
        star = self.STAR
    if not isinstance(string, unicode):
        string = string.decode('utf-8')
    self.star = list(star)
    self.string = string
    # First try the string as a tagless "star" query.
    try:
        self.type = QueryType.VALID
        self._match = QueryParser(QueryLexer(string)).StartStarQuery(star)
        return
    except error:
        pass
    # normal string, put it in a intersection to get a value list
    if not set("#=").intersection(string):
        parts = ["/%s/" % re_escape(s) for s in string.split()]
        if dumb_match_diacritics:
            # "d" flag: also match diacritic variants.
            parts = [p + "d" for p in parts]
        string = "&(" + ",".join(parts) + ")"
        self.string = string
        try:
            self.type = QueryType.TEXT
            self._match = QueryParser(
                QueryLexer(string)).StartStarQuery(star)
            return
        except error:
            pass
    # Last resort: parse as a full query expression (may raise).
    self.type = QueryType.VALID
    self._match = QueryParser(QueryLexer(string)).StartQuery()
def __init__(self, string: str, star: Optional[Iterable[str]] = None):
    """Parses the query string and returns a match object.

    :param string: The text to parse
    :param star: Tags to look in, if none are specified in the query.
        Defaults to those specified in `STAR`.

    This parses the query language as well as some tagless shortcuts:
        "foo bar" -> &(star1,star2=foo,star1,star2=bar)
        "!foo" -> !star1,star2=foo
        "&(foo, bar)" -> &(star1,star2=foo, star1,star2=bar)
        "&(foo, !bar)" -> &(star1,star2=foo, !star1,star2=bar)
        "|(foo, bar)" -> |(star1,star2=foo, star1,star2=bar)
        "!&(foo, bar)" -> !&(star1,star2=foo, star1,star2=bar)
        "!(foo, bar)" -> !star1,star2=(foo, bar)
        etc...
    """
    print_d(f"Creating query {string!r}")
    if star is None:
        star = self.STAR
    assert isinstance(string, str)
    self.star = list(star)
    self.string = string
    self.type = QueryType.VALID
    # First try the string as-is as a full query-language expression.
    # (A leftover breakpoint() debugging call was removed here.)
    try:
        self._match = QueryParser(string, star=star).StartQuery()
        if not self._match.valid:
            self.type = QueryType.INVALID
        return
    except self.Error:
        pass
    # Fall back to a dumb text search unless the string contains query
    # operators ("#" or "=").
    if not set("#=").intersection(string):
        # Strip configured noise characters before the text search.
        for c in config.get("browsers", "ignored_characters"):
            string = string.replace(c, "")
        # "d" flag: also match diacritic variants.
        parts = ["/%s/d" % re_escape(s) for s in string.split()]
        string = "&(" + ",".join(parts) + ")"
        self.string = string
        try:
            self.type = QueryType.TEXT
            self._match = QueryParser(string, star=star).StartQuery()
            return
        except self.Error:
            pass
    print_d("Query '%s' is invalid" % string)
    self.type = QueryType.INVALID
    self._match = False_()
def __init__(self, string, star=None):
    """Parses the query string and returns a match object.

    star -- List of tags to look in if none are specified in the query.
        Defaults to those specified in `STAR`.

    This parses the query language as well as some tagless shortcuts:
        "foo bar" -> &(star1,star2=foo,star1,star2=bar)
        "!foo" -> !star1,star2=foo
        "&(foo, bar)" -> &(star1,star2=foo, star1,star2=bar)
        "&(foo, !bar)" -> &(star1,star2=foo, !star1,star2=bar)
        "|(foo, bar)" -> |(star1,star2=foo, star1,star2=bar)
        "!&(foo, bar)" -> !&(star1,star2=foo, star1,star2=bar)
        "!(foo, bar)" -> !star1,star2=(foo, bar)
        etc...
    """
    print_d("Creating query \"%s\", called from %s"
            % (string, frame_info(1)))
    if star is None:
        star = self.STAR
    if not isinstance(string, text_type):
        assert PY2
        string = string.decode('utf-8')
    self.star = list(star)
    self.string = string
    self.type = QueryType.VALID
    # First try the string as-is as a full query-language expression.
    try:
        self._match = QueryParser(string, star=star).StartQuery()
        return
    except self.error:
        pass
    # Fall back to a dumb text search unless the string contains query
    # operators ("#" or "=").
    if not set("#=").intersection(string):
        # "d" flag: also match diacritic variants.
        parts = ["/%s/d" % re_escape(s) for s in string.split()]
        string = "&(" + ",".join(parts) + ")"
        self.string = string
        try:
            self.type = QueryType.TEXT
            self._match = QueryParser(string, star=star).StartQuery()
            return
        except self.error:
            pass
    # raise error('Query is not VALID or TEXT')
    print_d("Query '%s' is invalid" % string)
    self.type = QueryType.INVALID
    self._match = False_()
def __init__(self, string, star=None):
    """Parses the query string and returns a match object.

    star -- List of tags to look in if none are specified in the query.
        Defaults to those specified in `STAR`.

    This parses the query language as well as some tagless shortcuts:
        "foo bar" -> &(star1,star2=foo,star1,star2=bar)
        "!foo" -> !star1,star2=foo
        "&(foo, bar)" -> &(star1,star2=foo, star1,star2=bar)
        "&(foo, !bar)" -> &(star1,star2=foo, !star1,star2=bar)
        "|(foo, bar)" -> |(star1,star2=foo, star1,star2=bar)
        "!&(foo, bar)" -> !&(star1,star2=foo, star1,star2=bar)
        "!(foo, bar)" -> !star1,star2=(foo, bar)
        etc...
    """
    print_d("Creating query \"%s\", called from %s"
            % (string, frame_info(1)))
    if star is None:
        star = self.STAR
    assert isinstance(string, str)
    self.star = list(star)
    self.string = string
    self.type = QueryType.VALID
    # First try the string as-is as a full query-language expression.
    try:
        self._match = QueryParser(string, star=star).StartQuery()
        return
    except self.error:
        pass
    # Fall back to a dumb text search unless the string contains query
    # operators ("#" or "=").
    if not set("#=").intersection(string):
        # Strip configured noise characters before the text search.
        for c in config.get("browsers", "ignored_characters"):
            string = string.replace(c, "")
        # "d" flag: also match diacritic variants.
        parts = ["/%s/d" % re_escape(s) for s in string.split()]
        string = "&(" + ",".join(parts) + ")"
        self.string = string
        try:
            self.type = QueryType.TEXT
            self._match = QueryParser(string, star=star).StartQuery()
            return
        except self.error:
            pass
    # raise error('Query is not VALID or TEXT')
    print_d("Query '%s' is invalid" % string)
    self.type = QueryType.INVALID
    self._match = False_()
def split_value(s, splitters=(u"/", u"&", u",")):
    """Splits a string. The first match in 'splitters' is used as the
    separator; subsequent matches are intentionally ignored.

    The string is first split on newlines; the first splitter that
    occurs in any resulting segment is applied to all of them. The
    default is now a tuple to avoid the mutable-default-argument trap
    (the parameter is only iterated, so this is backward-compatible).
    """
    if not splitters:
        return [s.strip()]
    values = s.split("\n")
    for spl in splitters:
        # \b\s*X\s*\b: the separator must sit between word boundaries,
        # with surrounding whitespace swallowed by the split.
        pattern = re.compile(r"\b\s*%s\s*\b" % re_escape(spl), re.UNICODE)
        # any() short-circuits instead of materializing a filtered list.
        if not any(pattern.search(v) for v in values):
            continue
        return [part.strip() for v in values for part in pattern.split(v)]
    return values
def _add_directory(app, value):
    """Scan a directory into the library, then either filter the browser
    to it (when text filtering is supported) or push its songs onto the
    front of the play queue and skip to them.
    """
    player = app.player
    window = app.window
    library = app.library
    filename = os.path.normpath(os.path.realpath(value))
    # Drain the scan generator so the whole tree gets added.
    for added in library.scan([filename]):
        pass
    if app.browser.can_filter_text():
        # Anchor the filter at the directory path. NOTE(review): the
        # trailing "c" is a query regex mode flag — presumably
        # case-sensitive matching; confirm against the query syntax.
        app.browser.filter_text(
            "filename = /^%s/c" % util.re_escape(filename))
    else:
        basepath = filename + "/"
        songs = [song for (fn, song) in library.iteritems()
                 if fn.startswith(basepath)]
        # Reverse order: each insert_before at the head restores
        # ascending order in the queue.
        songs.sort(reverse=True)
        queue = window.playlist.q
        for song in songs:
            queue.insert_before(queue.get_iter_first(), row=[song])
        player.next()
def regex_for(sp):
    """Regex matching *sp* between word-ish boundaries, absorbing any
    whitespace around the separator.
    """
    boundary_start = r'(?:\b|(?<=\W))'
    boundary_end = r'(?:\b|(?=\W))'
    return boundary_start + r'\s*' + re_escape(sp) + r'\s*' + boundary_end
def regex_for(sp):
    """Regex matching *sp* between word-ish boundaries, absorbing any
    whitespace around the separator.
    """
    parts = [r'(?:\b|(?<=\W))', r'\s*', re_escape(sp), r'\s*',
             r'(?:\b|(?=\W))']
    return ''.join(parts)
def test_many_unsafe(self):
    # re_escape must escape every regex metacharacter in the input.
    # failUnlessEqual is a deprecated unittest alias; use assertEqual.
    self.assertEqual(re_escape("*quux#argh?woo"), r"\*quux\#argh\?woo")
title = title[:l] return title.rstrip(), subtitle else: return title, None def split_title(s, splitters=["/", "&", ","]): title, subtitle = find_subtitle(s) return ((title.strip(), split_value(subtitle, splitters)) if subtitle else (s, [])) __FEATURING = ["feat.", "featuring", "feat", "ft", "ft.", "with", "w/"] __ORIGINALLY = ["originally by ", " cover"] # Cache case-insensitive regex searches of the above __FEAT_REGEX = [re.compile(re_escape(s + " "), re.I) for s in __FEATURING] __ORIG_REGEX = [re.compile(re_escape(s), re.I) for s in __ORIGINALLY] def split_people(s, splitters=["/", "&", ","]): title, subtitle = find_subtitle(s) if not subtitle: parts = s.split(" ") if len(parts) > 2: for feat in __FEATURING: try: i = [p.lower() for p in parts].index(feat) orig = " ".join(parts[:i]) others = " ".join(parts[i + 1:]) return (orig, split_value(others, splitters)) except (ValueError, IndexError):
def test_safe(self):
    # Text without regex metacharacters must pass through unchanged.
    # failUnlessEqual is a deprecated unittest alias; use assertEqual.
    self.assertEqual(re_escape("fo o"), "fo o")
def test_unsafe(self):
    # A single metacharacter must be backslash-escaped.
    # failUnlessEqual is a deprecated unittest alias; use assertEqual.
    self.assertEqual(re_escape("!bar"), r"\!bar")
def test_empty_unicode(self):
    # Empty unicode input stays empty and keeps its type (Py2 variant).
    # failUnlessEqual is a deprecated unittest alias; use assertEqual.
    self.assertEqual(re_escape(u""), u"")
    self.assertTrue(isinstance(re_escape(u""), unicode))
def _fixup_not_literal(literal, mapping):
    """Negated character class excluding *literal* and any mapped
    variants of it. (Python 2 variant: mapping values are strings.)
    """
    chars = unichr(literal)
    if chars in mapping:
        chars = chars + mapping[chars]
    return u"[^%s]" % re_escape(chars)
def _fixup_not_literal(literal, mapping):
    """Negated character class excluding *literal* and any mapped
    variants of it. (Python 3 variant: mapping values are iterables.)
    """
    ch = chr(literal)
    variants = u"".join(mapping.get(ch, []))
    # re_escape returns a string, so joining its characters is the
    # string itself — write it directly.
    return u"[^%s]" % re_escape(ch + variants)
def test_empty(self):
    # Empty input stays empty and keeps its (bytes) type.
    # failUnlessEqual is a deprecated unittest alias; use assertEqual.
    self.assertEqual(re_escape(""), "")
    self.assertTrue(isinstance(re_escape(""), bytes))
def _fixup_not_literal(literal, mapping):
    """Negated character class excluding *literal* and any mapped
    variants of it. (Python 2 variant: mapping values are iterables.)
    """
    ch = unichr(literal)
    variants = u"".join(mapping.get(ch, []))
    # re_escape returns a string, so joining its characters is the
    # string itself — write it directly.
    return u"[^%s]" % re_escape(ch + variants)
def str_to_re(self, scanner, string):
    # Lexer action: turn a quoted string token into an anchored,
    # escaped literal-match regex lexeme.
    if isinstance(string, unicode):
        string = string.encode('utf-8')
    # Strip the surrounding quotes, then interpret backslash escapes
    # the way a Python string literal would (Py2 'string_escape' codec).
    string = string[1:-1].decode('string_escape')
    string = string.decode('utf-8')
    return QueryLexeme(RE, "^%s$" % re_escape(string))
def test_many_unsafe(self):
    # re_escape must escape every regex metacharacter in the input.
    # failUnlessEqual is a deprecated unittest alias; use assertEqual.
    self.assertEqual(
        re_escape("*quux#argh?woo"), r"\*quux\#argh\?woo")