def Regexp(pattern):
    '''
    Match a regular expression against the start of the stream.

    If the pattern contains groups, they are returned as separate results;
    otherwise the whole match is returned as a single result.

    :Parameters:

      pattern
        The regular expression to match.
    '''
    pattern = compile_(pattern)

    def match(support, stream):
        # Streams that expose a ``text`` attribute are matched through it;
        # anything else (eg a plain string) is matched directly.
        try:
            found = pattern.match(stream.text)
        except AttributeError:  # no text method
            found = pattern.match(stream)
        if not found:
            return None
        eaten = len(found.group())
        groups = found.groups()
        results = list(groups) if groups else [found.group()]
        # The remaining stream starts after the consumed text.
        return (results, stream[eaten:])

    return match
def test_random(self):
    '''
    Compares lepl + python expressions on random input.

    Original author's note: this runs 'til it fails, and it always does
    fail, because lepl's expressions are guaranteed greedy while python's
    aren't.  This is "normal" (Perl is the same as Python) but I cannot
    fathom why it should be - it seems *harder* to make them work that
    way...  (The loop below is capped at 100 iterations.)
    '''
    # Imported locally so this test does not rely on a module-level import.
    from re import error as RegexpError
    #basicConfig(level=DEBUG)
    #log = getLogger('lepl.regexp._test.random')
    match_alphabet = '012'
    string_alphabet = '013'
    for _ in range(100):
        expression = random_expression(3, match_alphabet)
        string = random_string(3, string_alphabet)
        matcher = DfaRegexp(expression)
        matcher.config.no_full_first_match()
        lepl_result = matcher.parse(string)
        if lepl_result:
            lepl_result = lepl_result[0]
        #log.debug(format('{0} {1} {2}', expression, string, lepl_result))
        try:
            python_pattern = compile_(expression)
        except RegexpError as problem:
            # Random expressions that python rejects as "nothing to repeat"
            # are skipped.  The message is tested by substring because the
            # repr of re.error differs between Python versions.
            if 'nothing to repeat' in str(problem):
                continue
            # Any other compile error is re-raised intact (the previous
            # ``raise e`` raised the bare class and lost the message).
            raise
        python_result = python_pattern.match(string)
        if python_result:
            python_result = python_result.group()
        # Kept outside the ``try`` so a failure reports its message.
        assert lepl_result == python_result, \
            format('{0} != {1}\n{2} {3}',
                   lepl_result, python_result, expression, string)
def Regexp(pattern):
    '''
    Match a regular expression against the current line of the stream.

    If the pattern contains groups, they are returned as separate results;
    otherwise the whole match is returned as a single result.

    :Parameters:

      pattern
        The regular expression to match.
    '''
    pattern = compile_(pattern)

    #noinspection PyUnusedLocal
    def match(support, stream):
        # Only the first element of the pair returned by s_line is used.
        (line, _) = s_line(stream, True)
        found = pattern.match(line)
        if not found:
            return None
        eaten = len(found.group())
        groups = found.groups()
        results = list(groups) if groups else [found.group()]
        # Element [1] of s_next's result is returned as the remaining
        # stream (presumably the stream advanced by ``eaten``).
        return (results, s_next(stream, count=eaten)[1])

    return match
def Regexp(pattern):
    '''
    Match a regular expression against the current line of the stream.

    If the pattern contains groups, they are returned as separate results;
    otherwise the whole match is returned as a single result.

    :Parameters:

      pattern
        The regular expression to match.
    '''
    pattern = compile_(pattern)

    def match(support, stream):
        # Only the first element of the pair returned by s_line is used.
        (line, _) = s_line(stream, True)
        found = pattern.match(line)
        if not found:
            return None
        eaten = len(found.group())
        groups = found.groups()
        results = list(groups) if groups else [found.group()]
        # Element [1] of s_next's result is returned as the remaining
        # stream (presumably the stream advanced by ``eaten``).
        return (results, s_next(stream, count=eaten)[1])

    return match
def env_python():
    """
    Return (python, version) from env.

    Checks for the environment variable TM_FIRST_LINE and parses it for a #!.
    Failing that, checks for the environment variable TM_PYTHON.
    Failing that, uses "/usr/bin/env python".

    The version is parsed from the output of ``<python> -S -V`` and is the
    major and minor numbers joined into a single integer, e.g.
    "Python 2.5.1" -> 25 and "Python 3.10.4" -> 310.

    Raises ValueError if the second word of that output does not start
    with ``<digits>.<digits>``.
    """
    python = ""
    if "TM_FIRST_LINE" in env:
        first_line = env["TM_FIRST_LINE"]
        hash_bang = compile_(r"^#!(.*)$")
        m = hash_bang.match(first_line)
        if m:
            python = m.group(1)
            # Output starting with "-bash:" is treated as the shell failing
            # to run the interpreter named on the #! line, so it is dropped.
            version_string = sh(python + " -S -V 2>&1")
            if version_string.startswith("-bash:"):
                python = ""
    if not python and "TM_PYTHON" in env:
        python = env["TM_PYTHON"]
    elif not python:
        python = "/usr/bin/env python"
    version_string = sh(python + " -S -V 2>&1")
    release = version_string.strip().split()[1]
    # Parse major/minor as whole numbers.  Indexing single characters
    # (release[0] + release[2]) turned "3.10.4" into 31.
    numbers = compile_(r"(\d+)\.(\d+)").match(release)
    if numbers is None:
        raise ValueError("cannot parse python version from %r"
                         % version_string)
    version = int(numbers.group(1) + numbers.group(2))
    return python, version
def test_random(self):
    '''
    Compares lepl + python expressions on random input.

    Original author's note: this runs 'til it fails, and it always does
    fail, because lepl's expressions are guaranteed greedy while python's
    aren't.  This is "normal" (Perl is the same as Python) but I cannot
    fathom why it should be - it seems *harder* to make them work that
    way...  (The loop below is capped at 100 iterations.)
    '''
    # Imported locally so this test does not rely on a module-level import.
    from re import error as RegexpError
    #basicConfig(level=DEBUG)
    log = getLogger('lepl.regexp._test.random')
    match_alphabet = '012'
    string_alphabet = '013'
    for _ in range(100):
        expression = random_expression(3, match_alphabet)
        string = random_string(3, string_alphabet)
        matcher = DfaRegexp(expression)
#        matcher = NfaRegexp(expression)
        matcher.config.no_full_first_match()
        lepl_result = matcher.parse(string)
        if lepl_result:
            lepl_result = lepl_result[0]
        log.debug(fmt('{0} {1} {2}', expression, string, lepl_result))
        try:
            python_pattern = compile_(expression)
        except RegexpError as problem:
            # Random expressions that python rejects as "nothing to repeat"
            # are skipped.  The message is tested by substring because the
            # repr of re.error differs between Python versions.
            if 'nothing to repeat' in str(problem):
                continue
            # Any other compile error is re-raised intact (the previous
            # ``raise e`` raised the bare class and lost the message).
            raise
        python_result = python_pattern.match(string)
        if python_result:
            python_result = python_result.group()
        # Kept outside the ``try`` so a failure reports its message.
        assert lepl_result == python_result, \
            fmt('{0} != {1}\n{2} {3}',
                lepl_result, python_result, expression, string)
def MailToUrl():
    r'''
    Generate a validator for mailto URLs, following RFC 3696.

    The returned validator is wrapped in `_guarantee_bool`, so it is
    expected to return True if the URL is valid and False otherwise
    (NOTE(review): this relies on `_guarantee_bool` converting the
    exceptions raised below into False - confirm against its definition).

    RFC 3696:

      The following characters may appear in MAILTO URLs only with the
      specific defined meanings given.  If they appear in an email address
      (i.e., for some other purpose), they must be encoded:

        :   The colon in "mailto:"

        < > # " % { } | \ ^ ~ `

      These characters are "unsafe" in any URL, and must always be encoded.

      The following characters must also be encoded if they appear in a
      MAILTO URL

        ? & =   Used to delimit headers and their values when these are
                encoded into URLs.

    ----------

    The RFC isn't that great a guide here.  The best approach, I think, is
    to check the URL for "forbidden" characters, then decode it, and finally
    validate the decoded email.  So we implement the validator directly (ie
    this is not a matcher).
    '''
    MAIL_TO = 'mailto:'
    encoded_token = compile_('(%.{0,2})')
    email = _Email()
    email.config.compile_to_re().no_memoize()

    def require(condition, message):
        # Raised explicitly (rather than via ``assert``) so the checks are
        # not stripped when Python runs with -O; the exception type is the
        # same one the assert statements used to raise.
        if not condition:
            raise AssertionError(message)

    def unpack(chunk):
        # Decode one %XX escape; any other chunk is passed through as-is.
        if chunk.startswith('%'):
            require(len(chunk) == 3, 'truncated % escape')
            return chr(int(chunk[1:], 16))
        return chunk

    @_guarantee_bool
    def validator(url):
        require(url.startswith(MAIL_TO), 'missing mailto: prefix')
        url = url[len(MAIL_TO):]
        for char in r':<>#"{}|\^~`':
            require(char not in url, 'unencoded forbidden character')
        # The capturing group makes split() return the escapes as chunks too.
        url = ''.join(unpack(chunk) for chunk in encoded_token.split(url))
        require(url, 'empty address')
        return email.parse(url)

    return validator
def MailToUrl():
    r'''
    Generate a validator for mailto URLs, following RFC 3696.

    The returned validator is wrapped in `_guarantee_bool`, so it is
    expected to return True if the URL is valid and False otherwise
    (NOTE(review): this relies on `_guarantee_bool` converting the
    exceptions raised below into False - confirm against its definition).

    RFC 3696:

      The following characters may appear in MAILTO URLs only with the
      specific defined meanings given.  If they appear in an email address
      (i.e., for some other purpose), they must be encoded:

        :   The colon in "mailto:"

        < > # " % { } | \ ^ ~ `

      These characters are "unsafe" in any URL, and must always be encoded.

      The following characters must also be encoded if they appear in a
      MAILTO URL

        ? & =   Used to delimit headers and their values when these are
                encoded into URLs.

    ----------

    The RFC isn't that great a guide here.  The best approach, I think, is
    to check the URL for "forbidden" characters, then decode it, and finally
    validate the decoded email.  So we implement the validator directly (ie
    this is not a matcher).
    '''
    MAIL_TO = 'mailto:'
    encoded_token = compile_('(%.{0,2})')
    email = _Email()
    email.config.compile_to_re()

    def require(condition, message):
        # Raised explicitly (rather than via ``assert``) so the checks are
        # not stripped when Python runs with -O; the exception type is the
        # same one the assert statements used to raise.
        if not condition:
            raise AssertionError(message)

    def unpack(chunk):
        # Decode one %XX escape; any other chunk is passed through as-is.
        if chunk.startswith('%'):
            require(len(chunk) == 3, 'truncated % escape')
            return chr(int(chunk[1:], 16))
        return chunk

    @_guarantee_bool
    def validator(url):
        require(url.startswith(MAIL_TO), 'missing mailto: prefix')
        url = url[len(MAIL_TO):]
        for char in r':<>#"{}|\^~`':
            require(char not in url, 'unencoded forbidden character')
        # The capturing group makes split() return the escapes as chunks too.
        url = ''.join(unpack(chunk) for chunk in encoded_token.split(url))
        require(url, 'empty address')
        return email.parse(url)

    return validator
def __regex_object(self, pattern):
    # Compile ``pattern`` and queue manager signals describing the outcome.
    # Always returns False.
    from re import I, U, M, L, error, compile as compile_
    try:
        if self.__ignore_case:
            flags = I|M|U|L
        else:
            flags = U|M|L
        from gobject import idle_add
        emit = self.__manager.emit
        idle_add(emit, "regex-flags", flags)
        compiled = compile_(pattern, flags)
        idle_add(self.__manager.emit, "new-regex", compiled)
    except error:
        # The pattern did not compile: reset the search, report the problem
        # in the editor and hand focus back to the entry.
        from gobject import idle_add
        idle_add(self.__manager.emit, "reset")
        idle_add(self.__manager.emit, "search-complete")
        from gettext import gettext as _
        self.__editor.update_message(
            _("Error: improperly escaped regular expression"), "no", 7)
        idle_add(self.__manager.emit, "focus-entry")
    return False
def current_word(pat, direction="both"):
    """
    Return the current word from the environment.

    pat -- A regular expression (as a raw string) matching word characters.
        Typically something like this: r"[A-Za-z_]*".

    direction -- One of "both", "left", "right". The function will look in
        the specified directions for word characters.

    TM_SELECTED_TEXT, when present, is returned unchanged. Otherwise, if
    TM_CURRENT_WORD is set and non-empty, the word is built from
    TM_CURRENT_LINE around the index TM_LINE_INDEX. In every other case an
    empty string is returned.
    """
    if "TM_SELECTED_TEXT" in env:
        return env["TM_SELECTED_TEXT"]
    if not ("TM_CURRENT_WORD" in env and env["TM_CURRENT_WORD"]):
        return ""
    current_line = env["TM_CURRENT_LINE"]
    index = int(env["TM_LINE_INDEX"])
    # Split the line into the text before and after the index.
    before, after = current_line[:index], current_line[index:]
    word_chars = compile_(pat)
    word = ""
    # The left part is matched reversed, so the match grows leftwards from
    # the index; the matched text is reversed back afterwards.
    leftwards = word_chars.match(before[::-1])
    if leftwards and direction in ("left", "both"):
        word = leftwards.group(0)[::-1]
    rightwards = word_chars.match(after)
    if rightwards and direction in ("right", "both"):
        word += rightwards.group(0)
    return word
def __regex_object(self, pattern):
    # Compile ``pattern`` and emit it on the manager as "regex-object".
    # Always returns False.
    from re import I, U, M, L, compile as compile_
    flags = U|M|L
    if IGNORE_CASE:
        # Case-insensitive search adds the I flag to the base flags.
        flags = flags|I
    compiled = compile_(pattern, flags)
    self.__manager.emit("regex-object", compiled)
    return False
from re import U, M, L, escape, compile as compile_ BEGIN_CHARACTER = "/\*+" END_CHARACTER = "\*+/" flags = U|M|L BEGIN_RE = compile_(BEGIN_CHARACTER, flags) END_RE = compile_(END_CHARACTER, flags) def has_comment(text): text = text.strip(" \t") if text.startswith("//"): return True if text.startswith("/*") and text.endswith("*/"): return True return False def get_indentation(text): is_indentation_character = lambda character: character in (" ", "\t") from itertools import takewhile whitespaces = takewhile(is_indentation_character, text) return "".join(whitespaces) def comment(text, multiline=False): if multiline is False: return __comment_single_line(text) return __comment_multiple_lines(text) def __comment_single_line(text): return get_indentation(text) + "// " + text.lstrip(" \t") def __comment_multiple_lines(text): indent_value = lambda line: len(line.replace("\t", " ")) line_indentations = [(indent_value(line), get_indentation(line)) for line in text.splitlines()] line_indentations.sort() indentation = line_indentations[0][1]
def make_3():
    # Reads data set 1, rewrites each matching line and writes the result
    # to file 3.  A line matches when, after optional leading whitespace,
    # it starts with at least two ":Name" components; the substitution
    # keeps the whitespace and everything from the final captured
    # ":Name..." onwards (the prefix repeat is non-greedy, so as little as
    # possible is removed).
    strip_prefix = compile_(
        r'(?m)^(\s*)(?::(?:[A-Z][a-z]*)+)+?(:(?:[A-Z][a-z]*)+.*)$')
    rewritten = strip_prefix.sub(r'\1\2', get_data(1))
    with get_file(3, 'w') as out:
        out.write(rewritten)
def _pattern(str_val): return compile_(str_val, IGNORECASE)
from datetime import datetime
from doctest import testmod
from itertools import chain
from optparse import OptionParser, OptionGroup
from os import linesep, environ, remove, rename
from os.path import exists, isfile
from platform import system
from re import compile as compile_
from sys import stdout, stderr, stdin, exc_info
from urllib import urlretrieve

__VERSION__ = '0.0'

# end-of-line separator (with or without a carriage return)
EOL = compile_(r'\r?\n')

# these match entire lines
BEGIN = compile_(r'(?i)^\s*#{2,}\s*BEGIN\s*GHETTONET')
END = compile_(r'(?i)^\s*#{2,}\s*END\s*GHETTONET')
DATE = compile_(r'(?i)^\s*#{2,}\s*DATE\s*(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)(\s+(?P<hour>\d\d?):(?P<min>\d\d?)(:(?P<sec>\d\d?))?)?(\s+(?P<extra>.*))?$')
POSSIBLE_DATE = compile_(r'(?i)^\s*#{2,}\s*DATE')
COMMENT_OR_BLANK = compile_(r'^\s*(?:#.*)?$')

# these match fragments of a line
# The dots between octets are escaped so that only a literal '.' separates
# them; an unescaped '.' accepted any character (eg "1234567" matched).
# Group 1 is the address, group 2 the rest of the line.
IPV4 = compile_(r'^\s*(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(.*)')
# this attempts to drop embedded HTML to help pull from web pages
NAME = compile_(r'^\s*([\w\-]+(?:\.[\w\-]+)*)(.*)')
# clunky removal of HTML markup
HTML = compile_(r'<[^<>]+>')
def __regex_object(self, pattern):
    # Compile ``pattern`` and emit it on the manager as "regex-object".
    # Always returns False.
    from re import I, U, M, L, compile as compile_
    flags = U | M | L
    if IGNORE_CASE:
        # Case-insensitive search adds the I flag to the base flags.
        flags = flags | I
    compiled = compile_(pattern, flags)
    self.__manager.emit("regex-object", compiled)
    return False
# Match any strings enclosed in ${} with the exception of ${}. from re import UNICODE, compile as compile_ placeholder_pattern = compile_("\$\{[^${}]*\}", UNICODE) #special_placeholders = ("${time}", "${timestring}", "${timestamp}", # "${date}", "${day}", "${month}", "${year}", # "${author}", "${rfc2822}") # Generated by [skqr] special_placeholders = ( "${time}", "${timestring}", "${timestamp}", "${date}", "${day}", "${month}", "${year}", "${author}", "${rfc2822}", "${fileuri}", "${filepath}", "${filename}", "${clipboard}", "${selection}" ) # Generated by [skqr] clipboard_text = None #def replace_special_placeholder(placeholder): def replace_special_placeholder(placeholder, uri="", clipboards=None): from time import localtime if placeholder == "${day}":
def _RejectRegexp(matcher, pattern):
    '''
    Wrap `matcher` in a `PostCondition` that rejects a result whose first
    element matches `pattern` (ie some other regular expression).
    '''
    rejected = compile_(pattern)

    def acceptable(results):
        # Only the first result is tested, and only from its start.
        return not rejected.match(results[0])

    return PostCondition(matcher, acceptable)
# required by the LGPL License. If you do not delete the provisions # above, a recipient may use your version of this file under either the # MPL or the LGPL License. ''' Support for operator syntactic sugar (and operator redefinition). ''' from re import compile as compile_ from lepl.matchers.matcher import Matcher from lepl.support.context import Namespace, NamespaceMixin, Scope from lepl.support.lib import open_stop, fmt, basestring DIGITS = compile_('^(-?\d+)(.*)') def RepeatWrapper(matcher, start, stop, step, separator, add, reduce): '''Parse `step` if it is a string.''' # Handle circular dependencies from lepl.matchers.derived import Repeat try: int(step) # if this works, we may have a var, so keep the instance limit = step algorithm = DEPTH_FIRST except ValueError: if (isinstance(step, basestring)): limit = None algorithm = None while step: match = DIGITS.match(step)
from re import U, M, L, escape, compile as compile_

# Non-greedy patterns for double- and single-quoted runs of text.
DOUBLE_QOUTE_PATTERN = '".*?"'
SINGLE_QUOTE_PATTERN = "'.*?'"
flags = U|L
DOUBLE_QOUTE_RE = compile_(DOUBLE_QOUTE_PATTERN, flags)
SINGLE_QUOTE_RE = compile_(SINGLE_QUOTE_PATTERN, flags)
PAIR_CHARACTERS = ("(", "{", "[", "<", ")", "}", "]", ">", "\"", "'")
OPEN_PAIR_CHARACTERS = ("(", "{", "[", "<", "\"", "'")
CLOSE_PAIR_CHARACTERS = (")", "}", "]", ">", "\"", "'")
QUOTE_CHARACTERS = ("\"", "'")


def get_pair_for(character):
    """Return the partner of a pair character, or "" for anything else.

    Quote characters count as opening characters and are their own partner.
    """
    if not __is_pair(character):
        return ""
    if character in OPEN_PAIR_CHARACTERS:
        return __get_close_pair_for(character)
    return __get_open_pair_for(character)


def is_open_pair(character):
    """Return True if `character` is an opening pair character."""
    return __is_pair(character) and character in OPEN_PAIR_CHARACTERS


def __is_pair(character):
    """Return True if `character` is any opening or closing pair character."""
    return character in PAIR_CHARACTERS


def __get_close_pair_for(open_character):
    """Return the closing character for `open_character` (KeyError if unknown)."""
    closers = {
        "(": ")",
        "{": "}",
        "[": "]",
        "<": ">",
        "\"": "\"",
        "'": "'",
    }
    return closers[open_character]


def __get_open_pair_for(close_character):
    """Return the opening character for `close_character` (KeyError if unknown)."""
    openers = {
        ")": "(",
        "}": "{",
        "]": "[",
        ">": "<",
        "\"": "\"",
        "'": "'",
    }
    return openers[close_character]
from re import U, M, L, escape, compile as compile_ DOUBLE_QOUTE_PATTERN = '".*?"' SINGLE_QUOTE_PATTERN = "'.*?'" flags = U | L DOUBLE_QOUTE_RE = compile_(DOUBLE_QOUTE_PATTERN, flags) SINGLE_QUOTE_RE = compile_(SINGLE_QUOTE_PATTERN, flags) PAIR_CHARACTERS = ("(", "{", "[", "<", ")", "}", "]", ">", "\"", "'") OPEN_PAIR_CHARACTERS = ("(", "{", "[", "<", "\"", "'") CLOSE_PAIR_CHARACTERS = (")", "}", "]", ">", "\"", "'") QUOTE_CHARACTERS = ("\"", "'") def get_pair_for(character): if __is_pair(character) is False: return "" if character in OPEN_PAIR_CHARACTERS: return __get_close_pair_for(character) return __get_open_pair_for(character) def is_open_pair(character): if __is_pair(character) is False: return False return character in OPEN_PAIR_CHARACTERS def __is_pair(character): return character in PAIR_CHARACTERS def __get_close_pair_for(open_character): close_pair_for = {
from re import U, M, L, escape, compile as compile_

# Raw strings: "\*" is not a valid string escape, so the non-raw literals
# produced an invalid-escape warning on newer Pythons.  The string values
# (and therefore the compiled patterns) are unchanged.
BEGIN_CHARACTER = r"/\*+"
END_CHARACTER = r"\*+/"
flags = U | M | L
BEGIN_RE = compile_(BEGIN_CHARACTER, flags)
END_RE = compile_(END_CHARACTER, flags)


def has_comment(text):
    """Return True if `text` is a `//` comment or a one-line `/* ... */`.

    Leading and trailing spaces and tabs are ignored.
    """
    text = text.strip(" \t")
    if text.startswith("//"):
        return True
    return text.startswith("/*") and text.endswith("*/")


def get_indentation(text):
    """Return the leading run of spaces and tabs of `text`."""
    from itertools import takewhile
    whitespaces = takewhile(lambda character: character in (" ", "\t"), text)
    return "".join(whitespaces)


def comment(text, multiline=False):
    """Comment out `text`.

    Single-line style is used only when `multiline` is exactly False; any
    other value selects the multi-line style.
    """
    if multiline is False:
        return __comment_single_line(text)
    return __comment_multiple_lines(text)


def __comment_single_line(text):
    """Insert "// " between the indentation and the rest of `text`."""
    return get_indentation(text) + "// " + text.lstrip(" \t")
# of this file under the MPL, indicate your decision by deleting the # provisions above and replace them with the notice and other provisions # required by the LGPL License. If you do not delete the provisions # above, a recipient may use your version of this file under either the # MPL or the LGPL License. ''' Support for operator syntactic sugar (and operator redefinition). ''' from re import compile as compile_ from lepl.matchers.matcher import Matcher from lepl.support.context import Namespace, NamespaceMixin, Scope from lepl.support.lib import open_stop, fmt, basestring DIGITS = compile_('^(-?\d+)(.*)') def RepeatWrapper(matcher, start, stop, step, separator, add, reduce): '''Parse `step` if it is a string.''' # Handle circular dependencies from lepl.matchers.derived import Repeat try: int(step) # if this works, we may have a var, so keep the instance limit = step algorithm = DEPTH_FIRST except ValueError: if (isinstance(step, basestring)): limit = None algorithm = None while step:
from string import punctuation, whitespace from re import compile as compile_, M, U, L DELIMETER = ("%s%s%s" % (punctuation, whitespace, "\x00")).replace("-", "").replace("_", "") NEWLINE_RE = compile_("\r\n|\n|\r", M|U|L) WORD_PATTERN = compile_("\w+|[-]", U) SCRIBES_MAIN_WINDOW_STARTUP_ID = "ScribesMainWindow" def is_delimeter(character): return character in DELIMETER def is_not_delimeter(character): return not (character in DELIMETER) def calculate_resolution_independence(window, width, height): screen = window.get_screen() number = screen.get_number() rectangle = screen.get_monitor_geometry(number) width = int(rectangle.width/width) height = int(rectangle.height/height) return width, height def create_button(stock_id, string): from gtk import HBox, Image, Label, ICON_SIZE_BUTTON, Alignment alignment = Alignment() alignment.set_property("xalign", 0.5) alignment.set_property("yalign", 0.5) hbox = HBox(False, 3) if stock_id: image = Image() image.set_from_stock(stock_id, ICON_SIZE_BUTTON) hbox.pack_start(image, False, False, 0) label = Label(string)
configPath = path.join(path.dirname(path.realpath(__file__)), 'config.ini') config = ConfigParser() config.read(configPath) headers = {'User-Agent': 'ASF license checker by /u/prTopii'} tokenClient = HTTPBasicAuth(config['Token']['id'], config['Token']['secret']) tokenPost = { 'grant_type': 'password', 'username': config['Login']['user'], 'password': config['Login']['pass'] } ipc = config['DEFAULT']['ipchost'] licensed = config['DEFAULT']['licensed'].split(',') license = compile_(r'!addlicense\s.+?,?(((,?|,\s?)\d+)+)') def getReplies(comments): output = [] for commentInfo in comments['data']['children']: try: comment = license.search(commentInfo['data']['body']) if comment: output = [ c.strip(',').strip(' ') for c in comment[1].split(',') ] if commentInfo['data']['replies']: replies = getReplies(commentInfo['data']['replies']) if replies: output.extend(replies)