def __init__(self, symbols, keywords):
    """Build the tokenizer's ordered (token type, compiled regex) table.

    Keywords are matched first (with a no-word-char lookahead so 'if' does
    not match 'iffy'), then literal symbols, then generic literal classes.
    """
    keyword_pairs = tuple(('KEYWORD', kw + r'(?!\w)') for kw in keywords)
    symbol_pairs = tuple(('SYMBOL', esc(sym)) for sym in symbols)
    literal_pairs = (
        ('STRING', r'\"(?:\\\"|[^"])*\"' + r'|' r"\'(?:\\\'|[^'])*\'"),
        ('FLOAT', r'\d+\.\d*|\.\d+'),
        ('INT', r'\d+'),
        ('NAME', r'\w+'),
    )
    self._regex_token_pairs = tuple(
        (kind, rec(pattern))
        for kind, pattern in keyword_pairs + symbol_pairs + literal_pairs)
    # Horizontal whitespace, optionally followed by a '#' comment.
    self._space_re = rec(r'[ \t]*(?:\#[^\n]*)?')
    # Any run of blank / comment-only lines.
    self._empty_lines_re = rec(r'(?:(?:[ \t]*(?:\#[^\n]*)?)(?:\n|\Z))*')
    # Fallback: longest run of non-space characters, for error reporting.
    self._err_re = rec(r'[^\s]*')
def check_addr():
    """Validate each address-book record against per-field regexes.

    Unknown keys, and values that do not fully match their field's pattern,
    are appended to the report file as ``[check] <key>: <value>`` lines.
    Reads the module-level ``addr`` (iterable of dict records) and
    ``reports_out`` (report file path).
    """
    reprs = {
        "name": rec(r"[a-zA-Z ]+"),
        # 1-2 digit day and month, 4 digit year, dash-separated
        "birthday": rec(r"[0-3]?[0-9]\-[01]?[0-9]\-[12][0-9]{3}"),
        # digits with optional leading zeros, at least one non-zero digit
        "phone": rec(r"0*[1-9][0-9]*"),
        "address": rec(r".+"),
        "email": rec(r"[^@ ]+@[^@ ]+"),
    }
    with open(reports_out, "a") as f:
        for r in addr:
            for k in r:
                if k not in reprs or not reprs[k].fullmatch(r[k]):
                    # Fix: terminate each entry with a newline -- previously
                    # every report line was concatenated onto a single line.
                    f.write("[check] {}: {}\n".format(k, r[k]))
def __init__(self, symbols, keywords):
    """Compile the token-recognition tables used by the tokenizer.

    The match order is: keywords (guarded against word-character suffixes),
    then symbols, then string / float / int / name literals.
    """
    specs = []
    for kw in keywords:
        specs.append(('KEYWORD', kw + r'(?!\w)'))
    for sym in symbols:
        specs.append(('SYMBOL', esc(sym)))
    specs.extend([
        ('STRING', r'\"(?:\\\"|[^"])*\"' + r'|' r"\'(?:\\\'|[^'])*\'"),
        ('FLOAT', r'\d+\.\d*|\.\d+'),
        ('INT', r'\d+'),
        ('NAME', r'\w+'),
    ])
    self._regex_token_pairs = tuple((kind, rec(pat)) for kind, pat in specs)
    # Intra-line whitespace with an optional trailing '#' comment.
    self._space_re = rec(r'[ \t]*(?:\#[^\n]*)?')
    # Consecutive blank or comment-only lines.
    self._empty_lines_re = rec(r'(?:(?:[ \t]*(?:\#[^\n]*)?)(?:\n|\Z))*')
    # Used to capture the offending token when nothing else matches.
    self._err_re = rec(r'[^\s]*')
def steal():
    """Dispatch to the platform-specific Chrome credential extractor.

    Returns the platform stealer's result (Windows / OS X), a list of
    result strings on Linux, or an error string for unsupported platforms.
    """
    # Hoisted: the original called platform.system() once per branch.
    system = platform.system()
    if system == "Windows":
        return steal_windows()
    elif system == 'Darwin':
        return steal_osx()
    elif system == 'Linux':
        return_list = list()
        # Basic return
        basic_return = steal_linux_basic()
        if isinstance(basic_return, str):
            return_list.append(basic_return)
        else:
            return_list = basic_return
        # Linux has 3 types of storage: basic, keyring and wallet;
        # try everything then return.
        # NOTE(review): `== False` kept as-is -- the flag may be tri-state.
        if chrome_keyring == False:
            # Print a warning
            return_list.append("[-] Warning: gnomekeyring module not found.")
        else:
            # Get keyring entries matching a URL-shaped origin.
            keyring_list = steal_linux_keyring(
                rec(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+"))
            if isinstance(keyring_list, str):
                return_list.append(keyring_list)
            else:
                # extend == the original append-per-item loop
                return_list.extend(keyring_list)
        return return_list
    return ("[-] " + system + " is not supported.")
def filter_files(files, suffix):
    """Return the names in *files* that end in ``<digits>.<suffix>``,
    ordered by that numeric component (ascending).

    :param files: iterable of file names
    :param suffix: file extension (inserted into the regex verbatim)
    :return: list of matching names sorted by their embedded number
    """
    # Renamed from `re`: the original shadowed the `re` module name.
    pattern = rec(r'\S+?([0-9]+)\.%s$' % suffix)

    def numbered(names):
        # Yield (number, name) for every name the pattern accepts.
        for name in names:
            m = pattern.match(name)
            if m:
                yield int(m.group(1)), name

    return [name for _, name in sorted(numbered(files), key=lambda t: t[0])]
def sanitizeAndValidateGrammar(grammarFileName):
    """
    Returns a pair. The latter element is the "sanitized" content of a grammar
    (without comments), and the former element is a boolean specifying if the
    grammar is a valid one (only contains nonterminals)
    """
    # Raw triple-quoted string: the pattern value is byte-identical to the
    # original non-raw literal, but no longer relies on invalid escape
    # sequences like "\s" (a SyntaxWarning on modern CPython).
    return _sanitizeAndValidateWithPattern(
        grammarFileName,
        rec(r"""^[^'"]+\s+->\s+[^'"]+((\s*\|)|\s+[^'"]+)*$"""))
def sanitizeAndValidateSentences(sentenceFileName):
    """
    Returns a pair. The latter element is the "sanitized" content of the test
    sentences (without comments) and the former element is a boolean
    specifying if the sentences are in the proper format (no punctuation,
    possible asterisk in front specifying no parse)
    """
    # Raw strings: same pattern value as before, but "\*" is no longer an
    # invalid escape sequence in a plain string literal.
    return _sanitizeAndValidateWithPattern(
        sentenceFileName,
        rec(r"^\*?[^" + escape(punctuation) + r"]+$"))
def sanitizeAndValidateLexicon(lexiconFileName):
    """
    Returns a pair. The latter element is the "sanitized" content of a lexicon
    (without comments), and the former element is a boolean specifying if the
    lexicon is a valid lexicon (only contains terminals)

    :param lexiconFileName: a valid path to the file storing terminal rules
    """
    # Raw triple-quoted string replaces the original non-raw pair of literals;
    # the pattern value is unchanged ("\\|" in the old literal == r"\|" here),
    # and the fragile escape sequences are gone.
    return _sanitizeAndValidateWithPattern(
        lexiconFileName,
        rec(r"""^[^'"]+\s+->\s+(?P<quote>['"])[^'"]+(?P=quote)(\s*\|\s*(?P=quote)[^'"]+(?P=quote))*$"""),
    )
""" Created on 2012-12-12 @author: qcg """ from re import compile as rec s = "PING dd" r = rec(r"PING.*") if r.fullmatch(s): print("FIND") d = {"name": "qcg", "birthday": "1999-09-09"} print(type(rb"\n"), len(rb"\n"))
from re import compile as rec
import pymongo

# Connect to a local MongoDB instance and select the ChEBI chemicals collection.
# NOTE(review): pymongo.Connection is the legacy pre-3.0 API (replaced by
# MongoClient) -- confirm the pinned pymongo version still provides it.
db = pymongo.Connection("localhost")
chebi = db.chemicals.chebi
# Fetch one document whose Synonyms field matches the regex 'FOOF' and read
# its Molfile entry; the value is discarded (likely an interactive snippet).
chebi.find_one({'Synonyms':rec('FOOF')})['Molfile']
def markdown(raw):
    # Render *raw* through the markdown parser; also collect thumbnail links
    # from the raw text.  Returns (formatted_html, thumbs).
    raw = _unicode(raw)
    formatted_text = ignore_trailing_newlines(markdown_parser.render(raw))
    thumbs = get_thumbs(raw)
    return formatted_text, thumbs


###
# Simple MoinMoin-like formatter. Legacy.
###

# Each entry: (token name, compiled pattern, match -> captured-groups tuple).
# Python 2 ur'' literals: raw unicode patterns.
bnwtypes = (
    # '//' toggles emphasis; the lookbehind keeps 'http://' untouched.
    ("emph", rec(ur'(?<!:)//'), lambda m: ()),
    ("strong", rec(ur'\*\*'), lambda m: ()),
    # [[ target | text ]] named links.
    ("namedlink",
     rec(ur'''\[\[\s*(?P<link_target>.+?)\s*[|]\s*(?P<link_text>.+?)\s*]]'''),
     lambda m: (m.group('link_target'), m.group('link_text'))),
    # {{{#!lang source}}} blocks; DOTALL lets the body span lines.
    ("source",
     rec(ur'''{{{(?:#!(\w+)\s+)?(.*?)}}}''', re.MULTILINE | re.DOTALL),
     lambda m: (m.group(1), m.group(2))),
)

# HTML open/close tags emitted for the simple toggle tokens above.
formatting_tags = {
    'emph': ('<i>', '</i>'),
    'strong': ('<b>', '</b>'),
}

parser = LinkParser(types=bnwtypes + shittypes)
from re import compile as rec
import pymongo

# Connect to a local MongoDB instance and select the ChEBI chemicals collection.
# NOTE(review): pymongo.Connection is the legacy pre-3.0 API (replaced by
# MongoClient) -- confirm the pinned pymongo version still provides it.
db = pymongo.Connection("localhost")
chebi = db.chemicals.chebi
# Fetch one document whose Synonyms field matches the regex 'FOOF' and read
# its Molfile entry; the value is discarded (likely an interactive snippet).
chebi.find_one({'Synonyms': rec('FOOF')})['Molfile']
#!/usr/bin/env python
# vim: expandtab tabstop=4 shiftwidth=4
import re
from re import compile as rec

# Alternation fragments for the recognised media file extensions.
_PICTYPES = r'|'.join([r'png', r'jpg', r'jpeg', r'gif'])
_VIDTYPES = r'|'.join([r'mp4'])
# Regex fragment for the tumblr domain.  Fix: the dots are escaped so they
# match a literal '.' only -- the previous r'.tumblr.com' let '.' match any
# character (e.g. it matched 'xtumblrycom').
TUMBLR = r'\.tumblr\.com'

# Does a path end in a known picture / video extension? (captures the type)
ISPHOTO = rec(r'.*\.(?P<type>' + _PICTYPES + ')')
ISVIDEO = rec(r'.*\.(?P<type>' + _VIDTYPES + ')')
# Direct photo URLs hosted on tumblr's media/static CDNs.
PHOTO = rec(r'(http[s]?://[a-z0-9\.]*(?:media|static)' + TUMBLR +
            r'/(?:[^ /]*/?)tumblr_[^ ]*\.(?:' + _PICTYPES + r'))', re.I)
# Photoset iframe embed URLs.
PHOTOSET = rec(r'(http[s]?://[^ ]*/photoset_iframe/[^ ]*/false)', re.I)
# tumblr_<name>_<size>.<ext>: decompose a sized photo filename.
SIZEDPHOTO = rec(r'.*[/]?(?P<name>tumblr_.*)_(?P<size>[0-9]+)\.(?P<type>' +
                 _PICTYPES + r')', re.I)
# Video player markup; \x22 is an escaped double quote in the page source.
VIDEO = rec(r'\\x22(http[s]?://[^ ]*' + TUMBLR +
            r'/video_file/([^ ]*))\\x22 type=\\x22video/(' +
            _VIDTYPES + r')\\x22', re.I)
# Trailing filename component of a URL.
FILENAME = rec(r'http[s]?://.*/(?P<name>.*\..*)')
# Any static.tumblr.com resource.
STATICRES = rec(r'[^ ]*static' + TUMBLR + r'[^ ]*', re.I)
# Blog root URL -> protocol + blog name.
BLOG = rec(r'(?P<protocol>http[s]?://)(?P<name>.*)' + TUMBLR + r'[/]?', re.I)
import re
from re import compile as rec
import linkshit
import linkshit_format

# MoinMoin-style inline markup tokens layered on bnw's autolink set.
# Each entry: (token name, compiled pattern, match -> captured-groups tuple).
# Python 2 ur"" literals: raw unicode patterns.
moin_types = linkshit_format.bnw_autolinks + (
    # '//' toggles emphasis; the lookbehind keeps 'http://' untouched.
    ("emph", rec(ur"(?<!:)//"), lambda m: ()),
    ("strong", rec(ur"\*\*"), lambda m: ()),
    # [[ target | text ]] named links.
    (
        "namedlink",
        rec(ur"""\[\[\s*(?P<link_target>.+?)\s*[|]\s*(?P<link_text>.+?)\s*]]"""),
        lambda m: (m.group("link_target"), m.group("link_text")),
    ),
    # {{{#!lang source}}} blocks; DOTALL lets the body span lines.
    (
        "source",
        rec(ur"""{{{(?:#!([0-9A-Za-z]+)\s+)?(.*?)}}}""", re.MULTILINE | re.DOTALL),
        lambda m: (m.group(1), m.group(2)),
    ),
)

# HTML open/close tags emitted for the simple toggle tokens above.
formatting_tags = {"emph": ("<i>", "</i>"), "strong": ("<b>", "</b>")}

# moin_parser = LinkParser(types=moin_types + shittypes)
# plain_parser = LinkParser(types=shittypes)


class MoinMoinFormat(linkshit_format.LinkShitFormat):
    def __init__(self):
        # NOTE(review): assigned to a local here; presumably the original
        # continues beyond this chunk (e.g. self.parser = parser) -- confirm.
        parser = linkshit.LinkParser(types=moin_types)
:param string url: [required] request url :param result: [required] request response :type result: Response """ einfo = CacheInfo( url=url, status_code=result.status_code, headers=[db.Blob('%s:%s' % (k, v)) for k, v in result.headers.items()], content=result.content) einfo.put() REQUEST2XML = { # /projects/#{project_id}/time_entries.xml # /todo_items/#{todo_item_id}/time_entries.xml rec(r".*\/time_entries\.xml"): ("time-entry", ), # /time_entries/#{id}.xml rec(r".*\/time_entries\/\d*\.xml"): ("time-entry", ), # /todo_lists/#{todo_list_id}/todo_items.xml rec(r".*\/todo_items\.xml"): ("todo-item", ), # /todo_items/#{id}.xml rec(r".*\/todo_items\/\d*\.xml"): ("todo-item", ), # /projects/#{project_id}/todo_lists.xml rec(r".*\/todo_lists\.xml"): ("todo-list", ), # /todo_lists/#{id}.xml` rec(r".*\/todo_lists\/\d*\.xml"): ("todo-list", ), } def get_xml_for_request(url): """ Get xml base tags for request
:param string url: [required] request url :param result: [required] request response :type result: Response """ einfo = CacheInfo(url=url, status_code=result.status_code, headers=[db.Blob('%s:%s' % (k, v)) for k, v in result.headers.items()], content=result.content) einfo.put() REQUEST2XML = { # /projects/#{project_id}/time_entries.xml # /todo_items/#{todo_item_id}/time_entries.xml rec(r".*\/time_entries\.xml"): ("time-entry",), # /time_entries/#{id}.xml rec(r".*\/time_entries\/\d*\.xml"): ("time-entry",), # /todo_lists/#{todo_list_id}/todo_items.xml rec(r".*\/todo_items\.xml"): ("todo-item",), # /todo_items/#{id}.xml rec(r".*\/todo_items\/\d*\.xml"): ("todo-item",), # /projects/#{project_id}/todo_lists.xml rec(r".*\/todo_lists\.xml"): ("todo-list",), # /todo_lists/#{id}.xml` rec(r".*\/todo_lists\/\d*\.xml"): ("todo-list",), } def get_xml_for_request(url): """ Get xml base tags for request
#!/usr/bin/python3 ## ## Simple prototype for hwk2lua ## ## TODO: Handle strings like this: [[ string ]] from re import MULTILINE, compile as rec import sys ws_rx = rec(" +") op_rx = rec('(^|\-\-|".*?"|\'.*?\'|=>|{|})', MULTILINE) def hwk2lua(text): out = [] segments = [] ## Break the text into segments last = 0 for m in op_rx.finditer(text): start, end = m.span() if start != last: segments.append(text[last:start]) segments.append(m.group()) last = end segments.append(text[last:]) ## Edit the segments into outputs last = ""