예제 #1
0
 def __init__(self, symbols, keywords):
     """Compile the lexer's token-recognition tables.

     :param symbols: iterable of literal operator/punctuation strings;
         each is passed through ``esc`` so regex metacharacters match
         verbatim.
     :param keywords: iterable of keyword strings; each gets a negative
         lookahead so a keyword does not match a longer identifier that
         merely starts with it.
     """
     # Pair order is preserved from this tuple concatenation: KEYWORD and
     # SYMBOL entries come before STRING/FLOAT/INT/NAME. Presumably the
     # consumer tries patterns in order (first match wins), so keywords
     # shadow NAME and FLOAT is tried before INT — TODO confirm in caller.
     self._regex_token_pairs = tuple(
         (type_, rec(regex)) for type_, regex in tuple(
             ('KEYWORD', k + r'(?!\w)') for k in keywords) + tuple(
                 ('SYMBOL', esc(symbol)) for symbol in symbols) +
         (('STRING', r'\"(?:\\\"|[^"])*\"' + r'|'
           r"\'(?:\\\'|[^'])*\'"), ('FLOAT', r'\d+\.\d*|\.\d+'),
          ('INT', r'\d+'), ('NAME', r'\w+')))
     # Inline spaces/tabs plus an optional '#' comment up to the newline.
     self._space_re = rec(r'[ \t]*(?:\#[^\n]*)?')
     # Any run of blank or comment-only lines (or end of input).
     self._empty_lines_re = rec(r'(?:(?:[ \t]*(?:\#[^\n]*)?)(?:\n|\Z))*')
     # Maximal run of non-whitespace, used to report unrecognized input.
     self._err_re = rec(r'[^\s]*')
예제 #2
0
def check_addr():
    """Validate address-book records and log every invalid field.

    Walks the module-level ``addr`` iterable of record dicts and appends
    one line to the file named by the module-level ``reports_out`` for
    each field that is either unknown or fails its format regex.
    """
    reprs = {
        "name": rec(r"[a-zA-Z ]+"),
        "birthday": rec(r"[0-3]?[0-9]\-[01]?[0-9]\-[12][0-9]{3}"),
        "phone": rec(r"0*[1-9][0-9]*"),
        "address": rec(r".+"),
        "email": rec(r"[^@ ]+@[^@ ]+"),
    }
    with open(reports_out, "a") as f:
        for record in addr:
            # Iterate key/value pairs directly instead of re-indexing.
            for field, value in record.items():
                # Unknown fields and regex mismatches are both reported.
                if field not in reprs or not reprs[field].fullmatch(value):
                    # BUG FIX: the original write had no trailing "\n",
                    # so successive report entries ran together on one line.
                    f.write("[check] {}: {}\n".format(field, value))
예제 #3
0
 def __init__(self, symbols, keywords):
     """Build the tokenizer's ordered (type, compiled-regex) table.

     Keywords are matched with a trailing negative lookahead; symbols are
     escaped so they match literally. Entry order is significant and is
     kept exactly as: keywords, symbols, then the generic token classes.
     """
     token_specs = []
     for keyword in keywords:
         token_specs.append(('KEYWORD', keyword + r'(?!\w)'))
     for symbol in symbols:
         token_specs.append(('SYMBOL', esc(symbol)))
     token_specs.extend([
         ('STRING',
          r'\"(?:\\\"|[^"])*\"' r'|' r"\'(?:\\\'|[^'])*\'"),
         ('FLOAT', r'\d+\.\d*|\.\d+'),
         ('INT', r'\d+'),
         ('NAME', r'\w+'),
     ])
     self._regex_token_pairs = tuple(
         (kind, rec(pattern)) for kind, pattern in token_specs)
     # Inline whitespace with an optional '#' comment.
     self._space_re = rec(r'[ \t]*(?:\#[^\n]*)?')
     # Blank or comment-only lines, possibly ending at end of input.
     self._empty_lines_re = rec(r'(?:(?:[ \t]*(?:\#[^\n]*)?)(?:\n|\Z))*')
     # Non-whitespace run used when no token matched.
     self._err_re = rec(r'[^\s]*')
예제 #4
0
def steal():
    """Collect stored browser credentials for the current platform.

    Returns the platform helper's result on Windows/macOS, a list of
    result strings on Linux, or a single "[-] ..." message string for
    unsupported platforms.
    """
    system = platform.system()  # hoisted: was queried up to three times
    if system == "Windows":
        return steal_windows()
    if system == "Darwin":
        return steal_osx()
    if system == "Linux":
        results = []
        # Basic (file-based) credential store first.
        basic = steal_linux_basic()
        if isinstance(basic, str):
            results.append(basic)
        else:
            results = basic
        # Linux has 3 types of storage, basic, keyring and wallet;
        # try everything then return.
        if not chrome_keyring:  # idiom fix: was `== False`
            # Print a warning
            results.append("[-] Warning: gnomekeyring module not found.")
        else:
            # Get keyring entries whose origin matches an http(s) URL.
            keyring_items = steal_linux_keyring(
                rec(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+"))
            if isinstance(keyring_items, str):
                results.append(keyring_items)
            else:
                results.extend(keyring_items)  # idiom fix: was an append loop
        return results
    return ("[-] " + system + " is not supported.")
def filter_files(files, suffix):
    """Return the names in *files* that end in ``<digits>.<suffix>``,
    ordered by that numeric component.

    :param files: iterable of file-name strings
    :param suffix: literal extension (without the dot); NOTE(review): it
        is interpolated into the regex unescaped — confirm callers never
        pass regex metacharacters.
    :return: list of matching names sorted by their embedded number
    """
    # BUG FIX: the local was named `re`, shadowing the `re` module.
    pattern = rec(r'\S+?([0-9]+)\.%s$' % suffix)

    def numbered(names):
        # Yield (number, name) for every name the pattern accepts.
        for name in names:
            m = pattern.match(name)
            if m:
                yield int(m.group(1)), name

    return [name for _, name in sorted(numbered(files), key=lambda pair: pair[0])]
예제 #6
0
def sanitizeAndValidateGrammar(grammarFileName):
    """
        Returns a pair. The latter element is the "sanitized" content of a
        grammar (without comments), and the former element is a boolean
        specifying if the grammar is a valid one (only contains nonterminals)

        :param grammarFileName: path to the file storing the grammar rules
    """

    # BUG FIX: the pattern was a non-raw string, so "\s" and "\|" were
    # invalid escape sequences (SyntaxWarning since Python 3.6, slated to
    # become an error). The triple-quoted raw literal below compiles to the
    # byte-identical regex, with both quote characters kept bare.
    return _sanitizeAndValidateWithPattern(grammarFileName, rec(r'''^[^'"]+\s+->\s+[^'"]+((\s*\|)|\s+[^'"]+)*$'''))
예제 #7
0
def sanitizeAndValidateSentences(sentenceFileName):
    """
        Returns a pair. The latter element is the "sanitized" content of the
        test sentences (without comments) and the former element is a boolean
        specifying if the sentences are in the proper format (no punctuation,
        possible asterisk in front specifying no parse)

        :param sentenceFileName: path to the file storing the test sentences
    """
    # BUG FIX: "\*" in a non-raw string is an invalid escape sequence
    # (SyntaxWarning since Python 3.6). Raw literals produce the identical
    # pattern text without the warning.
    return _sanitizeAndValidateWithPattern(sentenceFileName, rec(r"^\*?[^" + escape(punctuation) + r"]+$"))
예제 #8
0
def sanitizeAndValidateLexicon(lexiconFileName):
    """
        Returns a pair. The latter element is the "sanitized" content of a
        lexicon (without comments), and the former element is a boolean
        specifying if the lexicon is a valid lexicon (only contains terminals)

        :param lexiconFileName: a valid path to the file storing terminal rules
    """

    # BUG FIX: the pattern was a non-raw string whose "\s" and similar
    # sequences are invalid escapes (SyntaxWarning since Python 3.6). The
    # triple-quoted raw literal below evaluates to the byte-identical
    # pattern: the original's "\"" pairs become bare quotes here, and the
    # original's "\\|" was already just the regex alternation escape "\|".
    return _sanitizeAndValidateWithPattern(
        lexiconFileName,
        rec(r'''^[^'"]+\s+->\s+(?P<quote>['"])[^'"]+(?P=quote)(\s*\|\s*(?P=quote)[^'"]+(?P=quote))*$'''),
    )
예제 #9
0
"""
Created on 2012-12-12

@author: qcg
"""
from re import compile as rec

s = "PING dd"
r = rec(r"PING.*")
if r.fullmatch(s):
    print("FIND")

d = {"name": "qcg", "birthday": "1999-09-09"}
print(type(rb"\n"), len(rb"\n"))
예제 #10
0
파일: demoMongo.py 프로젝트: egonw/PyChEBI
from re import compile as rec
import pymongo
# Connect to a MongoDB server on localhost (default port).
# NOTE(review): pymongo.Connection was removed in pymongo 3.0 in favor of
# pymongo.MongoClient — confirm the pinned pymongo version before running.
db = pymongo.Connection("localhost")
# The ChEBI chemicals collection.
chebi = db.chemicals.chebi
# Fetch the Molfile field of the first document whose Synonyms value
# matches the regex 'FOOF'.
chebi.find_one({'Synonyms':rec('FOOF')})['Molfile']
예제 #11
0
파일: linkify.py 프로젝트: nskriv/bnw

def markdown(raw):
    """Render *raw* markdown and collect its thumbnails.

    Returns a ``(formatted_text, thumbs)`` pair.
    """
    text = _unicode(raw)
    rendered = markdown_parser.render(text)
    html = ignore_trailing_newlines(rendered)
    return html, get_thumbs(text)


###
# Simple MoinMoin-like formatter. Legacy.
###


# BUG FIX: the `ur'...'` literal prefix is Python 2 only and is a
# SyntaxError on Python 3; plain raw strings carry the identical patterns.
bnwtypes = (
    ("emph", rec(r'(?<!:)//'), lambda m: ()),
    ("strong", rec(r'\*\*'), lambda m: ()),
    # [[target|text]] — captures both halves.
    ("namedlink", rec(r'''\[\[\s*(?P<link_target>.+?)\s*[|]\s*(?P<link_text>.+?)\s*]]'''), lambda m: (m.group('link_target'), m.group('link_text'))),
    # {{{#!lang ...}}} source blocks; DOTALL so they may span lines.
    ("source", rec(r'''{{{(?:#!(\w+)\s+)?(.*?)}}}''', re.MULTILINE | re.DOTALL), lambda m: (m.group(1), m.group(2))),
)


formatting_tags = {
    'emph': ('<i>', '</i>'),
    'strong': ('<b>', '</b>'),
}


parser = LinkParser(types=bnwtypes + shittypes)

예제 #12
0
파일: demoMongo.py 프로젝트: egonw/PyChEBI
from re import compile as rec
import pymongo
# Open a connection to the local MongoDB server.
# NOTE(review): pymongo.Connection was removed in pymongo 3.0
# (replaced by MongoClient) — verify the installed pymongo version.
db = pymongo.Connection("localhost")
chebi = db.chemicals.chebi
# Look up one ChEBI document by regex match on Synonyms and read Molfile.
chebi.find_one({'Synonyms': rec('FOOF')})['Molfile']
예제 #13
0
#!/usr/bin/env python
# vim: expandtab tabstop=4 shiftwidth=4

import re
from re import compile as rec

# Alternations of recognized picture / video extensions.
_PICTYPES = r'|'.join([r'png', r'jpg', r'jpeg', r'gif'])
_VIDTYPES = r'|'.join([r'mp4'])
# BUG FIX: the dots were unescaped regex wildcards, so the fragment also
# matched strings like "xtumblrycom". Escaping pins the literal domain
# suffix. (This constant is only ever interpolated into regexes below.)
TUMBLR = r'\.tumblr\.com'

# Classify a bare filename/URL by extension (named group "type").
ISPHOTO = rec(r'.*\.(?P<type>' + _PICTYPES + ')')
ISVIDEO = rec(r'.*\.(?P<type>' + _VIDTYPES + ')')

# Full photo URL hosted on *media/static.tumblr.com.
PHOTO = rec(r'(http[s]?://[a-z0-9\.]*(?:media|static)' + TUMBLR + r'/(?:[^ /]*/?)tumblr_[^ ]*\.(?:' + _PICTYPES + r'))', re.I)
PHOTOSET = rec(r'(http[s]?://[^ ]*/photoset_iframe/[^ ]*/false)', re.I)
# Photo URL carrying an explicit pixel size: tumblr_<name>_<size>.<ext>.
SIZEDPHOTO = rec(r'.*[/]?(?P<name>tumblr_.*)_(?P<size>[0-9]+)\.(?P<type>' + _PICTYPES + r')', re.I)

# Video URL embedded in escaped (\x22-quoted) HTML source.
VIDEO = rec(r'\\x22(http[s]?://[^ ]*' + TUMBLR + r'/video_file/([^ ]*))\\x22 type=\\x22video/(' + _VIDTYPES + r')\\x22', re.I)

FILENAME = rec(r'http[s]?://.*/(?P<name>.*\..*)')
STATICRES = rec(r'[^ ]*static' + TUMBLR + r'[^ ]*', re.I)

# Blog root URL; "name" captures the subdomain.
BLOG = rec(r'(?P<protocol>http[s]?://)(?P<name>.*)' + TUMBLR + r'[/]?', re.I)
예제 #14
0
import re
from re import compile as rec

import linkshit
import linkshit_format

moin_types = linkshit_format.bnw_autolinks + (
    # BUG FIX: `ur"..."` literals are Python 2 syntax and a SyntaxError on
    # Python 3; the equivalent raw strings keep the patterns unchanged.
    ("emph", rec(r"(?<!:)//"), lambda m: ()),
    ("strong", rec(r"\*\*"), lambda m: ()),
    # [[target|text]] named links — both halves are captured.
    (
        "namedlink",
        rec(r"""\[\[\s*(?P<link_target>.+?)\s*[|]\s*(?P<link_text>.+?)\s*]]"""),
        lambda m: (m.group("link_target"), m.group("link_text")),
    ),
    # {{{#!lang ...}}} source blocks; DOTALL lets them span lines.
    (
        "source",
        rec(r"""{{{(?:#!([0-9A-Za-z]+)\s+)?(.*?)}}}""", re.MULTILINE | re.DOTALL),
        lambda m: (m.group(1), m.group(2)),
    ),
)


formatting_tags = {"emph": ("<i>", "</i>"), "strong": ("<b>", "</b>")}

# moin_parser = LinkParser(types=moin_types + shittypes)
# plain_parser = LinkParser(types=shittypes)


# Formatter that wires the MoinMoin-style token types into a LinkParser.
class MoinMoinFormat(linkshit_format.LinkShitFormat):
    def __init__(self):
        # NOTE(review): `parser` is a local that is never stored or used
        # here — this snippet looks truncated; presumably the full source
        # assigns it to an attribute. Confirm against the original file.
        parser = linkshit.LinkParser(types=moin_types)
예제 #15
0
파일: bb.py 프로젝트: limess050/bb-classic
    :param string url: [required] request url
    :param result: [required] request response
    :type result: Response
    """
    einfo = CacheInfo(
        url=url,
        status_code=result.status_code,
        headers=[db.Blob('%s:%s' % (k, v)) for k, v in result.headers.items()],
        content=result.content)
    einfo.put()


# Map request-URL patterns to the XML tag(s) of the entities they return.
REQUEST2XML = {
    rec(pattern): (tag, )
    for pattern, tag in (
        # /projects/#{project_id}/time_entries.xml
        # /todo_items/#{todo_item_id}/time_entries.xml
        (r".*\/time_entries\.xml", "time-entry"),
        # /time_entries/#{id}.xml
        (r".*\/time_entries\/\d*\.xml", "time-entry"),
        # /todo_lists/#{todo_list_id}/todo_items.xml
        (r".*\/todo_items\.xml", "todo-item"),
        # /todo_items/#{id}.xml
        (r".*\/todo_items\/\d*\.xml", "todo-item"),
        # /projects/#{project_id}/todo_lists.xml
        (r".*\/todo_lists\.xml", "todo-list"),
        # /todo_lists/#{id}.xml`
        (r".*\/todo_lists\/\d*\.xml", "todo-list"),
    )
}


def get_xml_for_request(url):
    """ Get xml base tags for request
예제 #16
0
파일: bb.py 프로젝트: limess050/bb-classic
    :param string url: [required] request url
    :param result: [required] request response
    :type result: Response
    """
    einfo = CacheInfo(url=url, status_code=result.status_code,
                      headers=[db.Blob('%s:%s' % (k, v))
                               for k, v in result.headers.items()],
                      content=result.content)
    einfo.put()


# Compiled request-URL pattern -> 1-tuple of XML entity tag(s).
REQUEST2XML = {}
for _pat, _tag in [
    # /projects/#{project_id}/time_entries.xml
    # /todo_items/#{todo_item_id}/time_entries.xml
    (r".*\/time_entries\.xml", "time-entry"),
    # /time_entries/#{id}.xml
    (r".*\/time_entries\/\d*\.xml", "time-entry"),
    # /todo_lists/#{todo_list_id}/todo_items.xml
    (r".*\/todo_items\.xml", "todo-item"),
    # /todo_items/#{id}.xml
    (r".*\/todo_items\/\d*\.xml", "todo-item"),
    # /projects/#{project_id}/todo_lists.xml
    (r".*\/todo_lists\.xml", "todo-list"),
    # /todo_lists/#{id}.xml`
    (r".*\/todo_lists\/\d*\.xml", "todo-list"),
]:
    REQUEST2XML[rec(_pat)] = (_tag,)


def get_xml_for_request(url):
    """ Get xml base tags for request
예제 #17
0
파일: hwk2lua.py 프로젝트: ckevincrow/Hawck
#!/usr/bin/python3

##
## Simple prototype for hwk2lua
##

## TODO: Handle strings like this: [[ string ]]

from re import MULTILINE, compile as rec
import sys

ws_rx = rec(" +")
op_rx = rec('(^|\-\-|".*?"|\'.*?\'|=>|{|})', MULTILINE)


def hwk2lua(text):
    out = []
    segments = []

    ## Break the text into segments
    last = 0
    for m in op_rx.finditer(text):
        start, end = m.span()
        if start != last:
            segments.append(text[last:start])
        segments.append(m.group())
        last = end
    segments.append(text[last:])

    ## Edit the segments into outputs
    last = ""