Example #1
 def POST(self, key):
     if web.re_compile('/(people/[^/]+)').match(key) and spamcheck.is_spam():
         return render_template(
             'message.html', 'Oops',
             'Something went wrong. Please try again later.')
     return core.edit.POST(self, key)
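Note: all of these examples rely on web.re_compile, which web.py defines as a
memoized wrapper around re.compile, so handlers that compile the same pattern
on every request reuse a single compiled object. A rough stand-in, assuming
only that caching behavior:

    import re
    from functools import lru_cache

    # Approximation of web.py's re_compile (web/utils.py defines it as
    # memoize(re.compile)); identical patterns share one compiled regex.
    re_compile = lru_cache(maxsize=None)(re.compile)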
Example #2
def split_key(bib_key):
    """
        >>> split_key('1234567890')
        ('isbn_10', '1234567890')
        >>> split_key('ISBN:1234567890')
        ('isbn_10', '1234567890')
        >>> split_key('ISBN1234567890')
        ('isbn_10', '1234567890')
        >>> split_key('ISBN1234567890123')
        ('isbn_13', '1234567890123')
        >>> split_key('LCCNsa 64009056')
        ('lccn', 'sa 64009056')
        >>> split_key('badkey')
        (None, None)
    """
    bib_key = bib_key.strip()
    if not bib_key:
        return None, None

    valid_keys = ['isbn', 'lccn', 'oclc', 'ocaid', 'olid']
    key, value = None, None

    # split with : when possible
    if ':' in bib_key:
        key, value = bib_key.split(':', 1)
        key = key.lower()
    else:
        # try prefix match
        for k in valid_keys:
            if bib_key.lower().startswith(k):
                key = k
                value = bib_key[len(k):]
                break  # stop at the first matching prefix

    # treat plain number as ISBN
    if key is None and bib_key[0].isdigit():
        key = 'isbn'
        value = bib_key

    # treat OLxxxM as OLID
    re_olid = web.re_compile(r'OL\d+M(@\d+)?')
    if key is None and re_olid.match(bib_key.upper()):
        key = 'olid'
        value = bib_key.upper()

    # decide isbn_10 or isbn_13 based on length.
    if key == 'isbn':
        if len(value.replace('-', '')) == 13:
            key = 'isbn_13'
        else:
            key = 'isbn_10'

    if key == 'oclc':
        key = 'oclc_numbers'

    if key == 'olid':
        key = 'key'
        value = '/books/' + value.upper()

    return key, value
Example #3
def parse_toc_row(line):
    """Parse one row of table of contents.

        >>> def f(text):
        ...     d = parse_toc_row(text)
        ...     return (d['level'], d['label'], d['title'], d['pagenum'])
        ...
        >>> f("* chapter 1 | Welcome to the real world! | 2")
        (1, 'chapter 1', 'Welcome to the real world!', '2')
        >>> f("Welcome to the real world!")
        (0, '', 'Welcome to the real world!', '')
        >>> f("** | Welcome to the real world! | 2")
        (2, '', 'Welcome to the real world!', '2')
        >>> f("|Preface | 1")
        (0, '', 'Preface', '1')
        >>> f("1.1 | Apple")
        (0, '1.1', 'Apple', '')
    """
    RE_LEVEL = web.re_compile(r"(\**)(.*)")
    level, text = RE_LEVEL.match(line.strip()).groups()

    if "|" in text:
        tokens = text.split("|", 2)
        label, title, page = pad(tokens, 3, '')
    else:
        title = text
        label = page = ""

    return web.storage(level=len(level),
                       label=label.strip(),
                       title=title.strip(),
                       pagenum=page.strip())
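Note: parse_toc_row calls a pad helper that none of these examples define.
Judging from the call pad(tokens, 3, ''), it extends a list to a minimum
length with a filler value; a minimal sketch of that assumed behavior:

    def pad(seq, size, filler):
        # Assumed behavior, inferred from the call site above: extend
        # `seq` with `filler` until it has at least `size` items.
        seq = list(seq)
        while len(seq) < size:
            seq.append(filler)
        return seq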
Example #4
def find_page():
    path = web.ctx.path
    encoding = web.ctx.get('encoding')

    # Reject encodings we don't know about.
    if encoding not in encodings:
        raise web.HTTPError("406 Not Acceptable", {})

    # The encoding can be given as part of the path; strip it off.
    if encoding:
        path = web.rstrips(path, "." + encoding)

    for p in get_sorted_paths():
        m = web.re_compile('^' + p + '$').match(path)
        if m:
            cls = pages[p].get(encoding) or pages[p].get(None)
            args = m.groups()

            # FeatureFlags support.
            # A handler can be enabled only if a feature is active.
            if hasattr(cls, "is_enabled") and bool(cls().is_enabled()) is False:
                continue

            return cls, args
    return None, None
Example #5
def request(path, method, data):
    """Fakes the web request.
    Useful when infobase is not run as a separate process.
    """
    web.ctx.infobase_localmode = True
    web.ctx.infobase_input = data or {}
    web.ctx.infobase_method = method
    
    def get_class(classname):
        if '.' in classname:
            modname, classname = classname.rsplit('.', 1)
            mod = __import__(modname, None, None, ['x'])
            fvars = mod.__dict__
        else:
            fvars = globals()
        return fvars[classname]

    try:
        # hack to make cache work for local infobase connections
        cache.loadhook()

        for pattern, classname in web.group(app.mapping, 2):
            m = web.re_compile('^' + pattern + '$').match(path)
            if m:
                args = m.groups()
                cls = get_class(classname)
                tocall = getattr(cls(), method)
                return tocall(*args)
        raise web.notfound()
    finally:
        # hack to make cache work for local infobase connections
        cache.unloadhook()
Example #6
def fuzzy_find(value, options, stopwords=[]):
    """Try find the option nearest to the value.

        >>> fuzzy_find("O'Reilly", ["O'Reilly Inc", "Addison-Wesley"])
        "O'Reilly Inc"
    """
    if not options:
        return value

    rx = web.re_compile(r"[-_\.&, ]+")

    # build word frequency
    d = defaultdict(list)
    for option in options:
        for t in rx.split(option):
            d[t].append(option)

    # find score for each option
    score = defaultdict(lambda: 0)
    for t in rx.split(value):
        if t.lower() in stopwords:
            continue
        for option in d[t]:
            score[option] += 1

    # take the option with maximum score
    return max(options, key=score.__getitem__)
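The score is a shared-token count: an option earns one point for every
punctuation- or whitespace-separated token it has in common with the value,
and max returns the first best-scoring option on ties. An illustration with
hypothetical inputs:

    >>> fuzzy_find("Addison Wesley", ["O'Reilly Inc", "Addison-Wesley"])
    'Addison-Wesley'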
Example #7
def split_key(bib_key):
    """
        >>> split_key('1234567890')
        ('isbn_10', '1234567890')
        >>> split_key('ISBN:1234567890')
        ('isbn_10', '1234567890')
        >>> split_key('ISBN1234567890')
        ('isbn_10', '1234567890')
        >>> split_key('ISBN1234567890123')
        ('isbn_13', '1234567890123')
        >>> split_key('LCCNsa 64009056')
        ('lccn', 'sa 64009056')
        >>> split_key('badkey')
        (None, None)
    """
    bib_key = bib_key.strip()
    if not bib_key:
        return None, None

    valid_keys = ['isbn', 'lccn', 'oclc', 'ocaid', 'olid']
    key, value = None, None

    # split with : when possible
    if ':' in bib_key:
        key, value = bib_key.split(':', 1)
        key = key.lower()
    else:
        # try prefix match
        for k in valid_keys:
            if bib_key.lower().startswith(k):
                key = k
                value = bib_key[len(k):]
                break  # stop at the first matching prefix
                
    # treat plain number as ISBN
    if key is None and bib_key[0].isdigit():
        key = 'isbn'
        value = bib_key
        
    # treat OLxxxM as OLID
    re_olid = web.re_compile(r'OL\d+M(@\d+)?')
    if key is None and re_olid.match(bib_key.upper()):
        key = 'olid'
        value = bib_key.upper()
    
    # decide isbn_10 or isbn_13 based on length.
    if key == 'isbn':
        if len(value.replace('-', '')) == 13:
            key = 'isbn_13'
        else:
            key = 'isbn_10'

    if key == 'oclc':
        key = 'oclc_numbers'
        
    if key == 'olid':
        key = 'key'
        value = '/books/' + value.upper()

    return key, value
Example #8
def find_page():
    path = web.ctx.path
    encoding = web.ctx.get("encoding")

    # Reject encodings we don't know about.
    if encoding not in encodings:
        raise web.HTTPError("406 Not Acceptable", {})

    # The encoding can be given as part of the path; strip it off.
    if encoding:
        path = web.rstrips(path, "." + encoding)

    for p in get_sorted_paths():
        m = web.re_compile("^" + p + "$").match(path)
        if m:
            cls = pages[p].get(encoding) or pages[p].get(None)
            args = m.groups()

            # FeatureFlags support.
            # A handler can be enabled only if a feature is active.
            if hasattr(cls, "is_enabled") and bool(cls().is_enabled()) is False:
                continue

            return cls, args
    return None, None
Example #9
def parse_toc_row(line):
    """Parse one row of table of contents.

        >>> def f(text):
        ...     d = parse_toc_row(text)
        ...     return (d['level'], d['label'], d['title'], d['pagenum'])
        ...
        >>> f("* chapter 1 | Welcome to the real world! | 2")
        (1, 'chapter 1', 'Welcome to the real world!', '2')
        >>> f("Welcome to the real world!")
        (0, '', 'Welcome to the real world!', '')
        >>> f("** | Welcome to the real world! | 2")
        (2, '', 'Welcome to the real world!', '2')
        >>> f("|Preface | 1")
        (0, '', 'Preface', '1')
        >>> f("1.1 | Apple")
        (0, '1.1', 'Apple', '')
    """
    RE_LEVEL = web.re_compile(r"(\**)(.*)")
    level, text = RE_LEVEL.match(line.strip()).groups()

    if "|" in text:
        tokens = text.split("|", 2)
        label, title, page = pad(tokens, 3, '')
    else:
        title = text
        label = page = ""

    return web.storage(level=len(level), label=label.strip(), title=title.strip(), pagenum=page.strip())
Example #10
def fuzzy_find(value, options, stopwords=[]):
    """Try find the option nearest to the value.
    
        >>> fuzzy_find("O'Reilly", ["O'Reilly Inc", "Addison-Wesley"])
        "O'Reilly Inc"
    """
    if not options:
        return value
        
    rx = web.re_compile("[-_\.&, ]+")
    
    # build word frequency
    d = defaultdict(list)
    for option in options:
        for t in rx.split(option):
            d[t].append(option)
    
    # find score for each option
    score = defaultdict(lambda: 0)
    for t in rx.split(value):
        if t.lower() in stopwords:
            continue
        for option in d[t]:
            score[option] += 1
            
    # take the option with maximum score
    return max(options, key=score.__getitem__)
Example #11
    def filter_index(self, index, name, value):
        operations = {
            "~": lambda i, value: isinstance(i.value, basestring) and i.value.startswith(web.rstrips(value, "*")),
            "<": lambda i, value: i.value < value,
            ">": lambda i, value: i.value > value,
            "!": lambda i, value: i.value != value,
            "=": lambda i, value: i.value == value,
        }
        pattern = ".*([%s])$" % "".join(operations)
        rx = web.re_compile(pattern)
        m = rx.match(name)

        if m:
            op = m.group(1)
            name = name[:-1]
        else:
            op = "="

        f = operations[op]

        if isinstance(value, list): # Match any of the elements in value if it's a list
            for i in index:
                if i.name == name and any(f(i, v) for v in value):
                    yield i.key
        else: # Otherwise just match directly
            for i in index:
                if i.name == name and f(i, value):
                    yield i.key
Example #12
    def filter_index(self, index, name, value):
        operations = {
            "~": lambda i, value: isinstance(i.value, basestring) and i.value.startswith(web.rstrips(value, "*")),
            "<": lambda i, value: i.value < value,
            ">": lambda i, value: i.value > value,
            "!": lambda i, value: i.value != value,
            "=": lambda i, value: i.value == value,
        }
        pattern = ".*([%s])$" % "".join(operations)
        rx = web.re_compile(pattern)
        m = rx.match(name)
        
        if m: 
            op = m.group(1)
            name = name[:-1]
        else:
            op = "="
            
        f = operations[op]

        if isinstance(value, list): # Match any of the elements in value if it's a list
            for i in index:
                if i.name == name and any(f(i, v) for v in value):
                    yield i.key
        else: # Otherwise just match directly
            for i in index:
                if i.name == name and f(i, value):
                    yield i.key
Example #13
 def parse(self, s):
     """Parse the string and return storage object with specified fields and units."""
     pattern = "^" + " *x *".join("([0-9.]*)"
                                  for f in self.fields) + " *(.*)$"
     rx = web.re_compile(pattern)
     m = rx.match(s)
     return m and web.storage(zip(self.fields + ["units"], m.groups()))
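For instance, if self.fields were ["width", "height", "depth"] (hypothetical),
the generated pattern would be '^([0-9.]*) *x *([0-9.]*) *x *([0-9.]*) *(.*)$':

    >>> parser.parse("5.4 x 4.7 x 0.2 inches")  # parser: hypothetical instance
    <Storage {'width': '5.4', 'height': '4.7', 'depth': '0.2', 'units': 'inches'}>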
Example #14
 def validate_properties(self, data):
     rx = web.re_compile('^[a-z][a-z0-9_]*$')
     for key in data:
         if not rx.match(key):
             raise common.BadData(
                 message="Bad Property: %s" % repr(key), at=dict(key=self.key)
             )
Example #15
def split_key(bib_key: str) -> tuple[Optional[str], Optional[str]]:
    """
    >>> split_key('1234567890')
    ('isbn_', '1234567890')
    >>> split_key('ISBN:1234567890')
    ('isbn_', '1234567890')
    >>> split_key('ISBN1234567890')
    ('isbn_', '1234567890')
    >>> split_key('ISBN1234567890123')
    ('isbn_', '1234567890123')
    >>> split_key('LCCNsa 64009056')
    ('lccn', 'sa 64009056')
    >>> split_key('badkey')
    (None, None)
    """
    bib_key = bib_key.strip()
    if not bib_key:
        return None, None

    valid_keys = ['isbn', 'lccn', 'oclc', 'ocaid', 'olid']
    key, value = None, None

    # split with : when possible
    if ':' in bib_key:
        key, value = bib_key.split(':', 1)
        key = key.lower()
    else:
        # try prefix match
        for k in valid_keys:
            if bib_key.lower().startswith(k):
                key = k
                value = bib_key[len(k):]
                break  # stop at the first matching prefix

    # treat plain number as ISBN
    if key is None and bib_key[0].isdigit():
        key = 'isbn'
        value = bib_key

    # treat OLxxxM as OLID
    re_olid = web.re_compile(r'OL\d+M(@\d+)?')
    if key is None and re_olid.match(bib_key.upper()):
        key = 'olid'
        value = bib_key.upper()

    if key == 'isbn':
        # 'isbn_' is a special indexed field that gets both isbn_10 and isbn_13 in the normalized form.
        key = 'isbn_'
        value = (value or "").replace("-", "")  # normalize ISBN by stripping hyphens

    if key == 'oclc':
        key = 'oclc_numbers'

    if key == 'olid':
        key = 'key'
        value = '/books/' + (value or "").upper()

    return key, value
Example #16
def parse_error(path):
    html = open(path).read(10000)
    soup = BeautifulSoup(html)

    h1 = web.htmlunquote(soup.body.h1.string or "")
    h2 = web.htmlunquote(soup.body.h2.string or "")
    message = h1.split('at')[0].strip() + ': ' + (h2 and h2.splitlines()[0])

    code, url = [web.htmlunquote(td.string) for td in soup.body.table.findAll('td')]

    # strip common prefixes
    code = web.re_compile(".*/(?:staging|production)/(openlibrary|infogami|web)").sub(r'\1', code)

    m = web.re_compile(r'(\d\d)(\d\d)(\d\d)(\d{6})').match(web.numify(os.path.basename(path)))
    hh, mm, ss, microsec = m.groups()

    return web.storage(url=url, message=message, code=code, time="%s:%s:%s" % (hh, mm, ss))
Example #17
def split_key(bib_key):
    """
        >>> split_key('1234567890')
        ('isbn_', '1234567890')
        >>> split_key('ISBN:1234567890')
        ('isbn_', '1234567890')
        >>> split_key('ISBN1234567890')
        ('isbn_', '1234567890')
        >>> split_key('ISBN1234567890123')
        ('isbn_', '1234567890123')
        >>> split_key('LCCNsa 64009056')
        ('lccn', 'sa 64009056')
        >>> split_key('badkey')
        (None, None)
    """
    bib_key = bib_key.strip()
    if not bib_key:
        return None, None

    valid_keys = ['isbn', 'lccn', 'oclc', 'ocaid', 'olid']
    key, value = None, None

    # split with : when possible
    if ':' in bib_key:
        key, value = bib_key.split(':', 1)
        key = key.lower()
    else:
        # try prefix match
        for k in valid_keys:
            if bib_key.lower().startswith(k):
                key = k
                value = bib_key[len(k):]
                break  # stop at the first matching prefix

    # treat plain number as ISBN
    if key is None and bib_key[0].isdigit():
        key = 'isbn'
        value = bib_key

    # treat OLxxxM as OLID
    re_olid = web.re_compile(r'OL\d+M(@\d+)?')
    if key is None and re_olid.match(bib_key.upper()):
        key = 'olid'
        value = bib_key.upper()

    if key == 'isbn':
        # 'isbn_' is a special indexed field that gets both isbn_10 and isbn_13 in the normalized form.
        key = 'isbn_'
        value = value.replace("-", "") # normalize isbn by stripping hyphens

    if key == 'oclc':
        key = 'oclc_numbers'

    if key == 'olid':
        key = 'key'
        value = '/books/' + value.upper()

    return key, value
Example #18
 def get_real_path():
     pat = '^(' + '|'.join(p[0] for p in patterns) + ')(?:/.*)?'
     rx = web.re_compile(pat)
     m = rx.match(web.ctx.path)
     if m:
         return m.group(1)
     else:
         return web.ctx.path
Example #19
    def delegate(self):
        if web.ctx.path == "/admin":
            return self.handle(admin_index)

        for t in admin_tasks:
            m = web.re_compile('^' + t.path + '$').match(web.ctx.path)
            if m:
                return self.handle(t.cls, m.groups(), librarians=t.librarians)
        raise web.notfound()
Example #20
 def delegate(self):
     if web.ctx.path == "/admin":
         return self.handle(admin_index)
         
     for t in admin_tasks:
         m = web.re_compile('^' + t.path + '$').match(web.ctx.path)
         if m:
             return self.handle(t.cls, m.groups())
     raise web.notfound()
Example #21
 def parse_key(self, key):
     """Returns prefix and path from the key.
     """
     m = web.re_compile(r'/subjects/(place:|time:|person:|)(.+)').match(key)
     if m:
         prefix = "/subjects/" + m.group(1)
         path = m.group(2)
         return prefix, path
     else:
         return None, None
Example #23
 def GET(self, key):
     page = web.ctx.site.get(key)
     
     if web.re_compile('/(authors|books|works)/OL.*').match(key):
         if page is None:
             raise web.seeother(key)
         else:
             raise web.seeother(page.url(suffix="/edit"))
     else:
         return core.edit.GET(self, key)
Example #24
    def GET(self, key):
        page = web.ctx.site.get(key)

        if web.re_compile('/(authors|books|works)/OL.*').match(key):
            if page is None:
                raise web.seeother(key)
            else:
                raise web.seeother(page.url(suffix="/edit"))
        else:
            return core.edit.GET(self, key)
Example #25
    def extract_year(self, value):
        """
        Extract just the 4 digit year from a date string.

        :param str value: A freeform string representing a publication date.
        :rtype: str
        :return: a four digit year
        """
        m = web.re_compile(r"(\d\d\d\d)").search(value)
        return m and m.group(1)
Example #26
    def escape(self, query):
        r"""Escape special characters in the query string

        >>> solr = Solr("")
        >>> solr.escape("a[b]c")
        'a\\[b\\]c'
        """
        chars = r'+-!(){}[]^"~*?:\\'
        pattern = "([%s])" % re.escape(chars)
        return web.re_compile(pattern).sub(r'\\\1', query)
Example #27
    def escape(self, query):
        r"""Escape special characters in the query string

            >>> solr = Solr("")
            >>> solr.escape("a[b]c")
            'a\\[b\\]c'
        """
        chars = r'+-!(){}[]^"~*?:\\'
        pattern = "([%s])" % re.escape(chars)
        return web.re_compile(pattern).sub(r'\\\1', query)
Example #28
    def parse_lang_header():
        """Parses HTTP_ACCEPT_LANGUAGE header."""
        accept_language = web.ctx.get('env', {}).get('HTTP_ACCEPT_LANGUAGE', '')

        re_accept_language = web.re_compile(', *')
        tokens = re_accept_language.split(accept_language)

        # take just the language part. ignore other details.
        # for example `en-gb;q=0.8` will be treated just as `en`.
        langs = [t[:2] for t in tokens]
        return langs and langs[0]
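An illustration of the flow, assuming a populated web.ctx (hypothetical
header value):

    # Accept-Language: en-gb;q=0.8, fr;q=0.5
    #   tokens -> ['en-gb;q=0.8', 'fr;q=0.5']
    #   langs  -> ['en', 'fr']
    #   result -> 'en'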
Example #29
 def GET(self, sitename):
     i = server.input("username")
     
      # Don't allow OLIDs to be usernames
     if web.re_compile(r"OL\d+[A-Z]").match(i.username.upper()):
         return True
     
     key = "/user/" + i.username.lower()
     type_user = get_thing_id("/type/user")
     d = get_db().query("SELECT * from thing WHERE lower(key) = $key AND type=$type_user", vars=locals())
     return bool(d)
Example #31
 def add(self, doc):
     #@@ UGLY!
     doc = common.parse_query(doc)
     doc = client.Site(None, None)._process_dict(doc)
     
     key = doc['key']
     self.docs[key] = client.create_thing(self, key, doc)
     
     olid = key.split("/")[-1]
     if web.re_compile(r'OL\d+[A-Z]').match(olid):
         self.olids[olid] = key
Example #32
    def add(self, doc):
        #@@ UGLY!
        doc = common.parse_query(doc)
        doc = client.Site(None, None)._process_dict(doc)

        key = doc['key']
        self.docs[key] = client.create_thing(self, key, doc)

        olid = key.split("/")[-1]
        if web.re_compile(r'OL\d+[A-Z]').match(olid):
            self.olids[olid] = key
Example #34
 def get_readable_path():
     path = get_real_path()
     if web.ctx.get('encoding') is not None:
         return web.ctx.path
     
     for pat, type, property in patterns:
         if web.re_compile('^' + pat + '$').match(path):
             thing = web.ctx.site.get(path)
             if thing is not None and thing.type.key == type and thing[property]:
                 title = thing[property].replace(' ', '-').encode('utf-8')
                 return path + '/' + urllib.quote(title)
     return web.ctx.path
Example #35
    def __init__(self, limit, window_size=600, path_regex="/.*"):
        """Creates a rate-limit processor to limit the number of
        requests/ip in the time frame.

        :param limit: the maximum number of requests allowed in the given time window.
        :param window_size: the time frame in seconds during which the requests are measured.
        :param path_regex: regular expression to specify which urls are rate-limited.
        """
        self.path_regex = web.re_compile(path_regex)
        self.limit = limit
        self.window_size = window_size
        self.reset(None)
Example #37
 def find_lists(self, changeset):
     """Returns the list entires effected by this change.
     
     When a list is modified, the data of the user and the data of each
     seed are invalidated.
     """
     docs = changeset['docs'] + changeset['old_docs']
     rx = web.re_compile("(/people/[^/]*)/lists/OL\d+L")
     for doc in docs:
         match = doc and rx.match(doc['key'])
         if match:
             yield "d" + match.group(1) # d/users/foo
             for seed in doc.get('seeds', []):
                 yield "d" + self.seed_to_key(seed)
Example #38
def parse_db_url(dburl):
    """Parses db url and returns db parameters dictionary.

    >>> parse_db_url("sqlite:///test.db")
    {'dbn': 'sqlite', 'db': 'test.db'}
    >>> parsed = parse_db_url("postgres://*****:*****@dbhost:1234/test")
    >>> sorted(parsed.items())  # doctest: +NORMALIZE_WHITESPACE
    [('db', 'test'), ('dbn', 'postgres'), ('host', 'dbhost'),
     ('port', '1234'), ('pw', 'secret'), ('user', 'joe')]
    >>> sorted(parse_db_url("postgres://joe@/test").items())
    [('db', 'test'), ('dbn', 'postgres'), ('pw', ''), ('user', 'joe')]

    Note: this should be part of web.py
    """
    rx = web.re_compile(
        r"""
        (?P<dbn>\w+)://
        (?:
            (?P<user>\w+)
            (?::(?P<pw>\w+))?
            @
        )?
        (?:
            (?P<host>\w+)
            (?::(?P<port>\w+))?
        )?
        /(?P<db>.*)
    """,
        re.X,
    )
    m = rx.match(dburl)
    if m:
        d = m.groupdict()

        if d['host'] is None:
            del d['host']

        if d['port'] is None:
            del d['port']

        if d['pw'] is None:
            d['pw'] = ''

        if d['user'] is None:
            del d['user']
            del d['pw']

        return d
    else:
        raise ValueError("Invalid database url: %s" % repr(dburl))
Example #39
    def jsemit_ForNode(self, node, indent):
        tok = PythonTokenizer(node.stmt)
        tok.consume_till('in')
        a = node.stmt[:tok.index].strip()  # for i in
        a = a[len("for"):-len("in")].strip()  # strip `for` and `in`

        b = node.stmt[tok.index:-1].strip()  # rest of for stmt excluding :
        b = web.re_compile(r"loop.setup\((.*)\)").match(b).group(1)

        text = ""
        text += indent + f"foreach({py2js(b)}, loop, function(loop, {a}) {{\n"
        text += self.jsemit(node.suite, indent + INDENT)
        text += indent + "});\n"
        return text
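To make the string surgery concrete, this is what the emitter does with a
hypothetical template statement (node.stmt keeps its trailing colon):

    # node.stmt: "for x in loop.setup(items):"
    #   a -> "x"      (loop variable: the text between `for` and `in`)
    #   b -> "items"  (the argument extracted from loop.setup(...))
    # emitted JavaScript:
    #   foreach(items, loop, function(loop, x) {
    #       ...suite...
    #   });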
Example #40
    def find_lists(self, changeset):
        """Returns the list entries effected by this change.

        When a list is modified, the data of the user and the data of each
        seed are invalidated.
        """
        docs = changeset['docs'] + changeset['old_docs']
        rx = web.re_compile(r"(/people/[^/]*)/lists/OL\d+L")
        for doc in docs:
            match = doc and rx.match(doc['key'])
            if match:
                yield "d" + match.group(1)  # d/users/foo
                for seed in doc.get('seeds', []):
                    yield "d" + self.seed_to_key(seed)
Example #41
    def jsemit_ForNode(self, node, indent):
        tok = PythonTokenizer(node.stmt)
        tok.consume_till('in')
        a = node.stmt[:tok.index].strip() # for i in
        a = a[len("for"):-len("in")].strip() # strip `for` and `in`
        
        b = node.stmt[tok.index:-1].strip() # rest of for stmt excluding :
        b = web.re_compile("loop.setup\((.*)\)").match(b).group(1)

        text = ""
        text += indent + "foreach(%s, loop, function(loop, %s) {\n" % (py2js(b), a)
        text += self.jsemit(node.suite, indent + INDENT)
        text += indent + "});\n"
        return text
Example #42
 def get_object(self, key):
     obj = web.ctx.site.get(key)
     if obj is None and key.startswith("/a/"):
         key = "/authors/" + key[len("/a/"):]
         obj = key and web.ctx.site.get(key)
         
     if obj is None and key.startswith("/b/"):
         key = "/books/" + key[len("/b/"):]
         obj = key and web.ctx.site.get(key)
     
     if obj is None and web.re_compile(r"/.*/OL\d+[A-Z]"):
         olid = web.safestr(key).split("/")[-1]
         key = web.ctx.site._request("/olid_to_key?" + urllib.urlencode({"olid": olid})).key
         obj = key and web.ctx.site.get(key)
     return obj
Example #43
    def preload_documents(self, keys):
        identifiers = [k.replace("/books/ia:", "") for k in keys if k.startswith("/books/ia:")]
        #self.preload_ia_items(identifiers)
        re_key = web.re_compile("/(books|works|authors)/OL\d+[MWA]")

        keys2 = set(k for k in keys if re_key.match(k))
        #keys2.update(k for k in self.ia_redirect_cache.values() if k is not None)
        self.preload_documents0(keys2)
        self._preload_works()
        self._preload_authors()
        self._preload_editions()
        self._preload_metadata_of_editions()

        # for all works and authors, find redirects as they'll be requested later
        keys3 = [k for k in self.cache if k.startswith("/works/") or k.startswith("/authors/")]
        self.preload_redirects(keys3)
Example #44
    def preload_documents(self, keys):
        identifiers = [k.replace("/books/ia:", "") for k in keys if k.startswith("/books/ia:")]
        #self.preload_ia_items(identifiers)
        re_key = web.re_compile(r"/(books|works|authors)/OL\d+[MWA]")

        keys2 = set(k for k in keys if re_key.match(k))
        #keys2.update(k for k in self.ia_redirect_cache.values() if k is not None)
        self.preload_documents0(keys2)
        self._preload_works()
        self._preload_authors()
        self._preload_editions()
        self._preload_metadata_of_editions()

        # for all works and authors, find redirects as they'll be requested later
        keys3 = [k for k in self.cache if k.startswith("/works/") or k.startswith("/authors/")]
        self.preload_redirects(keys3)
Example #45
    def match(path):
        for pat, type, property, default_title in patterns:
            m = web.re_compile('^' + pat).match(path)
            if m:
                prefix = m.group()
                extra = web.lstrips(path, prefix)
                tokens = extra.split("/", 2)

                # `extra` starts with "/". So first token is always empty.
                middle = web.listget(tokens, 1, "")
                suffix = web.listget(tokens, 2, "")
                if suffix:
                    suffix = "/" + suffix

                return type, property, default_title, prefix, middle, suffix
        return None, None, None, None, None, None
Example #47
def get_custom_headers():
    opt = web.ctx.env.get('HTTP_OPT')
    if opt is None:
        return {}
        
    rx = web.re_compile(r'"(.*)"; ns=(\d\d)')
    m = rx.match(opt.strip())
    
    if m:
        decl_uri, ns = m.groups()
        expected_decl_uri = infogami.config.get('http_ext_header_uri', 'http://infogami.org/api')
        if expected_decl_uri == decl_uri:
            prefix = 'HTTP_%s_' % ns
            return dict((web.lstrips(k, prefix).lower(), v) for k, v in web.ctx.env.items() if k.startswith(prefix))
    return {}
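For context, the OPT header comes from the HTTP Extension Framework
(RFC 2774): a request declares a namespace number for an extension URI and
prefixes its extension headers with that number. Given hypothetical headers
like these, get_custom_headers() would return {'priority': 'high'}:

    OPT: "http://infogami.org/api"; ns=42
    42-Priority: high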
Example #48
def parse_db_url(dburl):
    """Parses db url and returns db parameters dictionary.

    >>> parse_db_url("sqlite:///test.db")
    {'dbn': 'sqlite', 'db': 'test.db'}
    >>> parse_db_url("postgres://*****:*****@dbhost:1234/test")
    {'pw': 'secret', 'dbn': 'postgres', 'db': 'test', 'host': 'dbhost', 'user': '******', 'port': '1234'}
    >>> parse_db_url("postgres://joe@/test")
    {'pw': '', 'dbn': 'postgres', 'db': 'test', 'user': '******'}

    Note: this should be part of web.py
    """
    rx = web.re_compile(
        """
        (?P<dbn>\w+)://
        (?:
            (?P<user>\w+)
            (?::(?P<pw>\w+))?
            @
        )?
        (?:
            (?P<host>\w+)
            (?::(?P<port>\w+))?
        )?
        /(?P<db>.*)
    """, re.X)
    m = rx.match(dburl)
    if m:
        d = m.groupdict()

        if d['host'] is None:
            del d['host']

        if d['port'] is None:
            del d['port']

        if d['pw'] is None:
            d['pw'] = ''

        if d['user'] is None:
            del d['user']
            del d['pw']

        return d
    else:
        raise ValueError("Invalid database url: %s" % repr(dburl))
Example #49
 def set_classifications(self, classifications):
     names = ["dewey_decimal_class", "lc_classifications"]
     d = defaultdict(list)
     for c in classifications:
          # anchor the pattern; an unanchored .match() here always succeeds
          if 'name' not in c or 'value' not in c or not web.re_compile("^[a-z0-9_]*$").match(c['name']):
             continue
         d[c['name']].append(c['value'])
         
     for name in names:
         self._getdata().pop(name, None)
     self.classifications = {}
     
     for name, value in d.items():
         if name in names:
             self[name] = value
         else:
             self.classifications[name] = value
Example #50
    def set_classifications(self, classifications):
        names = ["dewey_decimal_class", "lc_classifications"]
        d = defaultdict(list)
        for c in classifications:
            # anchor the pattern; an unanchored .match() here always succeeds
            if 'name' not in c or 'value' not in c or not web.re_compile("^[a-z0-9_]*$").match(c['name']):
                continue
            d[c['name']].append(c['value'])

        for name in names:
            self._getdata().pop(name, None)
        self.classifications = {}

        for name, value in d.items():
            if name in names:
                self[name] = value
            else:
                self.classifications[name] = value
Example #51
def parse_db_url(dburl):
    """Parses db url and returns db parameters dictionary.

    >>> parse_db_url("sqlite:///test.db")
    {'dbn': 'sqlite', 'db': 'test.db'}
    >>> parse_db_url("postgres://*****:*****@dbhost:1234/test")
    {'pw': 'secret', 'dbn': 'postgres', 'db': 'test', 'host': 'dbhost', 'user': '******', 'port': '1234'}
    >>> parse_db_url("postgres://joe@/test")
    {'pw': '', 'dbn': 'postgres', 'db': 'test', 'user': '******'}

    Note: this should be part of web.py
    """
    rx = web.re_compile("""
        (?P<dbn>\w+)://
        (?:
            (?P<user>\w+)
            (?::(?P<pw>\w+))?
            @
        )?
        (?:
            (?P<host>\w+)
            (?::(?P<port>\w+))?
        )?
        /(?P<db>.*)
    """, re.X)
    m = rx.match(dburl)
    if m:
        d = m.groupdict()

        if d['host'] is None:
            del d['host']

        if d['port'] is None:
            del d['port']

        if d['pw'] is None:
            d['pw'] = ''

        if d['user'] is None:
            del d['user']
            del d['pw']

        return d
    else:
        raise ValueError("Invalid database url: %s" % repr(dburl))
Example #52
def get_custom_headers():
    opt = web.ctx.env.get('HTTP_OPT')
    if opt is None:
        return {}

    rx = web.re_compile(r'"(.*)"; ns=(\d\d)')
    m = rx.match(opt.strip())

    if m:
        decl_uri, ns = m.groups()
        expected_decl_uri = infogami.config.get('http_ext_header_uri',
                                                'http://infogami.org/api')
        if expected_decl_uri == decl_uri:
            prefix = 'HTTP_%s_' % ns
            return dict((web.lstrips(k, prefix).lower(), v)
                        for k, v in web.ctx.env.items()
                        if k.startswith(prefix))
    return {}
Example #53
def _get_object(site, key):
    """Returns the object with the given key.
    
    If the key has an OLID and no object is found with that key, it tries to
    find an object with the same OLID. The OL database ensures that OLIDs are
    unique.
    """
    obj = site.get(key)
    if obj is None and key.startswith("/a/"):
        key = "/authors/" + key[len("/a/") :]
        obj = key and site.get(key)

    if obj is None and key.startswith("/b/"):
        key = "/books/" + key[len("/b/") :]
        obj = key and site.get(key)

    if obj is None and web.re_compile(r"/.*/OL\d+[A-Z]"):
        olid = web.safestr(key).split("/")[-1]
        key = site._request("/olid_to_key", data={"olid": olid}).key
        obj = key and site.get(key)
    return obj
Example #54
def request(path, method, data):
    """Fakes the web request.
    Useful when infobase is not run as a separate process.
    """
    web.ctx.infobase_localmode = True
    web.ctx.infobase_input = data or {}
    web.ctx.infobase_method = method
    
    def get_class(classname):
        if '.' in classname:
            modname, classname = classname.rsplit('.', 1)
            mod = __import__(modname, None, None, ['x'])
            fvars = mod.__dict__
        else:
            fvars = globals()
        return fvars[classname]

    try:
        # hack to make cache work for local infobase connections
        cache.loadhook()

        mapping = app.mapping

        # In web.py versions before 0.36, the mapping is a flat list and needs
        # to be grouped. From web.py 0.36 onwards, it is already grouped.
        # Check the type to see whether we need to group it here.
        if mapping and not isinstance(mapping[0], (list, tuple)):
            mapping = web.group(mapping, 2)

        for pattern, classname in mapping:
            m = web.re_compile('^' + pattern + '$').match(path)
            if m:
                args = m.groups()
                cls = get_class(classname)
                tocall = getattr(cls(), method)
                return tocall(*args)
        raise web.notfound()
    finally:
        # hack to make cache work for local infobase connections
        cache.unloadhook()
Example #55
def request(path, method, data):
    """Fakes the web request.
    Useful when infobase is not run as a separate process.
    """
    web.ctx.infobase_localmode = True
    web.ctx.infobase_input = data or {}
    web.ctx.infobase_method = method

    def get_class(classname):
        if '.' in classname:
            modname, classname = classname.rsplit('.', 1)
            mod = __import__(modname, None, None, ['x'])
            fvars = mod.__dict__
        else:
            fvars = globals()
        return fvars[classname]

    try:
        # hack to make cache work for local infobase connections
        cache.loadhook()

        mapping = app.mapping

        # In web.py versions before 0.36, the mapping is a flat list and needs
        # to be grouped. From web.py 0.36 onwards, it is already grouped.
        # Check the type to see whether we need to group it here.
        if mapping and not isinstance(mapping[0], (list, tuple)):
            mapping = web.group(mapping, 2)

        for pattern, classname in mapping:
            m = web.re_compile('^' + pattern + '$').match(path)
            if m:
                args = m.groups()
                cls = get_class(classname)
                tocall = getattr(cls(), method)
                return tocall(*args)
        raise web.notfound()
    finally:
        # hack to make cache work for local infobase connections
        cache.unloadhook()
Example #56
 def filter_index(self, index, name, value):
     operations = {
         "~": lambda i, value: isinstance(i.value, basestring) and i.value.startswith(web.rstrips(value, "*")),
         "<": lambda i, value: i.value < value,
         ">": lambda i, value: i.value > value,
         "!": lambda i, value: i.value != value,
         "=": lambda i, value: i.value == value,
     }
     pattern = ".*([%s])$" % "".join(operations)
     rx = web.re_compile(pattern)
     m = rx.match(name)
     
     if m: 
         op = m.group(1)
         name = name[:-1]
     else:
         op = "="
         
     f = operations[op]
     for i in index:
         if i.name == name and f(i, value):
             yield i.key
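The operator is read off the tail of the field name: "price<" filters on
i.value < value, and a bare name falls back to equality. A sketch with
hypothetical index entries, where store stands in for an instance of the
containing class:

    index = [
        web.storage(key="/books/OL1M", name="price", value=10),
        web.storage(key="/books/OL2M", name="price", value=30),
    ]
    # "price<" keeps entries whose value is < 20
    assert list(store.filter_index(index, "price<", 20)) == ["/books/OL1M"]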
Example #58
def _get_recent_changes():
    site = web.ctx.get('site') or delegate.create_site()
    web.ctx.setdefault("ip", "127.0.0.1")

    # The recentchanges can have multiple revisions for a document if it has been modified more than once.
    # Take only the most recent revision in that case.
    visited = set()

    def is_visited(key):
        if key in visited:
            return True
        else:
            visited.add(key)
            return False

    # ignore reverts
    re_revert = web.re_compile(r"reverted to revision \d+")

    def is_revert(r):
        return re_revert.match(r.comment or "")

    # take the 100 recent changes, filter them and take the first 50
    q = {"bot": False, "limit": 100}
    result = site.versions(q)
    result = [r for r in result if not is_visited(r.key) and not is_revert(r)]
    result = result[:50]

    def process_thing(thing):
        t = web.storage()
        for k in ["key", "title", "name", "displayname"]:
            t[k] = thing[k]
        t['type'] = web.storage(key=thing.type.key)
        return t

    for r in result:
        r.author = r.author and process_thing(r.author)
        r.thing = process_thing(site.get(r.key, r.revision))

    return result
Example #59
def _get_recent_changes():
    site = web.ctx.get('site') or delegate.create_site()
    web.ctx.setdefault("ip", "127.0.0.1")
    
    # The recentchanges can have multiple revisions for a document if it has been modified more than once. 
    # Take only the most recent revision in that case.
    visited = set()
    def is_visited(key):
        if key in visited:
            return True
        else:
            visited.add(key)
            return False
       
    # ignore reverts
    re_revert = web.re_compile("reverted to revision \d+")
    def is_revert(r):
        return re_revert.match(r.comment or "")

    # take the 100 recent changes, filter them and take the first 50
    q = {"bot": False, "limit": 100}
    result = site.versions(q)
    result = [r for r in result if not is_visited(r.key) and not is_revert(r)]
    result = result[:50]

    def process_thing(thing):
        t = web.storage()
        for k in ["key", "title", "name", "displayname"]:
            t[k] = thing[k]
        t['type'] = web.storage(key=thing.type.key)
        return t
    
    for r in result:
        r.author = r.author and process_thing(r.author)
        r.thing = process_thing(site.get(r.key, r.revision))
        
    return result
Example #60
 def get_owner(self):
     match = web.re_compile(r"(/people/[^/]+)/lists/OL\d+L").match(self.key)
     if match:
         key = match.group(1)
         return self._site.get(key)