Example #1
import re

from pyparsing import (CharsNotIn, Forward, Literal, Optional, SkipTo,
                       Suppress, Word, nestedExpr, printables)


class StreamParser:  # illustrative class name; the source shows only __init__
    def __init__(self):
        """Parser for instruction.

        Example:
            {{<a>},{<a>},{<a>},{<a>}}
            {{<!>},{<!>},{<!>},{<a>}}
            <{o"i!a,<{i<a>
        """
        debug = False
        self.garbo_count = 0

        LBRACK, RBRACK, LBRACE, RBRACE, BANG = map(Suppress, "<>{}!")
        nonspecial = CharsNotIn('<>{}!')
        ignored = Word('!', printables, exact=2)
        enclosed_garbo = SkipTo(Literal('>'), ignore=ignored)

        val_str = Forward()
        garbo_str = Forward()

        # a parsed item
        item = (ignored | garbo_str | val_str | nonspecial).setDebug(debug)

        # stuff in {}s
        val_str << nestedExpr('{', '}', content=item,
                              ignoreExpr=None).setDebug(debug)
        # stuff in <>s (suppressed)
        garbo_str << (LBRACK + Optional(enclosed_garbo) +
                      RBRACK).setDebug(debug)

        def cvt_list(toks):
            return toks.asList()

        val_str.setParseAction(cvt_list)

        def take_garbo(s, loc, toks):
            m = toks[0]
            ig_str = re.sub(r'!.', '', m)
            ln = len(ig_str)
            self.garbo_count += ln
            return f"<GARBO: {ln}>"

        enclosed_garbo.setParseAction(take_garbo)
        ignored.setParseAction(lambda: '!IGNORED')

        # pattern build
        self._pattern = item
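A quick smoke test of the grammar above, using the first sample from the docstring ("StreamParser" is the placeholder class name added here for runnability, and the sketch reads the private self._pattern directly):

# Smoke test; "StreamParser" is the placeholder name added above.
p = StreamParser()
result = p._pattern.parseString('{{<a>},{<a>},{<a>},{<a>}}')
print(result.asList())   # nested lists, with garbage rendered as "<GARBO: n>"
print(p.garbo_count)     # running total of non-ignored garbage characters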
Example #2
from pyparsing import (Forward, Literal, OneOrMore, Word,
                       ZeroOrMore, oneOf, printables)


def patt(cs_list):
    '''
    Remove the control sequence (cs) with its arguments,
    recursing on curly brackets.
    '''
    cs_lit_list = oneOf(cs_list).suppress()
    bslash = Literal('\\').suppress()
    lbrace = Literal('{').suppress()
    rbrace = Literal('}').suppress()
    allchars = Word(printables, excludeChars="{}")
    content = Forward()
    content << OneOrMore(allchars | (lbrace + ZeroOrMore(content) + rbrace))
    #content << (allchars + lbrace + ZeroOrMore(content) + rbrace)
    content.setParseAction(lambda tok: " ".join(tok))

    return bslash + cs_lit_list + lbrace + content + rbrace
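A brief usage sketch of patt; the control-sequence name "section" and the input string are invented for illustration:

# Usage sketch; "section" and the sample input are invented test data.
strip_section = patt(["section"])
print(strip_section.parseString(r"\section{An {inner} title}").asList())
# -> ['An inner title']  (cs and braces suppressed, brace contents joined)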
Example #3
# URL extractor
# Copyright 2004, Paul McGuire
from pyparsing import makeHTMLTags, SkipTo, pyparsing_common
import urllib.request
from contextlib import closing
import pprint

linkOpenTag, linkCloseTag = makeHTMLTags('a')

linkBody = SkipTo(linkCloseTag)
linkBody.setParseAction(pyparsing_common.stripHTMLTags)
linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split()))

link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()

# Go get some HTML with some links in it.
with closing(
        urllib.request.urlopen("https://www.yahoo.com/")) as serverListPage:
    htmlText = serverListPage.read().decode("UTF-8")

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for toks, strt, end in link.scanString(htmlText):
    print(toks.asList())

# Create a dictionary with a dict comprehension, assembled from each pair of tokens
# returned from a matched URL.
pprint.pprint(
    {toks.body: toks.href
     for toks, strt, end in link.scanString(htmlText)})
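The same grammar can be exercised offline on an inline snippet; the HTML string below is invented test data:

# Offline check of the link grammar; this HTML snippet is invented test data.
sample = '<a href="https://example.com/docs">Read the <b>docs</b></a>'
for toks, _, _ in link.scanString(sample):
    print(toks.href, '->', toks.body)   # https://example.com/docs -> Read the docs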
Example #4
# URL extractor
# Copyright 2004, Paul McGuire
from pyparsing import makeHTMLTags, SkipTo, pyparsing_common
import urllib.request
from contextlib import closing
import pprint

linkOpenTag, linkCloseTag = makeHTMLTags('a')

linkBody = SkipTo(linkCloseTag)
linkBody.setParseAction(pyparsing_common.stripHTMLTags)
linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split()))

link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()

# Go get some HTML with some links in it.
with closing(urllib.request.urlopen("http://www.yahoo.com")) as serverListPage:
    htmlText = serverListPage.read().decode("UTF-8")

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for toks, strt, end in link.scanString(htmlText):
    print(toks.asList())

# Create a dictionary from a generator expression, assembled from each pair of tokens
# returned from a matched URL.
pprint.pprint(
    dict((toks.body, toks.href) for toks, strt, end in link.scanString(htmlText)))
Example #5
from pyparsing import (Forward, Literal, OneOrMore, Word,
                       ZeroOrMore, oneOf, printables)

with open('../tests/tex_files/reinhardt/reinhardt-optimal-control.tex', 'r') as rein_file:
    rein = rein_file.read()
#with open('../tests/tex_files/short_xymatrix_example.tex') as xymatrix_file:
#    short_example = xymatrix_file.read()
#with open('../../stacks-tests/orig/perfect.tex') as xymatrix_file:
#    stacks_example = xymatrix_file.read()

cstikzfig = oneOf(["\\tikzfig", "\\mathcal"]).suppress()
lbrace = Literal('{').suppress()
rbrace = Literal('}').suppress()
allchars = Word(printables, excludeChars="{}")
inside = ZeroOrMore(allchars)
inside.setParseAction(lambda tok: " ".join(tok))
content = Forward()
content << OneOrMore(allchars | (lbrace + ZeroOrMore(content) + rbrace))
#content << (allchars + lbrace + ZeroOrMore(content) + rbrace)
content.setParseAction(lambda tok: " ".join(tok))
tikzfig = (cstikzfig + lbrace + inside + rbrace +
           lbrace + inside + rbrace + lbrace + content + rbrace)

csxymatrix = oneOf(["\\xymatrix", "\\mathcal"]).suppress()
xymatrix = csxymatrix + lbrace + content + rbrace

search_res = tikzfig.searchString(rein)
# short_example is only defined when its commented-out read above is enabled:
#search_res = xymatrix.searchString(short_example)

#tikzfig.setParseAction(lambda s: ' ')
#clean_str = tikzfig.transformString(rein)
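A self-contained check of the tikzfig pattern; the \tikzfig arguments below are invented sample data:

# Inline test of the tikzfig pattern; the arguments are invented sample data.
sample_tex = r"\tikzfig{figname}{scale}{A {nested} caption}"
print(tikzfig.parseString(sample_tex).asList())
# -> ['figname', 'scale', 'A nested caption']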
Example #6
# URL extractor
# Copyright 2004, Paul McGuire
from pyparsing import makeHTMLTags, SkipTo, pyparsing_common as ppc
import urllib.request
from contextlib import closing
import pprint

linkOpenTag, linkCloseTag = makeHTMLTags('a')

linkBody = SkipTo(linkCloseTag)
linkBody.setParseAction(ppc.stripHTMLTags)
linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split()))

link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()

# Go get some HTML with some links in it.
with closing(
        urllib.request.urlopen("https://www.yahoo.com/")) as serverListPage:
    htmlText = serverListPage.read().decode("UTF-8")

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for toks, strt, end in link.scanString(htmlText):
    print(toks.asList())

# Create a dictionary with a dict comprehension, assembled from each pair of tokens
# returned from a matched URL.
pprint.pprint(
    {toks.body: toks.href
     for toks, strt, end in link.scanString(htmlText)})