Python markup_fragmentの例、amara.bindery.html.markup_fragment Pythonの例

コード例 #1

0

ファイルを表示

ファイル: application_xml.py プロジェクト: pombredanne/akara

 def rawHTML(self, markup):
     """Parse raw markup and append the resulting nodes to self._curr.

     markup -- markup source text; a falsy value (None or an empty string)
         is ignored

     Always returns an empty string: the parsed content is delivered by
     appending to self._curr, not through the return value.
     """
     if not markup:
         return ""
     fragment = markup_fragment(inputsource.text(markup))
     # Hand over each top-level node of the parsed fragment, in order
     for node in fragment.xml_children:
         self._curr.xml_append(node)
     return ""

コード例 #2

0

ファイルを表示

 def rawHTML(self, markup):
     """Append the parsed form of a raw markup string to self._curr.

     markup -- markup source text; nothing is parsed or appended when it is
         falsy (None or an empty string)

     The return value is always an empty string; the effect of the call is
     the nodes appended to self._curr.
     """
     if not markup:
         return ''
     source = inputsource.text(markup)
     #Each child of the parsed fragment is appended individually
     for node in markup_fragment(source).xml_children:
         self._curr.xml_append(node)
     return ''

コード例 #3

0

ファイルを表示

ファイル: application_xml.py プロジェクト: pombredanne/akara

 def macro(self, macro_obj, name, args, markup=None):
     """Run a macro through FormatterBase and append its parsed output to self._curr.

     All arguments are passed straight through to FormatterBase.macro.

     Always returns an empty string; a non-empty macro response is parsed as
     a markup fragment and appended to self._curr instead of being returned.
     """
     # Macro responses are (unescaped) markup.  Do what little clean-up we can, and cross fingers
     rendered = FormatterBase.macro(self, macro_obj, name, args, markup=markup)
     if not rendered:
         return ""
     # The response is Unicode, so it is encoded with config.charset before parsing
     encoded = rendered.encode(config.charset)
     fragment = markup_fragment(inputsource.text(encoded))
     # The parsed fragment itself (not its children) is appended
     self._curr.xml_append(fragment)
     return ""

コード例 #4

0

ファイルを表示

 def from_rss2(feedxml):
     '''
     Build a feed object from an RSS 2.0 document

     feedxml - an input source with an RSS 2.0 document

     Returns the feed created from the channel title, pubDate and link, with
     one entry appended per channel item. A missing channel pubDate, item
     pubDate or item description is passed along as None rather than
     aborting the conversion.
     '''
     #WARNING: Quite broken!  Probably need feedparser to e.g. deal with crap rss 2 dates
     def fragment_xml(node):
         #Re-parse the node's text as an HTML fragment and serialize the result
         return html.markup_fragment(inputsource.text(str(node))).xml_encode()

     source = bindery.parse(feedxml)#, model=FEED_MODEL)
     channel = source.rss.channel
     title = fragment_xml(channel.title)
     #FIXME: bindery modeling FTW!
     try:
         updated = unicode(channel.pubDate)
     except AttributeError:
         updated = None
     link = unicode(channel.link)
     #NOTE(review): the channel description is not used for the feed itself;
     #confirm whether feed() should receive it as a summary
     f = feed(title=title, updated=updated, id=link)
     for item in channel.item:
         item_title = fragment_xml(item.title)
         try:
             summary = fragment_xml(item.description)
         except AttributeError:
             summary = None
         #pubDate gets the same missing-element tolerance on items as it has
         #on the channel, so one undated item does not abort the whole feed
         try:
             item_updated = unicode(item.pubDate)
         except AttributeError:
             item_updated = None
         #author is dc:creator?
         #category is category/@domain?
         links = [
             #FIXME: self?
             (U(item.link), u'alternate'),
         ]
         f.append(
             U(item.link),
             item_title,
             updated=item_updated,
             summary=summary,
             links=links,
         )
     return f

コード例 #5

0

ファイルを表示

 def macro(self, macro_obj, name, args, markup=None):
     """Execute a macro via FormatterBase.macro and attach its output to self._curr.

     The arguments are forwarded unchanged to FormatterBase.macro. When the
     response is non-empty it is parsed as a markup fragment and appended to
     self._curr. The return value is always an empty string.
     """
     #Macro responses are (unescaped) markup.  Do what little clean-up we can, and cross fingers
     response = FormatterBase.macro(
         self, macro_obj, name, args, markup=markup)
     if not response:
         return ''
     #The response is Unicode; encode it with config.charset for the parser
     raw = response.encode(config.charset)
     parsed = markup_fragment(inputsource.text(raw))
     self._curr.xml_append(parsed)
     return ''

コード例 #6

0

ファイルを表示

ファイル: md.py プロジェクト: erimille/versa

def from_markdown(md, output, encoding='utf-8', config=None):
    """
    Translate the Versa Markdown syntax into Versa model relationships

    md -- markdown source text, as a byte string in the given encoding
    output -- Versa model to take the output relationship
    encoding -- character encoding (defaults to UTF-8)
    config -- optional dict of conventions for interpreting the Markdown.
        'autotype-h1', 'autotype-h2' and 'autotype-h3' give the resource type
        to use for a resource declared at that header level when its header
        names no type. 'interpretations' maps property IRIs to keys of
        PREP_METHODS. The dict is read, never modified.

    Returns the @base value from the @docheader section (None if not given)

    Raises IndexError if there is no @docheader section, and ValueError on a
    malformed resource header or relationship expression
    """
    #Set up configuration to interpret the conventions for the Markdown
    config = config or {}
    #This mapping takes syntactical elements such as the various header levels in Markdown and associates a resource type with the specified resources
    syntaxtypemap = {}
    for key, level in (('autotype-h1', u'h1'), ('autotype-h2', u'h2'), ('autotype-h3', u'h3')):
        autotype = config.get(key)
        if autotype: syntaxtypemap[level] = autotype
    #Map the interpretation IRIs to functions to do the data prep. This is built
    #as a fresh dict: rewriting config['interpretations'] in place would replace
    #the keys with functions, and a second call with the same config would then
    #find none of them in PREP_METHODS and fall back to the identity for all
    interp = {}
    for prop, interp_key in config.get('interpretations', {}).iteritems():
        if interp_key in PREP_METHODS:
            interp[prop] = PREP_METHODS[interp_key]
        else:
            #just use the identity, i.e. no-op
            interp[prop] = lambda x, **kwargs: x

    #Parse the Markdown
    h = markdown.markdown(md.decode(encoding))

    doc = html.markup_fragment(inputsource.text(h.encode('utf-8')))
    #Each section contains one resource description, but the special one named @docheader contains info to help interpret the rest
    docheader = doc.xml_select(u'//h1[.="@docheader"]')[0]
    #A predicate binds only to the step it follows, so it has to be repeated on
    #every branch of the union to keep the @docheader header out of the sections
    not_docheader = u'[not(.="@docheader")]'
    sections = doc.xml_select(u'//h1{0}|h2{0}|h3{0}'.format(not_docheader))

    def fields(sect):
        '''
        Each section represents a resource and contains a list with its properties
        This generator parses the list and yields the key value pairs representing the properties
        Some properties have attributes, expressed in markdown as a nested list. If present these attributes
        Are yielded as well, else None is yielded
        '''
        #Pull all the list elements until the next header. This accommodates multiple lists in a section
        sect_body_items = results_until(sect.xml_select(u'following-sibling::*'), u'self::h1|self::h2|self::h3')
        field_list = [ li for elem in sect_body_items for li in elem.xml_select(u'li') ]

        def parse_pair(pair):
            '''
            Parse each list item into a property pair
            Blank text gives (None, None); text not matching REL_PAT raises ValueError
            '''
            if not pair.strip():
                return None, None
            matched = REL_PAT.match(pair)
            if not matched:
                #Report the offending text itself
                raise ValueError(_(u'Syntax error in relationship expression: {0}'.format(pair)))
            return matched.group(1).strip(), matched.group(2).strip()

        #Go through each list item
        for li in field_list:
            #Is there a nested list, which expresses attributes on a property
            if li.xml_select(u'ul'):
                #The property itself is the text ahead of the nested list
                main = ''.join([ U(node) for node in results_until(li.xml_select(u'node()'), u'self::ul') ])
                prop, val = parse_pair(main)
                subfield_dict = dict([ parse_pair(U(sli)) for sli in li.xml_select(u'ul/li') ])
                #Blank nested items come back keyed by None; drop them
                subfield_dict.pop(None, None)
                yield prop, val, subfield_dict
            #Just a regular, unadorned property
            else:
                prop, val = parse_pair(U(li))
                if prop: yield prop, val, None

    #Gather the document-level metadata
    base = propbase = rbase = None
    for prop, val, subfield_dict in fields(docheader):
        if prop == '@base':
            base = val
        elif prop == '@property-base':
            propbase = val
        elif prop == '@resource-base':
            rbase = val
    if not propbase: propbase = base
    #NOTE(review): rbase is gathered here but not read below; resource IDs are
    #resolved against base. Confirm whether @resource-base should apply to them
    if not rbase: rbase = base

    #Go through the resources expressed in remaining sections
    for sect in sections:
        #The header can take one of 4 forms: "ResourceID" "ResourceID [ResourceType]" "[ResourceType]" or "[]"
        #The 3rd form is for an anonymous resource with specified type and the 4th an anonymous resource with unspecified type
        matched = RESOURCE_PAT.match(U(sect))
        if not matched:
            raise ValueError(_(u'Syntax error in resource header: {0}'.format(U(sect))))
        rid = matched.group(1)
        rtype = matched.group(3)
        if rid:
            rid = I(iri.absolutize(rid, base))
        else:
            #Anonymous resource: have the model supply an ID
            rid = I(iri.absolutize(output.generate_resource(), base))
        if rtype:
            rtype = I(iri.absolutize(rtype, base))
        else:
            #Resource type might be set by syntax config
            rtype = syntaxtypemap.get(sect.xml_local)
        if rtype:
            output.add(rid, RDFTYPE, rtype)
        #Add the properties
        for prop, val, subfield_dict in fields(sect):
            attrs = subfield_dict or {}
            fullprop = I(iri.absolutize(prop, propbase))
            resinfo = AB_RESOURCE_PAT.match(val)
            if resinfo:
                val = resinfo.group(1)
                valtype = resinfo.group(3)
                if not val: val = output.generate_resource()
                if valtype: attrs[RDFTYPE] = valtype
            if fullprop in interp:
                #An interpretation returning None means there is nothing to add
                val = interp[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=output)
                if val is not None: output.add(rid, fullprop, val)
            else:
                output.add(rid, fullprop, val, attrs)

    return base

コード例 #7

0

ファイルを表示

ファイル: build_testcase.py プロジェクト: zepheira/bibframe-testcases

def _exhibit_link(label, substitutions):
    """
    Build one link for label from the JSON2BASE64ENCODE and BASE64_LINK templates

    label -- text substituted for %LABEL% in BASE64_LINK
    substitutions -- sequence of (placeholder, replacement) pairs, applied to
        the JSON template in the given order once %ID has been filled in
    """
    #Current time plus a random offset, with the decimal point dropped, is the ID
    tid = time.time() + random.randint(1,1000)
    tid = str(tid).replace('.', '')
    payload = JSON2BASE64ENCODE.replace('%ID', tid)
    for placeholder, replacement in substitutions:
        payload = payload.replace(placeholder, replacement)
    link = BASE64_LINK.replace('%BASE64%', base64.b64encode(payload))
    return link.replace('%LABEL%', label)


def from_markdown(md, dest, stem, index):
    """
    Convert a Markdown test case description into Turtle files plus index metadata

    md -- Markdown source, as a UTF-8 encoded byte string
    dest -- directory in which the Turtle files are written
    stem -- file name stem; the files written are <stem>.ttl and <stem>-eyecandy.ttl
    index -- list to which a dict of the test's top-level metadata is appended

    Returns a tuple of the full Turtle text and a copy of the metadata dict
    """
    h = markdown.markdown(md.decode('utf-8'))
    doc = html.markup_fragment(inputsource.text(h.encode('utf-8')))
    output = TURTLE_TOP_TEMPLATE
    graphoutput = TURTLE_TOP_TEMPLATE

    #The top section contains all the test metadata
    top_section_fields = results_until(doc.xml_select(u'//h1[1]/following-sibling::h2'), u'self::h1')

    #Note: Top level fields are rendered into dicts, others are turned into lists of tuples
    fields = {}
    fields["relatedtests"] = ""
    for s in top_section_fields:
        prop = U(s).strip()
        value = s.xml_select(u'./following-sibling::p|following-sibling::ul')
        if not value:
            continue
        #Use XPath to strip markup
        if value[0].xml_local == u'ul':
            items = [ li.xml_select(u'string(.)') for li in value[0].xml_select(u'./li') ]
            fields[prop] = items
            if prop == "relatedtests":
                #The list is replaced by its comma-separated rendering as links
                fields[prop] = ", ".join([ TESTS_LINKS_TEMPLATE.format(i) for i in items ])
            elif prop == "issues":
                #The item fills the first value slot; the other two are blanked
                fields["issueslinks"] = ", ".join([
                    _exhibit_link(i, (('%F1VAL%', i), ('"%F2VAL%"', ''), ('"%F3VAL%"', '')))
                    for i in items
                ])
            elif prop == "status":
                #The item fills the third value slot; the other two are blanked
                fields["statuslinks"] = ", ".join([
                    _exhibit_link(i, (('"%F1VAL%"', ''), ('"%F2VAL%"', ''), ('%F3VAL%', i)))
                    for i in items
                ])
        else:
            fields[prop] = value[0].xml_select(u'string(.)')
            if prop.lower() == "id":
                fields["test-id"] = value[0].xml_select(u'string(.)')

    # add fileroot to exhibit json
    fields['fileroot'] = stem

    testinfo = fields.copy()
    index.append(testinfo)

    sections = doc.xml_select(u'//h1[not(.="Header")]')
    for sect in sections:
        rtype = U(sect)
        #Only the first list following the header supplies the "prop: value" fields
        field_list = sect.xml_select(u'following-sibling::ul')[0]
        pairs = []
        for li in field_list.xml_select(u'./li'):
            if U(li).strip():
                prop, val = [ part.strip() for part in U(li.xml_select(u'string(.)')).split(u':', 1) ]
                pairs.append((prop, val))

        subsections = results_until(sect.xml_select(u'./following-sibling::h2'), u'self::h1')
        for s in subsections:
            prop = U(s).strip()
            value = s.xml_select(u'./following-sibling::p|following-sibling::ul')
            if value:
                #Use XPath to strip markup
                if value[0].xml_local == u'ul':
                    pairs.append((prop, [ U(li.xml_select(u'string(.)')) for li in value[0].xml_select(u'./li') ]))
                else:
                    pairs.append((prop, U(value[0].xml_select(u'string(.)'))))

        #The id field names the resource and is not emitted as a property
        #NOTE(review): rid is only assigned when the section has an id field; a
        #section without one reuses the previous section's rid, or fails with an
        #unbound local on the first section. Confirm every section carries an id
        for k, v in pairs:
            if k == u'id':
                rid = absolutize(v, TEST_ID_BASE)
        pairs = [ (k, v) for (k, v) in pairs if k != u'id' ]

        atype = None
        output += TURTLE_RESOURCE_TEMPLATE.format(rid=rid)
        if ' ' in rtype:
            #Derive the actual annotation type
            rtype, atype = rtype.split()
            output += u'    a bf:{atype}, bf:{rtype} ;\n'.format(rtype=rtype, atype=atype)
        else:
            output += u'    a bf:{rtype} ;\n'.format(rtype=rtype)

        for k, v in pairs:
            if matches_uri_syntax(v):
                output += u'    bf:{k} <{v}> ;\n'.format(k=k, v=v)
            elif v.startswith("["):
                output += u'    bf:{k} {v} ;\n'.format(k=k, v=v)
            else:
                output += u'    bf:{k} "{v}" ;\n'.format(k=k, v=v)
        #Swap the final statement separator for the terminating period
        output = output.rsplit(u';\n', 1)[0]
        output += u'.\n'

        # Create RDF that only includes resource-resource relations
        # for graphical display
        graphoutput += TURTLE_RESOURCE_TEMPLATE.format(rid=rid)
        for k, v in pairs:
            if matches_uri_syntax(v):
                graphoutput += u'    bf:{k} <{v}> ;\n'.format(k=k, v=v)
            elif v.startswith("["):
                graphoutput += u'    bf:{k} {v} ;\n'.format(k=k, v=v)
        graphoutput = graphoutput.rsplit(u';\n', 1)[0]
        graphoutput += u'.\n'

    #Context managers make sure each file is closed even if the write fails
    turtlefname = os.path.join(dest, stem + os.path.extsep + 'ttl')
    with open(turtlefname, 'w') as turtlef:
        turtlef.write(output.encode('utf-8'))

    eyecandyfname = os.path.join(dest, stem + "-eyecandy" + os.path.extsep + 'ttl')
    with open(eyecandyfname, 'w') as eyecandyf:
        eyecandyf.write(graphoutput.encode('utf-8'))

    #Copying testinfo because from_turtle will modify it in place
    return output, testinfo.copy()

コード例 #8

0

ファイルを表示

def from_markdown(md, output, encoding='utf-8', config=None):
    """
    Translate the Versa Markdown syntax into Versa model relationships

    md -- markdown source text, as a byte string in the given encoding
    output -- Versa model to take the output relationship
    encoding -- character encoding (defaults to UTF-8)
    config -- optional dict of conventions for interpreting the Markdown.
        'autotype-h1', 'autotype-h2' and 'autotype-h3' give the resource type
        to use for a resource declared at that header level when its header
        names no type. 'interpretations' maps property IRIs to keys of
        PREP_METHODS. The dict is read, never modified.

    Returns the @base value from the @docheader section (None if not given)

    Raises IndexError if there is no @docheader section, and ValueError on a
    malformed resource header or relationship expression
    """
    #Set up configuration to interpret the conventions for the Markdown
    config = config or {}
    #This mapping takes syntactical elements such as the various header levels in Markdown and associates a resource type with the specified resources
    syntaxtypemap = {}
    if config.get('autotype-h1'):
        syntaxtypemap[u'h1'] = config.get('autotype-h1')
    if config.get('autotype-h2'):
        syntaxtypemap[u'h2'] = config.get('autotype-h2')
    if config.get('autotype-h3'):
        syntaxtypemap[u'h3'] = config.get('autotype-h3')

    def identity(x, **kwargs):
        #No-op data prep, for interpretation keys PREP_METHODS does not know
        return x

    #Map the interpretation IRIs to functions to do the data prep. The mapping
    #is a new dict so that config['interpretations'] keeps its keys; overwriting
    #them with functions would make a later call with the same config resolve
    #every interpretation to the identity
    interp = {}
    for prop, interp_key in config.get('interpretations', {}).iteritems():
        if interp_key in PREP_METHODS:
            interp[prop] = PREP_METHODS[interp_key]
        else:
            interp[prop] = identity

    #Parse the Markdown
    h = markdown.markdown(md.decode(encoding))

    doc = html.markup_fragment(inputsource.text(h.encode('utf-8')))
    #Each section contains one resource description, but the special one named @docheader contains info to help interpret the rest
    docheader = doc.xml_select(u'//h1[.="@docheader"]')[0]
    #The exclusion is spelled out on each branch of the union, since a predicate
    #only filters the step it is attached to
    sections = doc.xml_select(
        u'//h1[not(.="@docheader")]'
        u'|h2[not(.="@docheader")]'
        u'|h3[not(.="@docheader")]')

    def fields(sect):
        '''
        Each section represents a resource and contains a list with its properties
        This generator parses the list and yields the key value pairs representing the properties
        Some properties have attributes, expressed in markdown as a nested list. If present these attributes
        Are yielded as well, else None is yielded
        '''
        #Pull all the list elements until the next header. This accommodates multiple lists in a section
        sect_body_items = results_until(
            sect.xml_select(u'following-sibling::*'),
            u'self::h1|self::h2|self::h3')
        field_list = [
            li for elem in sect_body_items for li in elem.xml_select(u'li')
        ]

        def parse_pair(pair):
            '''
            Parse each list item into a property pair
            Blank text gives (None, None); text not matching REL_PAT raises ValueError
            '''
            if not pair.strip():
                return None, None
            matched = REL_PAT.match(pair)
            if not matched:
                #The message quotes the text that failed to parse
                raise ValueError(
                    _(u'Syntax error in relationship expression: {0}'.
                      format(pair)))
            prop = matched.group(1).strip()
            val = matched.group(2).strip()
            return prop, val

        #Go through each list item
        for li in field_list:
            #Is there a nested list, which expresses attributes on a property
            if li.xml_select(u'ul'):
                #Only the nodes ahead of the nested list make up the property text
                main = ''.join([
                    U(node) for node in results_until(li.xml_select(u'node()'),
                                                      u'self::ul')
                ])
                prop, val = parse_pair(main)
                subfield_dict = dict(
                    [parse_pair(U(sli)) for sli in li.xml_select(u'ul/li')])
                #Blank nested items are keyed by None; discard that entry
                if None in subfield_dict: del subfield_dict[None]
                yield prop, val, subfield_dict
            #Just a regular, unadorned property
            else:
                prop, val = parse_pair(U(li))
                if prop: yield prop, val, None

    #Gather the document-level metadata
    base = propbase = rbase = None
    for prop, val, subfield_dict in fields(docheader):
        if prop == '@base':
            base = val
        elif prop == '@property-base':
            propbase = val
        elif prop == '@resource-base':
            rbase = val
    if not propbase: propbase = base
    #NOTE(review): rbase is gathered here but not read below; resource IDs are
    #resolved against base. Confirm whether @resource-base should apply to them
    if not rbase: rbase = base

    #Go through the resources expressed in remaining sections
    for sect in sections:
        #The header can take one of 4 forms: "ResourceID" "ResourceID [ResourceType]" "[ResourceType]" or "[]"
        #The 3rd form is for an anonymous resource with specified type and the 4th an anonymous resource with unspecified type
        header = U(sect)
        matched = RESOURCE_PAT.match(header)
        if not matched:
            raise ValueError(
                _(u'Syntax error in resource header: {0}'.format(header)))
        rid = matched.group(1)
        rtype = matched.group(3)
        if rid:
            rid = I(iri.absolutize(rid, base))
        else:
            #Anonymous resource: have the model supply an ID
            rid = I(iri.absolutize(output.generate_resource(), base))
        if rtype:
            rtype = I(iri.absolutize(rtype, base))
        else:
            #Resource type might be set by syntax config
            rtype = syntaxtypemap.get(sect.xml_local)
        if rtype:
            output.add(rid, RDFTYPE, rtype)
        #Add the properties
        for prop, val, subfield_dict in fields(sect):
            attrs = subfield_dict or {}
            fullprop = I(iri.absolutize(prop, propbase))
            resinfo = AB_RESOURCE_PAT.match(val)
            if resinfo:
                val = resinfo.group(1)
                valtype = resinfo.group(3)
                if not val: val = output.generate_resource()
                if valtype: attrs[RDFTYPE] = valtype
            if fullprop in interp:
                #An interpretation returning None means there is nothing to add
                val = interp[fullprop](val,
                                       rid=rid,
                                       fullprop=fullprop,
                                       base=base,
                                       model=output)
                if val is not None: output.add(rid, fullprop, val)
            else:
                output.add(rid, fullprop, val, attrs)

    return base