def rawHTML(self, markup):
    #output = htmlparse(markup).html.body.xml_encode() if markup else ''
    if markup:
        body = markup_fragment(inputsource.text(markup))
        for child in body.xml_children:
            self._curr.xml_append(child)
    #self._curr.xml_append(tree.text(output.decode(config.charset)))
    #print "rawHTML", htmlparse(markup).xml_encode()
    return ''
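#Hedged usage sketch, not part of the original source: the reparenting pattern
#rawHTML relies on. Parsing loose markup yields a body fragment; appending each
#child individually keeps a spurious body element out of the output tree.
#Assumes Amara 2's inputsource and this module's markup_fragment helper.
def _example_reparent_fragment(curr, markup='<p>Hello <b>world</b></p>'):
    body = markup_fragment(inputsource.text(markup))
    for child in body.xml_children:
        curr.xml_append(child)  #curr is whatever element is being built
    return curr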
def macro(self, macro_obj, name, args, markup=None):
    #Macro responses are (unescaped) markup. Do what little clean-up we can, and cross fingers
    output = FormatterBase.macro(self, macro_obj, name, args, markup=markup)
    #response is Unicode
    if output:
        output_body = markup_fragment(inputsource.text(output.encode(config.charset)))
        #print "macro 2", repr(output)
        self._curr.xml_append(output_body)
    return ''
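#Hedged sketch, not part of the original source: the charset round-trip macro()
#depends on. Macro output arrives as Unicode markup, so it is encoded back to
#bytes (the wiki charset, config.charset above) before inputsource.text can
#reparse it into a well-formed fragment. The 'utf-8' default is an assumption.
def _example_macro_cleanup(output, charset='utf-8'):
    if output:
        return markup_fragment(inputsource.text(output.encode(charset)))
    return None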
def from_rss2(feedxml):
    '''
    feedxml - an input source with an RSS 2.0 document
    '''
    #WARNING: Quite broken! Probably need feedparser to e.g. deal with crap RSS 2 dates
    source = bindery.parse(feedxml)#, model=FEED_MODEL)
    title = html.markup_fragment(inputsource.text(str(source.rss.channel.title))).xml_encode() #FIXME: bindery modeling FTW!
    try:
        updated = unicode(source.rss.channel.pubDate)
    except AttributeError:
        updated = None
    link = unicode(source.rss.channel.link)
    try:
        summary = html.markup_fragment(inputsource.text(str(source.rss.channel.description))).xml_encode()
    except AttributeError:
        summary = None
    f = feed(title=title, updated=updated, id=link)
    for item in source.rss.channel.item:
        title = html.markup_fragment(inputsource.text(str(item.title))).xml_encode()
        try:
            summary = html.markup_fragment(inputsource.text(str(item.description))).xml_encode()
        except AttributeError:
            summary = None
        #author is dc:creator?
        #category is category/@domain?
        #try:
        #    authors = [ (u'%s, %s, %s'%(U(metadata[a][u'LastName']), U(metadata[a].get(u'FirstName', [u''])[0]), U(metadata[a][u'Initials'])), None, None) for a in resource.get(u'Author', []) ]
        #except:
        #    authors = []
        links = [
            #FIXME: self?
            (U(item.link), u'alternate'),
        ]
        f.append(
            U(item.link),
            title,
            updated=unicode(item.pubDate),
            summary=summary,
            #authors=authors,
            links=links,
        )
    return f
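#Hedged usage sketch, not part of the original source: feeding an in-memory
#RSS 2.0 document to from_rss2. amara.bindery.parse accepts a raw string, so a
#literal works here; serializing the result depends on the feed class this
#module imports, so the sketch just returns it.
def _example_from_rss2():
    rss = '''<?xml version="1.0"?>
<rss version="2.0">
  <channel>
    <title>Example feed</title>
    <link>http://example.org/</link>
    <description>Feed with &lt;em&gt;escaped&lt;/em&gt; markup</description>
    <pubDate>Sat, 07 Sep 2002 00:00:01 GMT</pubDate>
    <item>
      <title>First item</title>
      <link>http://example.org/1</link>
      <description>Body of the first item</description>
      <pubDate>Sat, 07 Sep 2002 00:00:01 GMT</pubDate>
    </item>
  </channel>
</rss>'''
    return from_rss2(rss)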
def from_markdown(md, output, encoding='utf-8', config=None):
    """
    Translate the Versa Markdown syntax into Versa model relationships

    md -- markdown source text
    output -- Versa model to take the output relationship
    encoding -- character encoding (defaults to UTF-8)

    Returns the resolved document @base IRI, if any
    """
    #Set up configuration to interpret the conventions for the Markdown
    config = config or {}
    #This mapping takes syntactical elements such as the various header levels in Markdown and associates a resource type with the specified resources
    syntaxtypemap = {}
    if config.get('autotype-h1'): syntaxtypemap[u'h1'] = config.get('autotype-h1')
    if config.get('autotype-h2'): syntaxtypemap[u'h2'] = config.get('autotype-h2')
    if config.get('autotype-h3'): syntaxtypemap[u'h3'] = config.get('autotype-h3')
    interp = config.get('interpretations', {})

    #Map the interpretation IRIs to functions to do the data prep
    for prop, interp_key in interp.iteritems():
        if interp_key in PREP_METHODS:
            interp[prop] = PREP_METHODS[interp_key]
        else:
            #just use the identity, i.e. no-op
            interp[prop] = lambda x, **kwargs: x

    #Parse the Markdown
    h = markdown.markdown(md.decode(encoding))
    doc = html.markup_fragment(inputsource.text(h.encode('utf-8')))
    #Each section contains one resource description, but the special one named @docheader contains info to help interpret the rest
    top_section_fields = results_until(doc.xml_select(u'//h1[1]/following-sibling::h2'), u'self::h1')
    docheader = doc.xml_select(u'//h1[.="@docheader"]')[0]
    sections = doc.xml_select(u'//h1|h2|h3[not(.="@docheader")]')

    def fields(sect):
        '''
        Each section represents a resource and contains a list with its properties
        This generator parses the list and yields the key value pairs representing the properties
        Some properties have attributes, expressed in markdown as a nested list. If present, these
        attributes are yielded as well, else None is yielded
        '''
        #import logging; logging.debug(repr(sect))
        #Pull all the list elements until the next header. This accommodates multiple lists in a section
        sect_body_items = results_until(sect.xml_select(u'following-sibling::*'), u'self::h1|self::h2|self::h3')
        #field_list = [ U(li) for ul in sect.xml_select(u'following-sibling::ul') for li in ul.xml_select(u'./li') ]
        field_list = [ li for elem in sect_body_items for li in elem.xml_select(u'li') ]

        def parse_pair(pair):
            '''
            Parse each list item into a property pair
            '''
            if pair.strip():
                matched = REL_PAT.match(pair)
                if not matched:
                    raise ValueError(_(u'Syntax error in relationship expression: {0}'.format(pair)))
                prop = matched.group(1).strip()
                val = matched.group(2).strip()
                #prop, val = [ part.strip() for part in U(li.xml_select(u'string(.)')).split(u':', 1) ]
                #import logging; logging.debug(repr((prop, val)))
                return prop, val
            return None, None

        #Go through each list item
        for li in field_list:
            #Is there a nested list, which expresses attributes on a property?
            if li.xml_select(u'ul'):
                main = ''.join([ U(node) for node in results_until(li.xml_select(u'node()'), u'self::ul') ])
                #main = li.xml_select(u'string(ul/preceding-sibling::node())')
                prop, val = parse_pair(main)
                subfield_list = [ sli for sli in li.xml_select(u'ul/li') ]
                subfield_dict = dict([ parse_pair(U(pair)) for pair in subfield_list ])
                if None in subfield_dict: del subfield_dict[None]
                yield prop, val, subfield_dict
            #Just a regular, unadorned property
            else:
                prop, val = parse_pair(U(li))
                if prop: yield prop, val, None

    #Gather the document-level metadata
    base = propbase = rbase = None
    for prop, val, subfield_dict in fields(docheader):
        if prop == '@base': base = val
        if prop == '@property-base': propbase = val
        if prop == '@resource-base': rbase = val
    if not propbase: propbase = base
    if not rbase: rbase = base

    #Go through the resources expressed in remaining sections
    for sect in sections:
        #The header can take one of 4 forms: "ResourceID", "ResourceID [ResourceType]", "[ResourceType]" or "[]"
        #The 3rd form is for an anonymous resource with specified type and the 4th an anonymous resource with unspecified type
        matched = RESOURCE_PAT.match(U(sect))
        if not matched:
            raise ValueError(_(u'Syntax error in resource header: {0}'.format(U(sect))))
        rid = matched.group(1)
        rtype = matched.group(3)
        if rid:
            rid = I(iri.absolutize(rid, base))
        if not rid:
            rid = I(iri.absolutize(output.generate_resource(), base))
        if rtype:
            rtype = I(iri.absolutize(rtype, base))
        #Resource type might be set by syntax config
        if not rtype:
            rtype = syntaxtypemap.get(sect.xml_local)
        if rtype:
            output.add(rid, RDFTYPE, rtype)
        #Add the properties
        for prop, val, subfield_dict in fields(sect):
            attrs = subfield_dict or {}
            fullprop = I(iri.absolutize(prop, propbase))
            resinfo = AB_RESOURCE_PAT.match(val)
            if resinfo:
                val = resinfo.group(1)
                valtype = resinfo.group(3)
                if not val: val = output.generate_resource()
                if valtype: attrs[RDFTYPE] = valtype
            if fullprop in interp:
                val = interp[fullprop](val, rid=rid, fullprop=fullprop, base=base, model=output)
                if val is not None: output.add(rid, fullprop, val)
            else:
                output.add(rid, fullprop, val, attrs)

    return base
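#Hedged usage sketch, not part of the original source: a minimal document in
#the Versa Markdown syntax run through from_markdown. The model argument is
#assumed to provide the add()/generate_resource() API used above (e.g. a Versa
#memory connection); that import is deliberately left to the caller.
VERSA_EXAMPLE_DOC = '''# @docheader

* @base: http://example.org/
* @property-base: http://example.org/prop/

# spam [Dish]

* tastiness: 5
'''

def _example_versa_from_markdown(model):
    base = from_markdown(VERSA_EXAMPLE_DOC, model)
    return base  #the resolved @base IRI, here http://example.org/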
def from_markdown(md, dest, stem, index):
    h = markdown.markdown(md.decode('utf-8'))
    doc = html.markup_fragment(inputsource.text(h.encode('utf-8')))
    #print doc.xml_encode()
    output = TURTLE_TOP_TEMPLATE
    graphoutput = TURTLE_TOP_TEMPLATE
    #The top section contains all the test metadata
    top_section_fields = results_until(doc.xml_select(u'//h1[1]/following-sibling::h2'), u'self::h1')
    #Note: top-level fields are rendered into dicts, others are turned into lists of tuples
    #fields = dict(map(lambda y: [part.strip() for part in y.split(u':', 1)], U(top_section_fields).split(u'\n')))
    fields = {}
    #subsections = top_section_fields[0].xml_select(u'following-sibling::h2')
    fields["relatedtests"] = ""
    for s in top_section_fields:
        prop = U(s).strip()
        value = s.xml_select(u'./following-sibling::p|following-sibling::ul')
        if value:
            #Encoding to XML makes it a string again, so turn it back to Unicode
            #fields[property] = value[0].xml_encode().decode('utf-8')
            #Use XPath to strip markup
            if value[0].xml_local == u'ul':
                fields[prop] = [ li.xml_select(u'string(.)') for li in value[0].xml_select(u'./li') ]
                if prop == "relatedtests":
                    issues_links = ""
                    for i in fields[prop]:
                        issues_links += TESTS_LINKS_TEMPLATE.format(i) + ", "
                    if issues_links.endswith(', '):
                        issues_links = issues_links[:-2]
                    fields[prop] = issues_links
                elif prop == "issues":
                    issues_links = ""
                    for i in fields[prop]:
                        tid = time.time() + random.randint(1, 1000)
                        tid = str(tid).replace('.', '')
                        json = JSON2BASE64ENCODE
                        json = json.replace('%ID', tid)
                        json = json.replace('%F1VAL%', i)
                        json = json.replace('"%F2VAL%"', '')
                        json = json.replace('"%F3VAL%"', '')
                        link = BASE64_LINK
                        link = link.replace('%BASE64%', base64.b64encode(json))
                        link = link.replace('%LABEL%', i)
                        issues_links += link + ", "
                    if issues_links.endswith(', '):
                        issues_links = issues_links[:-2]
                    fields["issueslinks"] = issues_links
                elif prop == "status":
                    issues_links = ""
                    for i in fields[prop]:
                        tid = time.time() + random.randint(1, 1000)
                        tid = str(tid).replace('.', '')
                        json = JSON2BASE64ENCODE
                        json = json.replace('%ID', tid)
                        json = json.replace('"%F1VAL%"', '')
                        json = json.replace('"%F2VAL%"', '')
                        json = json.replace('%F3VAL%', i)
                        link = BASE64_LINK
                        link = link.replace('%BASE64%', base64.b64encode(json))
                        link = link.replace('%LABEL%', i)
                        issues_links += link + ", "
                    if issues_links.endswith(', '):
                        issues_links = issues_links[:-2]
                    fields["statuslinks"] = issues_links
            else:
                fields[prop] = value[0].xml_select(u'string(.)')
                #if prop.lower() == "description":
                #    fields["label"] = value[0].xml_select(u'string(.)')
                if prop.lower() == "id":
                    fields["test-id"] = value[0].xml_select(u'string(.)')
    #add fileroot to exhibit json
    fields['fileroot'] = stem
    testinfo = fields.copy()
    #for k, v in testinfo.items():
    #    #testinfo.append(shred_if_needed(k, v))
    #    testinfo[k] = shred_if_needed(k, v)
    index.append(testinfo)
    #output += TURTLE_RESOURCE_TEMPLATE.format(rid=TEST_ID_BASE + fields[u'id'])
    #output += u' a bf:TestCase ;\n'
    #for k, v in fields.items():
    #    if matches_uri_syntax(v):
    #        output += u' bf:{k} <{v}> ;\n'.format(k=k, v=v)
    #    else:
    #        output += u' bf:{k} "{v}" ;\n'.format(k=k, v=v)
    #output = output.rsplit(u';\n', 1)[0]
    #output += u'.\n'
    sections = doc.xml_select(u'//h1[not(.="Header")]')
    for sect in sections:
        rtype = U(sect)
        #fields = U(sect.xml_select(u'following-sibling::p'))
        field_list = sect.xml_select(u'following-sibling::ul')[0]
        fields = []
        #fields = map(lambda y: [part.strip() for part in y.split(u':', 1)], fields.split(u'\n'))
        for li in field_list.xml_select(u'./li'):
            if U(li).strip():
                prop, val = [ part.strip() for part in U(li.xml_select(u'string(.)')).split(u':', 1) ]
                fields.append((prop, val))
        subsections = results_until(sect.xml_select(u'./following-sibling::h2'), u'self::h1')
        for s in subsections:
            prop = U(s).strip()
            value = s.xml_select(u'./following-sibling::p|following-sibling::ul')
            #print (prop, value)
            if value:
                #Encoding to XML makes it a string again, so turn it back to Unicode
                #fields[property] = value[0].xml_encode().decode('utf-8')
                #Use XPath to strip markup
                if value[0].xml_local == u'ul':
                    fields.append((prop, [ U(li.xml_select(u'string(.)')) for li in value[0].xml_select(u'./li') ]))
                else:
                    fields.append((prop, U(value[0].xml_select(u'string(.)'))))
        #desc = U(sect.xml_select(u'following-sibling::h2[.="Description"]/following-sibling::p'))
        #note = U(sect.xml_select(u'following-sibling::h2[.="Note"]/following-sibling::p'))
        to_remove = []
        for k, v in fields:
            if k == u'id':
                rid = absolutize(v, TEST_ID_BASE)
                to_remove.append((k, v))
        for pair in to_remove:
            fields.remove(pair)
        atype = None
        output += TURTLE_RESOURCE_TEMPLATE.format(rid=rid)
        if ' ' in rtype:
            #Derive the actual annotation type
            rtype, atype = rtype.split()
            output += u' a bf:{atype}, bf:{rtype} ;\n'.format(rtype=rtype, atype=atype)
        else:
            output += u' a bf:{rtype} ;\n'.format(rtype=rtype)
        #print fields
        for k, v in fields:
            if matches_uri_syntax(v):
                output += u' bf:{k} <{v}> ;\n'.format(k=k, v=v)
            elif v.startswith("["):
                output += u' bf:{k} {v} ;\n'.format(k=k, v=v)
            else:
                output += u' bf:{k} "{v}" ;\n'.format(k=k, v=v)
        output = output.rsplit(u';\n', 1)[0]
        output += u'.\n'
        #Create RDF that only includes resource-resource relations, for graphical display
        graphoutput += TURTLE_RESOURCE_TEMPLATE.format(rid=rid)
        #print fields
        for k, v in fields:
            if matches_uri_syntax(v):
                graphoutput += u' bf:{k} <{v}> ;\n'.format(k=k, v=v)
            elif v.startswith("["):
                graphoutput += u' bf:{k} {v} ;\n'.format(k=k, v=v)
        graphoutput = graphoutput.rsplit(u';\n', 1)[0]
        graphoutput += u'.\n'
    turtlefname = os.path.join(dest, stem + os.path.extsep + 'ttl')
    turtlef = open(turtlefname, 'w')
    turtlef.write(output.encode('utf-8'))
    turtlef.close()
    eyecandyfname = os.path.join(dest, stem + "-eyecandy" + os.path.extsep + 'ttl')
    eyecandyf = open(eyecandyfname, 'w')
    eyecandyf.write(graphoutput.encode('utf-8'))
    eyecandyf.close()
    #Copying testinfo because from_turtle will modify it in place
    return output, testinfo.copy()
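#Hedged usage sketch, not part of the original source: running the test-manifest
#flavor of from_markdown over a Markdown file on disk and collecting the index
#entries it appends. The file and directory names here are hypothetical.
def _example_testdoc_from_markdown(mdfname, dest='.'):
    index = []
    stem = os.path.splitext(os.path.basename(mdfname))[0]
    md = open(mdfname).read()
    turtle, testinfo = from_markdown(md, dest, stem, index)
    return turtle, index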