# Module-level imports assumed by this method:
import os
import rdflib
from pyRdfa import pyRdfa


def _schema_nodes(self):
    """Parse self._ontology_file into a graph and yield its triples."""
    # Validate required state up front, before touching the file name.
    if not self._ontology_file:
        raise ValueError(
            "No ontology file specified. %s" % self.errorstring_base)
    if not self.lexicon:
        raise ValueError(
            "No lexicon object assigned. %s" % self.errorstring_base)
    name, ext = os.path.splitext(self._ontology_file)
    if ext == '.ttl':
        # Turtle sources go through rdflib's N3/Turtle parser.
        self._ontology_parser_function = \
            lambda s: rdflib.Graph().parse(s, format='n3')
    else:
        # Anything else is treated as an (X)HTML+RDFa source.
        self._ontology_parser_function = \
            lambda s: pyRdfa().graph_from_source(s)
    latest_file = self._read_schema()
    try:
        self.graph = self._ontology_parser_function(latest_file)
    except Exception:
        raise IOError("Error parsing ontology at %s" % latest_file)
    for subj, pred, obj in self.graph:
        self.ontology[subj].append((pred, obj))
        yield (subj, pred, obj)
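# Minimal usage sketch (assumptions: `parser` is an instance of the
# surrounding class, which supplies _ontology_file, lexicon, ontology as a
# mapping of subjects to lists, errorstring_base, and _read_schema()).
# Because _schema_nodes() is a generator, triples stream out as the
# ontology is parsed:
for subj, pred, obj in parser._schema_nodes():
    print("%s %s %s" % (subj, pred, obj))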
def _process(self, graph, pgraph, baseURI, orig_source,
             media_type="", rdfa_version=None,
             embedded_rdf=False,
             space_preserve=True,
             vocab_expansion=False,
             vocab_cache=False,
             vocab_cache_report=False,
             refresh_vocab_cache=False,
             check_lite=False):
    from rdflib.plugins.parsers.pyRdfa import pyRdfa, Options
    from rdflib import Graph

    processor_graph = pgraph if pgraph is not None else Graph()
    self.options = Options(output_processor_graph=True,
                           embedded_rdf=embedded_rdf,
                           space_preserve=space_preserve,
                           vocab_expansion=vocab_expansion,
                           vocab_cache=vocab_cache,
                           vocab_cache_report=vocab_cache_report,
                           refresh_vocab_cache=refresh_vocab_cache,
                           check_lite=check_lite)
    if media_type is None:
        media_type = ""
    processor = pyRdfa(self.options, base=baseURI, media_type=media_type,
                       rdfa_version=rdfa_version)
    processor.graph_from_source(orig_source, graph=graph,
                                pgraph=processor_graph, rdfOutput=False)
    # This may raise an exception if the graph parsing led to an error.
    _check_error(processor_graph)
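# Hedged note: _process mirrors the internals of the pyRdfa-based
# structured-data plugin that older rdflib releases shipped. In those
# releases the usual public entry point was a plain parse with the
# "rdfa" format name registered by that plugin, roughly:
import rdflib

g = rdflib.Graph()
g.parse("http://example.org/page.html", format="rdfa")  # example URL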
# Module-level imports assumed here:
from pyMicrodata import pyMicrodata
from pyRdfa import pyRdfa


def __init__(self, source):
    super(CompoundGraph, self).__init__()
    # Each extractor may fail independently; a failed parse leaves the
    # corresponding graph as None instead of aborting construction.
    try:
        self.microdata_graph = pyMicrodata().graph_from_source(source)
    except Exception:
        self.microdata_graph = None
    try:
        self.rdfa_graph = pyRdfa().graph_from_source(source)
    except Exception:
        self.rdfa_graph = None
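# Hypothetical usage of CompoundGraph: callers can fall back to whichever
# half actually parsed.
cg = CompoundGraph("http://example.org/page.html")  # example URL
graph = cg.rdfa_graph if cg.rdfa_graph is not None else cg.microdata_graph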
# Module-level imports assumed by this method:
from rdflib.plugins.parsers.pyRdfa import pyRdfa, Options


def parse(self):
    """
    Parse the RDFa input and store the processor and default graphs.
    The final media type in the class instance is also updated.

    *Implementation note:* this method goes down into the "guts" of the
    RDFa parser plugin of `RDFLib` instead of simply executing a plain
    parse. The reason is that the parser does not expose, at the top
    level, the extra "transformer" function that checks the RDFa 1.1 Lite
    features (and adds warning triples to the processor graph); that
    transformer can only be added one step deeper in the plugin code.
    (See the
    :py:func:`rdflib.plugins.parsers.pyRdfa.transform.lite.lite_prune`
    function.)
    """
    transformers = []
    if self.check_lite:
        from rdflib.plugins.parsers.pyRdfa.transform.lite import lite_prune
        transformers.append(lite_prune)
    options = Options(output_default_graph=True,
                      output_processor_graph=True,
                      transformers=transformers,
                      vocab_expansion=self.vocab_expansion,
                      embedded_rdf=self.embedded_rdf,
                      add_informational_messages=True)
    processor = pyRdfa(options=options, base=self.base,
                       media_type=self.media_type)
    processor.graph_from_source(self.uri, graph=self.default_graph,
                                pgraph=self.processor_graph,
                                rdfOutput=True)
    # Keep the processor around; error messages pull parameters from it.
    self.processor = processor
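# Sketch of driving parse() (assumption: `validator` is an instance of the
# surrounding wrapper class, with uri, base, media_type, check_lite,
# vocab_expansion, embedded_rdf, default_graph, and processor_graph already
# set up). Lite-check warnings land in the processor graph:
validator.parse()
for triple in validator.processor_graph:
    print("%s %s %s" % triple)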
def main(eusol_id):
    """
    Read the accession pages stored in the data folder and print all the
    information gathered.

    :arg eusol_id: the EU-SOL identifier of the genome to investigate.
    """
    proc = pyRdfa()
    graph = rdflib.Graph()
    eusol_url = EUSOL_URL % eusol_id
    proc.graph_from_source(eusol_url, graph)

    # Aggregate more information by querying the seeAlso pages. Collect
    # the targets first: mutating a graph while iterating over it is
    # unsafe.
    see_also = [obj for sub, pred, obj in graph
                if pred == RDFS['seeAlso'] or pred == RDFS2['seeAlso']]
    for obj in see_also:
        if DEBUG:
            print 'Getting more info at %s' % obj
        graph += pyRdfa().graph_from_source(obj)

    # Expand the graph by retrieving information from the cropontology
    # website; again, gather the predicates before touching the graph.
    crop_preds = [pred for sub, pred, obj in graph
                  if isinstance(pred, rdflib.term.URIRef)
                  and 'cropontology' in str(pred)]
    for pred in crop_preds:
        stream = urllib2.urlopen(pred)
        text = stream.read()
        stream.close()
        text = text.replace('%3A', ':')
        # parse() adds the triples to the existing graph in place.
        graph.parse(StringIO.StringIO(text), format="nt")

    if DEBUG:
        print '\nGraph contains:'
        for sub, pred, obj in graph:
            print sub, pred, obj

    # Temporary hack until the version in test matches the version in
    # production.
    eusol_url = EUSOL2_URL % eusol_id
    subjects = [eusol_url]
    info = {}
    info = get_info_accession(graph, eusol_url, info)

    # Dynamically retrieve the CGN identifier from the EU-SOL information.
    if 'donor accession number' in info:
        cgn_id = info['donor accession number'][eusol_url][0]
        cgn_url = CGN_URL % cgn_id
        info = get_info_accession(graph, cgn_url, info)
        subjects.append(cgn_url)

    images = get_images_in_graph(graph, subjects)

    origins = set()
    origins_info = {}
    for trait in info:
        for source in info[trait]:
            url = urllib.splitquery(source)[0].rsplit('/')[2]
            if url not in origins_info:
                origins.add(url)
                origins_info[url] = source
    return (info, origins, origins_info, images)
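# Hypothetical call; the identifier below is made up for illustration.
info, origins, origins_info, images = main('EA00325')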
def __init__(self, graph, doc_lines, url=""):
    super(RdfValidator, self).__init__(graph, doc_lines, url=url)
    self.parser = pyRdfa()
    # Use the RDFa half of the compound graph.
    self.graph = self.graph.rdfa_graph
# Assumed imports for this script fragment: tempfile, pyRdfa,
# StringIO from the StringIO module, and etree from lxml.
xslt_transform = None
limit = 0 if not args.testnumber else int(args.testnumber)
fobj = open(args.outfile, 'w') if args.outfile else StringIO()
try:
    for doc in docs(args.database, args.collection, limit):
        html_doc = cmdl.main(doc)
        verbose(html_doc)
        if not (args.transform or args.verbose):
            print html_doc
        else:
            if not xslt_transform:
                xslt_transform = etree.XSLT(
                    etree.XML(open(args.transform).read()))
            etree.XML(html_doc)  # raises if the document is not well-formed
            rdfa_doc = str(xslt_transform(etree.XML(html_doc)))
            verbose(rdfa_doc)
            # pyRdfa reads from a source file, so stage the RDFa document
            # in a temporary .html file first.
            _, tmpf = tempfile.mkstemp(suffix='.html')
            with open(tmpf, 'w') as _tf:
                _tf.write(rdfa_doc)
            ttl_doc = pyRdfa.pyRdfa().rdf_from_source(tmpf, rdfOutput=True)
            #os.remove(tmpf)
            verbose(ttl_doc)
            fobj.write(ttl_doc)
finally:
    fobj.close()