Example #1
    def _schema_nodes(self):
        """parse self._ontology_file into a graph"""
        name, ext = os.path.splitext(self._ontology_file)
        if ext in ['.ttl']:
            self._ontology_parser_function = \
                lambda s: rdflib.Graph().parse(s, format='n3')
        else:
            self._ontology_parser_function = \
                lambda s: pyRdfa().graph_from_source(s)
        if not self._ontology_parser_function:
            raise ValueError(
                "No function found to parse ontology. %s" %
                self.errorstring_base)
        if not self._ontology_file:
            raise ValueError(
                "No ontology file specified. %s" % self.errorstring_base)
        if not self.lexicon:
            raise ValueError(
                "No lexicon object assigned. %s" % self.errorstring_base)

        latest_file = self._read_schema()

        try:
            self.graph = self._ontology_parser_function(latest_file)
        except Exception:
            raise IOError("Error parsing ontology at %s" % latest_file)

        for subj, pred, obj in self.graph:
            self.ontology[subj].append((pred, obj))
            yield (subj, pred, obj)
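
The extension-based dispatch above can be tried on its own. Below is a minimal, self-contained sketch of the same idea; it assumes a standalone pyRdfa install importable as `from pyRdfa import pyRdfa` (the import path and file name are illustrative, not taken from the original class):

import os
import rdflib
from pyRdfa import pyRdfa  # assumption: standalone pyRdfa distribution

def parse_ontology(path):
    """Return an rdflib Graph, choosing the parser from the file extension."""
    _, ext = os.path.splitext(path)
    if ext == '.ttl':
        # Turtle source: let rdflib parse it directly (the original passes
        # format='n3', which rdflib also accepts for Turtle input)
        return rdflib.Graph().parse(path, format='turtle')
    # anything else is treated as (X)HTML with embedded RDFa
    return pyRdfa().graph_from_source(path)

# hypothetical usage:
# for subj, pred, obj in parse_ontology('schema.ttl'):
#     print(subj, pred, obj)
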
Example #2
    def _process(self, graph, pgraph, baseURI, orig_source,
                 media_type="",
                 rdfa_version=None,
                 embedded_rdf=False,
                 space_preserve=True,
                 vocab_expansion=False,
                 vocab_cache=False,
                 vocab_cache_report=False,
                 refresh_vocab_cache=False,
                 check_lite=False):
        from rdflib.plugins.parsers.pyRdfa import pyRdfa, Options
        from rdflib import Graph
        processor_graph = pgraph if pgraph is not None else Graph()
        self.options = Options(output_processor_graph=True,
                               embedded_rdf=embedded_rdf,
                               space_preserve=space_preserve,
                               vocab_expansion=vocab_expansion,
                               vocab_cache=vocab_cache,
                               vocab_cache_report=vocab_cache_report,
                               refresh_vocab_cache=refresh_vocab_cache,
                               check_lite=check_lite)

        if media_type is None:
            media_type = ""
        processor = pyRdfa(self.options,
                           base=baseURI,
                           media_type=media_type,
                           rdfa_version=rdfa_version)
        processor.graph_from_source(orig_source, graph=graph, pgraph=processor_graph, rdfOutput=False)
        # This may result in an exception if the graph parsing led to an error
        _check_error(processor_graph)
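
Because `_process` sits behind RDFLib's parser plugin machinery, callers normally reach it through `Graph.parse`. A minimal sketch, assuming an rdflib 4.x-era install where the pyRdfa plugin is registered under the "rdfa" format name (the URL is illustrative):

from rdflib import Graph

g = Graph()
# assumption: the structured-data plugins are registered, so format="rdfa"
# routes to the parser whose _process() method is shown above; extra keyword
# arguments (e.g. vocab_expansion, check_lite) would be forwarded to it
g.parse("http://example.org/page.html", format="rdfa")
print(len(g))
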
Example #3
    def _schema_nodes(self):
        """parse self._ontology_file into a graph"""
        name, ext = os.path.splitext(self._ontology_file)
        if ext in ['.ttl']:
            self._ontology_parser_function = lambda s: rdflib.Graph().parse(
                s, format='n3')
        else:
            self._ontology_parser_function = lambda s: pyRdfa(
            ).graph_from_source(s)
        if not self._ontology_parser_function:
            raise ValueError("No function found to parse ontology. %s" %
                             errorstring_base)
        if not self._ontology_file:
            raise ValueError("No ontology file specified. %s" %
                             errorstring_base)
        if not self.lexicon:
            raise ValueError("No lexicon object assigned. %s" %
                             errorstring_base)

        latest_file = self._read_schema()

        try:
            self.graph = self._ontology_parser_function(latest_file)
        except Exception:
            raise IOError("Error parsing ontology at %s" % latest_file)

        for subj, pred, obj in self.graph:
            self.ontology[subj].append((pred, obj))
            yield (subj, pred, obj)
Example #4
    def __init__(self, source):
        super(CompoundGraph, self).__init__()
        try:
            self.microdata_graph = pyMicrodata().graph_from_source(source)
        except Exception:
            self.microdata_graph = None

        try:
            self.rdfa_graph = pyRdfa().graph_from_source(source)
        except Exception:
            self.rdfa_graph = None
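
A short usage sketch for the compound graph above, assuming the class is importable as `CompoundGraph` and that a half stays `None` whenever its syntax failed to parse, exactly as the constructor does (the URL is illustrative):

cg = CompoundGraph("http://example.org/page.html")
for g in (cg.microdata_graph, cg.rdfa_graph):
    if g is None:
        continue  # that syntax could not be extracted from this source
    for subj, pred, obj in g:
        print(subj, pred, obj)
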
Example #5
	def parse(self):
		"""
		Parse the RDFa input and store the processor and default graphs. The final media type in the class instance is also updated.

		*Implementation note:* this method goes down into the "guts" of the RDFa parser plugin of `RDFLib` instead of simply executing a plain parse. The reason is that the parser does not "expose", at the top level, the extra "transformer" function that checks the RDFa 1.1 Lite features (and adds warning triples to the processor graph); this can only be added by going one step deeper into the plugin code. (See the :py:func:`rdflib.plugins.parsers.pyRdfa.transform.lite.lite_prune` function.)
		"""
		transformers = []
		if self.check_lite:
			from rdflib.plugins.parsers.pyRdfa.transform.lite import lite_prune
			transformers.append(lite_prune)

		options = Options(output_default_graph = True, output_processor_graph = True,
						  transformers    = transformers,
						  vocab_expansion = self.vocab_expansion,
						  embedded_rdf    = self.embedded_rdf,
						  add_informational_messages = True)
		processor = pyRdfa(options = options, base = self.base, media_type = self.media_type)
		processor.graph_from_source(self.uri, graph = self.default_graph, pgraph = self.processor_graph, rdfOutput = True)
		# Extracting some parameters for the error messages
		self.processor 	= processor
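
After `parse()` has run, the warnings added by `lite_prune` (and any other processor messages) end up in the processor graph. A sketch of inspecting it, assuming the standard RDFa vocabulary namespace for processor-graph messages and an already constructed instance `v` of the class above:

from rdflib import Namespace, RDF

RDFA = Namespace("http://www.w3.org/ns/rdfa#")  # assumption: message types use this vocabulary

v.parse()
warnings = list(v.processor_graph.subjects(RDF.type, RDFA.Warning))
errors = list(v.processor_graph.subjects(RDF.type, RDFA.Error))
print("%d warnings, %d errors" % (len(warnings), len(errors)))
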
Example #6
def main(eusol_id):
    """ Reads in all the accessions page stored in the data folder and
    print all the information gathered.

    :arg eusol_id: the eusol identifier of the genome to investigate.

    """
    proc = pyRdfa()
    graph = rdflib.Graph()
    eusol_url = EUSOL_URL % eusol_id
    proc.graph_from_source(eusol_url, graph)

    # Aggregate more information by querying the seeAlso page
    for sub, pred, obj in graph:
        if pred == RDFS['seeAlso'] or pred == RDFS2['seeAlso']:
            if DEBUG:
                print 'Getting more info at %s' % obj
            graph += pyRdfa().graph_from_source(obj)

    # Expand the graph info by retrieving the information from the
    # cropontology website.
    for sub, pred, obj in graph:
        #print '--', sub, pred, obj
        if isinstance(pred, rdflib.term.URIRef) \
                and 'cropontology' in str(pred):
            stream = urllib2.urlopen(pred)
            text = stream.read()
            stream.close()
            text = text.replace('%3A', ':')
            graph = graph + graph.parse(StringIO.StringIO(text), format="nt")

    if DEBUG:
        print '\nGraph contains:'
        for sub, pred, obj in graph:
            print sub, pred, obj
    # Temporary hack until the version in test is the same as the version in
    # prod
    eusol_url = EUSOL2_URL % eusol_id

    subjects = [eusol_url]
    info = {}
    info = get_info_accession(graph, eusol_url, info)

    # Dynamically retrieve the CGN identifier from the EU-SOL information
    if 'donor accession number' in info:
        cgn_id = info['donor accession number'][eusol_url][0]
        cgn_url = CGN_URL % cgn_id
        info = get_info_accession(graph, cgn_url, info)
        subjects.append(cgn_url)

    images = get_images_in_graph(graph, subjects)

    origins = set()
    origins_info = {}
    for trait in info:
        for source in info[trait]:
            url = urllib.splitquery(source)[0].rsplit('/')[2]
            if url not in origins_info:
                origins.add(url)
                origins_info[url] = source

    return (info, origins, origins_info, images)
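
The crawl above is Python 2 (print statements, urllib2, StringIO). A minimal Python 3 sketch of just the seeAlso-following step, assuming the same RDFS / RDFS2 namespace objects and a pyRdfa build importable as shown (the import path varies between distributions):

from pyRdfa import pyRdfa  # assumption: standalone pyRdfa package

def follow_see_also(graph, see_also_predicates):
    """Merge in the RDFa found at every seeAlso target already present in the graph."""
    for _, pred, obj in list(graph):  # copy first: the graph grows while we merge
        if pred in see_also_predicates:
            graph += pyRdfa().graph_from_source(str(obj))
    return graph

# hypothetical call, mirroring the loop above:
# graph = follow_see_also(graph, (RDFS['seeAlso'], RDFS2['seeAlso']))
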
Example #7
    def __init__(self, graph, doc_lines, url=""):
        super(RdfValidator, self).__init__(graph, doc_lines, url=url)
        self.parser = pyRdfa()
        # use the rdfa half of the compound graph
        self.graph = self.graph.rdfa_graph
Example #8
    def __init__(self, graph, doc_lines, url=""):
        super(RdfValidator, self).__init__(graph, doc_lines, url=url)
        self.parser = pyRdfa()
        self.graph = self.graph.rdfa_graph  # use the rdfa half of the compound graph
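
The validator plugs together with the CompoundGraph from Example #4. A hypothetical wiring sketch, assuming both classes are importable; `doc_lines` here is just the raw page split into lines, and none of the names beyond the signature above come from the original code:

url = "http://example.org/page.html"
with open("page.html") as fh:
    doc_lines = fh.read().splitlines()

compound = CompoundGraph(url)  # builds the microdata and RDFa halves
validator = RdfValidator(compound, doc_lines, url=url)
# validator.graph now refers only to the RDFa half of the compound graph
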
Example #9
    xslt_transform = None
    limit = 0 if not args.testnumber else int(args.testnumber)

    fobj = open(args.outfile, 'w') if args.outfile else StringIO()

    try:
        for doc in docs(args.database, args.collection, limit):
            html_doc = cmdl.main(doc)
            verbose(html_doc)

            if not (args.transform or args.verbose):
                print html_doc
            else:
                if not xslt_transform:
                    xslt_transform = etree.XSLT(etree.XML(open(args.transform).read()))
                etree.XML(html_doc)
                rdfa_doc = str(xslt_transform(etree.XML(html_doc)))
                verbose(rdfa_doc)
                _, tmpf = tempfile.mkstemp(suffix='.html')
                with open(tmpf, 'w') as _tf:
                    _tf.write(rdfa_doc)
                ttl_doc = pyRdfa.pyRdfa().rdf_from_source(tmpf, rdfOutput=True)
                #os.remove(tmpf)
                verbose(ttl_doc)

                fobj.write(ttl_doc)
    finally:
        fobj.close()
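
The final step of the pipeline above can be exercised on its own. A minimal sketch, assuming `import pyRdfa` as in the loop and an RDFa-annotated HTML file on disk (the file name is illustrative):

import pyRdfa

# serialize the RDFa embedded in an HTML file to RDF text in one call;
# rdfOutput=True mirrors the call in the loop above
ttl_doc = pyRdfa.pyRdfa().rdf_from_source("annotated.html", rdfOutput=True)
print(ttl_doc)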


Example #10
    xslt_transform = None
    limit = 0 if not args.testnumber else int(args.testnumber)

    fobj = open(args.outfile, 'w') if args.outfile else StringIO()

    try:
        for doc in docs(args.database, args.collection, limit):
            html_doc = cmdl.main(doc)
            verbose(html_doc)

            if not (args.transform or args.verbose):
                print html_doc
            else:
                if not xslt_transform:
                    xslt_transform = etree.XSLT(
                        etree.XML(open(args.transform).read()))
                etree.XML(html_doc)
                rdfa_doc = str(xslt_transform(etree.XML(html_doc)))
                verbose(rdfa_doc)
                _, tmpf = tempfile.mkstemp(suffix='.html')
                with open(tmpf, 'w') as _tf:
                    _tf.write(rdfa_doc)
                ttl_doc = pyRdfa.pyRdfa().rdf_from_source(tmpf, rdfOutput=True)
                #os.remove(tmpf)
                verbose(ttl_doc)

                fobj.write(ttl_doc)
    finally:
        fobj.close()