Exemple #1
0
    def _schema_nodes(self):
        """Parse ``self._ontology_file`` into a graph and yield its triples.

        The parser is picked from the file extension: ``.ttl`` files go
        through ``rdflib.Graph().parse(..., format='n3')``, everything else
        through ``pyRdfa().graph_from_source``. The location that is actually
        parsed is whatever ``self._pull_standard()`` returns.

        Each triple is recorded in ``self.ontology`` (keyed by subject) and
        then yielded as ``(subj, pred, obj)``. This is a generator, so none
        of the work below happens until the first item is requested.

        Raises:
            ValueError: if no ontology file or no lexicon is configured.
            IOError: if the ontology cannot be parsed.
        """
        errorstring = "Are you calling parse_ontology from the base SchemaDef class?"
        # Validate before touching the file name: os.path.splitext() fails
        # with an unrelated error on None and would hide the intended message.
        if not self._ontology_file:
            raise ValueError("No ontology file specified. %s" % errorstring)
        if not self.lexicon:
            raise ValueError("No lexicon object assigned. %s" % errorstring)

        ext = os.path.splitext(self._ontology_file)[1]
        if ext == '.ttl':
            self._ontology_parser_function = lambda s: rdflib.Graph().parse(s, format='n3')
        else:
            self._ontology_parser_function = lambda s: pyRdfa().graph_from_source(s)

        latest_file = self._pull_standard()

        try:
            self.graph = self._ontology_parser_function(latest_file)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must not be reported as a parsing problem.
            raise IOError("Error parsing ontology at %s" % latest_file)

        for subj, pred, obj in self.graph:
            self.ontology[subj].append((pred, obj))
            yield (subj, pred, obj)
Exemple #2
0
def extrair_rdfa(url):
    """Extract the RDFa (and embedded RDF) found at *url* into a Graph.

    The source is run through pyRdfa with ``embedded_rdf=True`` and
    serialized as 'pretty-xml' (RDF/XML). That serialization is then parsed
    into a fresh ``Graph``, which is returned.
    """
    options = Options(embedded_rdf=True)
    serialized = pyRdfa(options=options).rdf_from_source(
        url, outputFormat='pretty-xml')
    # io.BytesIO only accepts bytes; depending on the Python / library
    # version the serializer may hand back text instead.
    if not isinstance(serialized, bytes):
        serialized = serialized.encode('utf-8')
    g = Graph()
    # Name the format explicitly: the payload is RDF/XML and the parser
    # cannot infer that from an in-memory stream.
    g.parse(io.BytesIO(serialized), format='xml')
    return g
Exemple #3
0
    def __init__(self, source):
        """Build the microdata and RDFa graphs for *source*.

        The two extractors run independently and on a best-effort basis:
        when one of them fails, the matching attribute is set to ``None``
        instead of aborting construction.
        """
        super(CompoundGraph, self).__init__()
        try:
            self.microdata_graph = pyMicrodata().graph_from_source(source)
        except Exception:
            # Best effort, but KeyboardInterrupt/SystemExit still propagate.
            self.microdata_graph = None

        try:
            self.rdfa_graph = pyRdfa().graph_from_source(source)
        except Exception:
            self.rdfa_graph = None
Exemple #4
0
	def parse(self, source, graph,
			  pgraph                 = None,
			  embedded_rdf           = True,
			  vocab_expansion        = False,
			  vocab_cache            = False,
			  rdfOutput              = False) :
		"""
		Run the RDFa 1.1 processor (as 'text/html') over the source and then, when pyMicrodata can be imported, the Microdata processor; both add their triples to the same output graph.

		@param source: one of the input sources that the RDFLib package defined
		@type source: InputSource class instance
		@param graph: target graph for the triples; output graph, in RDFa spec. parlance
		@type graph: RDFLib Graph
		@keyword pgraph: target for error and warning triples; processor graph, in RDFa spec. parlance. If set to None, these triples are ignored
		@type pgraph: RDFLib Graph
		@keyword embedded_rdf: some formats allow embedding RDF in other formats: (X)HTML can contain turtle in a special <script> element, SVG can have RDF/XML embedded in a <metadata> element. This flag controls whether those triples should be interpreted and added to the output graph. Some languages (e.g., SVG) require this, and the flag is ignored.
		@type embedded_rdf: Boolean
		@keyword vocab_expansion: whether the RDFa @vocab attribute should also mean vocabulary expansion (see the RDFa 1.1 spec for further details)
		@type vocab_expansion: Boolean
		@keyword vocab_cache: in case vocab expansion is used, whether the expansion data (i.e., vocabulary) should be cached locally. This requires the ability for the local application to write on the local file system
		@type vocab_cache: Boolean
		@keyword rdfOutput: whether Exceptions should be caught and added, as triples, to the processor graph, or whether they should be raised.
		@type rdfOutput: Boolean
		"""
		# The processors are handed the original source reference (stream, URL or file name) rather than the InputSource wrapper.
		if isinstance(source, StringInputSource) :
			orig_source = source.getByteStream()
		elif isinstance(source, URLInputSource) :
			orig_source = source.url
		elif isinstance(source, FileInputSource) :
			orig_source = source.file.name
			# The file is re-opened by name, so release the handle held by the InputSource.
			source.file.close()
		else :
			# Any other InputSource: fall back to its byte stream instead of leaving orig_source unbound.
			orig_source = source.getByteStream()
		baseURI = source.getPublicId()

		# The RDFa part
		from pyRdfa import pyRdfa, Options
		# "is not None" rather than "!= None": an identity test does not go through the graph's own comparison operators.
		self.options = Options(output_processor_graph = (pgraph is not None),
							   embedded_rdf           = embedded_rdf,
							   vocab_expansion        = vocab_expansion,
							   vocab_cache            = vocab_cache)

		processor = pyRdfa(self.options, base = baseURI, media_type = 'text/html', rdfa_version = '1.1')
		processor.graph_from_source(orig_source, graph=graph, pgraph=pgraph, rdfOutput = rdfOutput)

		# The Microdata part
		try :
			from pyMicrodata import pyMicrodata
			processor = pyMicrodata(base = baseURI, vocab_expansion = vocab_expansion, vocab_cache = vocab_cache)
			processor.graph_from_source(orig_source, graph=graph, rdfOutput = rdfOutput)
		except ImportError :
			# no pyMicrodata installed!
			pass
Exemple #5
0
	def parse(self, source, graph,
			  pgraph                 = None,
			  media_type             = None,
			  rdfa_version           = None,
			  embedded_rdf           = False,
			  vocab_expansion        = False,
			  vocab_cache            = False,
			  rdfOutput              = False) :
		"""
		Run the RDFa processor over the source and add the resulting triples to the output graph.

		@param source: one of the input sources that the RDFLib package defined
		@type source: InputSource class instance
		@param graph: target graph for the triples; output graph, in RDFa spec. parlance
		@type graph: RDFLib Graph
		@keyword pgraph: target for error and warning triples; processor graph, in RDFa spec. parlance. If set to None, these triples are ignored
		@type pgraph: RDFLib Graph
		@keyword media_type: explicit setting of the preferred media type (a.k.a. content type) of the RDFa source. None means the content type of the HTTP result is used, or a guess is made based on the suffix of a file
		@type media_type: string
		@keyword rdfa_version: 1.0 or 1.1. If the value is None, then, by default, 1.1 is used unless the source has explicit signals to use 1.0 (e.g., using a @version attribute, using a DTD set up for 1.0, etc)
		@type rdfa_version: string
		@keyword embedded_rdf: some formats allow embedding RDF in other formats: (X)HTML can contain turtle in a special <script> element, SVG can have RDF/XML embedded in a <metadata> element. This flag controls whether those triples should be interpreted and added to the output graph. Some languages (e.g., SVG) require this, and the flag is ignored.
		@type embedded_rdf: Boolean
		@keyword vocab_expansion: whether the RDFa @vocab attribute should also mean vocabulary expansion (see the RDFa 1.1 spec for further details)
		@type vocab_expansion: Boolean
		@keyword vocab_cache: in case vocab expansion is used, whether the expansion data (i.e., vocabulary) should be cached locally. This requires the ability for the local application to write on the local file system
		@type vocab_cache: Boolean
		@keyword rdfOutput: whether Exceptions should be caught and added, as triples, to the processor graph, or whether they should be raised.
		@type rdfOutput: Boolean
		"""
		from pyRdfa import pyRdfa, Options

		# The processor is handed the original source reference (stream, URL or file name) rather than the InputSource wrapper.
		if isinstance(source, StringInputSource) :
			orig_source = source.getByteStream()
		elif isinstance(source, URLInputSource) :
			orig_source = source.url
		elif isinstance(source, FileInputSource) :
			orig_source = source.file.name
			# The file is re-opened by name, so release the handle held by the InputSource.
			source.file.close()
		else :
			# Any other InputSource: fall back to its byte stream instead of leaving orig_source unbound.
			orig_source = source.getByteStream()

		# "is not None" rather than "!= None": an identity test does not go through the graph's own comparison operators.
		self.options = Options(output_processor_graph = (pgraph is not None),
							   embedded_rdf           = embedded_rdf,
							   vocab_expansion        = vocab_expansion,
							   vocab_cache            = vocab_cache)

		baseURI   = source.getPublicId()
		processor = pyRdfa(self.options, base = baseURI, media_type = media_type, rdfa_version = rdfa_version)
		processor.graph_from_source(orig_source, graph=graph, pgraph=pgraph, rdfOutput = rdfOutput)
Exemple #6
0
 def __init__(self, url, impl):
     """Set up empty bookkeeping containers and the vocabulary for *impl*.

     *url* is stored as ``self.source``. For ``impl == 'rdfa'`` the RDFS
     range/domain URIs and a ``pyRdfa`` parser are selected; for
     ``impl == 'microdata'`` the schema.org range/domain URIs and a
     ``pyMicrodata`` parser. For any other value none of the URI or parser
     attributes are set. The parent initializer is called last.
     """
     rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
     rdfs_subclass = "http://www.w3.org/2000/01/rdf-schema#subClassOf"

     # TODO(review): the original author questioned whether all of these
     # up-front initializations are needed.
     self.ns_ont = {}
     self.attribs_by_class = defaultdict(list)
     self.ontologies = []
     self.attributes = []
     self.source = url
     self.impl = impl

     if impl == 'rdfa':
         self.range_uri = "http://www.w3.org/2000/01/rdf-schema#range"
         self.domain_uri = "http://www.w3.org/2000/01/rdf-schema#domain"
         self.type_uri = rdf_type
         self.subclass_uri = rdfs_subclass
         self.parser = pyRdfa()
     elif impl == 'microdata':
         self.range_uri = "http://schema.org/range"
         self.domain_uri = "http://schema.org/domain"
         self.type_uri = rdf_type
         self.subclass_uri = rdfs_subclass
         self.parser = pyMicrodata()
     return super(Graph, self).__init__()
    def parse(self):
        """
        Run the RDFa processor over ``self.uri``, filling ``self.default_graph``
        and ``self.processor_graph``, and keep the processor on ``self.processor``.

        When ``self.rdfa_lite`` is set, the ``lite_prune`` transformer is applied.
        """
        if self.rdfa_lite:
            from pyRdfa.transform.lite import lite_prune
            active_transformers = [lite_prune]
        else:
            active_transformers = []

        rdfa_processor = pyRdfa(
            options=Options(
                output_default_graph=True,
                output_processor_graph=True,
                transformers=active_transformers,
                vocab_expansion=self.vocab_expansion,
                embedded_rdf=self.embedded_rdf,
                add_informational_messages=True,
            ),
            base=self.base,
            media_type=self.media_type,
        )
        rdfa_processor.graph_from_source(
            self.uri,
            graph=self.default_graph,
            pgraph=self.processor_graph,
            rdfOutput=True,
        )
        # Kept so that parameters can later be extracted for the error messages
        self.processor = rdfa_processor
Exemple #8
0
    def _schema_nodes(self):
        """Parse ``self._ontology_file`` into a graph and yield its triples.

        The parser is picked from the file extension: ``.ttl`` files go
        through ``rdflib.Graph().parse(..., format="n3")``, everything else
        through ``pyRdfa().graph_from_source``. The location that is actually
        parsed is whatever ``self._read_schema()`` returns.

        Each triple is recorded in ``self.ontology`` (keyed by subject) and
        then yielded as ``(subj, pred, obj)``. This is a generator, so none
        of the work below happens until the first item is requested.

        Raises:
            ValueError: if no ontology file or no lexicon is configured.
            IOError: if the ontology cannot be parsed.
        """
        # Validate before touching the file name: os.path.splitext() fails
        # with an unrelated error on None and would hide the intended message.
        if not self._ontology_file:
            raise ValueError("No ontology file specified. %s" % errorstring_base)
        if not self.lexicon:
            raise ValueError("No lexicon object assigned. %s" % errorstring_base)

        ext = os.path.splitext(self._ontology_file)[1]
        if ext == ".ttl":
            self._ontology_parser_function = lambda s: rdflib.Graph().parse(s, format="n3")
        else:
            self._ontology_parser_function = lambda s: pyRdfa().graph_from_source(s)

        latest_file = self._read_schema()

        try:
            self.graph = self._ontology_parser_function(latest_file)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must not be reported as a parsing problem.
            raise IOError("Error parsing ontology at %s" % latest_file)

        for subj, pred, obj in self.graph:
            self.ontology[subj].append((pred, obj))
            yield (subj, pred, obj)
Exemple #9
0
def return_graph(uri, options, newCache=False):
    """Dereference *uri* and parse its content into an RDFLib Graph.

    The URI's content type is checked and either one of RDFLib's parsers is
    invoked (for the Turtle, RDF/XML, and N Triple cases) or a separate RDFa
    processing is invoked on the RDFa content.

    The Accept header of the HTTP request gives a preference to Turtle,
    followed by RDF/XML and then HTML (RDFa), in case content negotiation is
    used.

    This function is used to retrieve the vocabulary file and turn it into an
    RDFLib graph.

    @param uri: URI for the graph
    @param options: used as a place where warnings can be sent
    @param newCache: in case this is used with caching, whether a new cache is generated; that modifies the warning text
    @return: A tuple consisting of an RDFLib Graph instance and an expiration date. Both are None if the dereferencing failed; the graph alone is None if the content type is not recognised or the parsing was unsuccessful.
    """
    def return_to_cache(msg):
        # msg (the caught exception) is accepted for the call sites but is
        # not part of the warning text.
        if newCache:
            options.add_warning(err_unreachable_vocab % uri,
                                warning_type=VocabReferenceError)
        else:
            options.add_warning(err_outdated_cache % uri,
                                warning_type=VocabReferenceError)

    try:
        content = URIOpener(
            uri, {
                'Accept':
                'text/html;q=0.8, application/xhtml+xml;q=0.8, text/turtle;q=1.0, application/rdf+xml;q=0.9'
            })
    except Exception as exc:
        # HTTP errors, RDFa errors and anything else were all handled the
        # same way, so a single handler covers them.
        return_to_cache(exc)
        return (None, None)

    # Store the expiration date of the newly accessed data
    expiration_date = content.expiration_date

    # Only set once a parser has finished successfully, so that a failed
    # parse yields None rather than a partially filled graph.
    retval = None

    if content.content_type == MediaTypes.turtle:
        try:
            graph = Graph()
            graph.parse(content.data, format="n3")
            retval = graph
        except Exception as exc:
            options.add_warning(err_unparsable_Turtle_vocab % (uri, exc))
    elif content.content_type == MediaTypes.rdfxml:
        try:
            graph = Graph()
            graph.parse(content.data)
            retval = graph
        except Exception as exc:
            # NOTE(review): this is the RDF/XML branch but it reports with
            # the Turtle message template - confirm whether a dedicated
            # RDF/XML template exists.
            options.add_warning(err_unparsable_Turtle_vocab % (uri, exc))
    elif content.content_type == MediaTypes.nt:
        try:
            graph = Graph()
            graph.parse(content.data, format="nt")
            retval = graph
        except Exception as exc:
            options.add_warning(err_unparsable_ntriples_vocab % (uri, exc))
    elif (content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml]
          or xml_application_media_type.match(content.content_type) is not None):
        try:
            from pyRdfa import pyRdfa
            from pyRdfa.options import Options
            # A separate name on purpose: rebinding `options` here would send
            # the warning below to this throwaway object instead of the
            # caller's.
            rdfa_options = Options()
            retval = pyRdfa(rdfa_options).graph_from_source(content.data)
        except Exception as exc:
            options.add_warning(err_unparsable_rdfa_vocab % (uri, exc))
    else:
        options.add_warning(err_unrecognised_vocab_type %
                            (uri, content.content_type))

    return (retval, expiration_date)
Exemple #10
0
 def _parse_func(s):
     """Return the graph a fresh ``pyRdfa`` processor extracts from source *s*."""
     rdfa_processor = pyRdfa()
     return rdfa_processor.graph_from_source(s)
Exemple #11
0
        try:
            retval = Graph()
            retval.parse(content.data, format="nt")
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_ntriples_vocab % (uri, value))
    elif (
        content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml]
        or xml_application_media_type.match(content.content_type) != None
    ):
        try:
            from pyRdfa import pyRdfa
            from pyRdfa.options import Options

            options = Options()
            retval = pyRdfa(options).graph_from_source(content.data)
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_rdfa_vocab % (uri, value))
    else:
        options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))

    return (retval, expiration_date)


############################################################################################
# Shorthand names for individual RDF / RDFS terms, looked up by local name in
# ns_rdf and ns_rdfs (both defined outside this view).
# NOTE: `type` shadows the builtin of the same name from this point on.
type = ns_rdf["type"]
Property = ns_rdf["Property"]
Class = ns_rdfs["Class"]
subClassOf = ns_rdfs["subClassOf"]
subPropertyOf = ns_rdfs["subPropertyOf"]
Exemple #12
0
 def __init__(self, graph, doc_lines, url=""):
     """Initialise the base validator, then narrow it down to RDFa.

     Creates a ``pyRdfa`` parser and replaces ``self.graph`` with its
     ``rdfa_graph`` attribute.
     """
     super(RdfValidator, self).__init__(graph, doc_lines, url=url)
     self.parser = pyRdfa()
     # self.graph is presumably the compound graph set by the parent
     # initializer; only its RDFa half is used from here on.
     compound_graph = self.graph
     self.graph = compound_graph.rdfa_graph
Exemple #13
0
 def __init__(self, graph, doc_lines):
     """Initialise the base validator, then narrow it down to RDFa.

     Creates a ``pyRdfa`` parser, replaces ``self.graph`` with its
     ``rdfa_graph`` attribute and logs the resulting graph.
     """
     super(RdfValidator, self).__init__(graph, doc_lines)
     self.parser = pyRdfa()
     # self.graph is presumably the compound graph set by the parent
     # initializer; only its RDFa half is used from here on.
     compound_graph = self.graph
     self.graph = compound_graph.rdfa_graph
     message = "in RdfValidator init %s" % self.graph
     log.info(message)
Exemple #14
0
                output_processor_graph = True
            elif a == "default":
                output_default_graph = True
                output_processor_graph = False
        else:
            usage()
            sys.exit(1)
except:
    usage()
    sys.exit(1)

# Collect the command-line switches gathered above into the processor options.
options = Options(output_default_graph=output_default_graph,
                  output_processor_graph=output_processor_graph,
                  space_preserve=space_preserve,
                  transformers=extras,
                  embedded_rdf=embedded_rdf,
                  vocab_expansion=vocab_expansion,
                  vocab_cache=vocab_cache,
                  vocab_cache_report=vocab_cache_report,
                  refresh_vocab_cache=refresh_vocab_cache)

processor = pyRdfa(options, base)
# A parenthesised single-argument print is valid both as the Python 2
# statement and as the Python 3 function, so the script runs on either.
if value:
    print(processor.rdf_from_sources(value,
                                     outputFormat=format,
                                     rdfOutput=rdfOutput))
else:
    # No sources were given: read the document from standard input.
    print(processor.rdf_from_source(sys.stdin,
                                    outputFormat=format,
                                    rdfOutput=rdfOutput))
Exemple #15
0
 def test_url(self):
     """The serialization of the remote catalog page contains ``self.target1``.

     Note that this fetches the page over the network.
     """
     g = pyRdfa().rdf_from_source('http://oreilly.com/catalog/9780596516499/')
     # assertIn replaces the deprecated assert_ alias (removed in Python
     # 3.12) and reports both operands on failure.
     self.assertIn(self.target1, g)
Exemple #16
0
def return_graph(uri, options, newCache = False) :
	"""Dereference *uri* and parse its content into an RDFLib Graph.

	The URI's content type is checked and either one of RDFLib's parsers is invoked (for the Turtle, RDF/XML, and N Triple cases)
	or a separate RDFa processing is invoked on the RDFa content.

	The Accept header of the HTTP request gives a preference to Turtle, followed by RDF/XML and then HTML (RDFa), in case content negotiation is used.

	This function is used to retrieve the vocabulary file and turn it into an RDFLib graph.

	@param uri: URI for the graph
	@param options: used as a place where warnings can be sent
	@param newCache: in case this is used with caching, whether a new cache is generated; that modifies the warning text
	@return: A tuple consisting of an RDFLib Graph instance and an expiration date. Both are None if the dereferencing failed; the graph alone is None if the content type is not recognised or the parsing was unsuccessful.
	"""
	def return_to_cache(msg) :
		# msg (the caught exception) is accepted for the call sites but is not part of the warning text.
		if newCache :
			options.add_warning(err_unreachable_vocab % uri, warning_type=VocabReferenceError)
		else :
			options.add_warning(err_outdated_cache % uri, warning_type=VocabReferenceError)

	try :
		content = URIOpener(uri,
							{'Accept' : 'text/html;q=0.8, application/xhtml+xml;q=0.8, text/turtle;q=1.0, application/rdf+xml;q=0.9'})
	except Exception as exc :
		# HTTP errors, RDFa errors and anything else were all handled the same way, so a single handler covers them.
		return_to_cache(exc)
		return (None,None)

	# Store the expiration date of the newly accessed data
	expiration_date = content.expiration_date

	# Only set once a parser has finished successfully, so that a failed parse yields None rather than a partially filled graph.
	retval = None

	if content.content_type == MediaTypes.turtle :
		try :
			graph = Graph()
			graph.parse(content.data, format="n3")
			retval = graph
		except Exception as exc :
			options.add_warning(err_unparsable_Turtle_vocab % (uri,exc))
	elif content.content_type == MediaTypes.rdfxml :
		try :
			graph = Graph()
			graph.parse(content.data)
			retval = graph
		except Exception as exc :
			# NOTE(review): this is the RDF/XML branch but it reports with the Turtle message template - confirm whether a dedicated RDF/XML template exists.
			options.add_warning(err_unparsable_Turtle_vocab % (uri,exc))
	elif content.content_type == MediaTypes.nt :
		try :
			graph = Graph()
			graph.parse(content.data, format="nt")
			retval = graph
		except Exception as exc :
			options.add_warning(err_unparsable_ntriples_vocab % (uri,exc))
	elif content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml] or xml_application_media_type.match(content.content_type) is not None :
		try :
			from pyRdfa import pyRdfa
			from pyRdfa.options	import Options
			# A separate name on purpose: rebinding `options` here would send the warning below to this throwaway object instead of the caller's.
			rdfa_options = Options()
			retval = pyRdfa(rdfa_options).graph_from_source(content.data)
		except Exception as exc :
			options.add_warning(err_unparsable_rdfa_vocab % (uri,exc))
	else :
		options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))

	return (retval, expiration_date)
Exemple #17
0
            options.add_warning(err_unparsable_Turtle_vocab % (uri, value))
    elif content.content_type == MediaTypes.nt:
        try:
            retval = Graph()
            retval.parse(content.data, format="nt")
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_ntriples_vocab % (uri, value))
    elif content.content_type in [
            MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml
    ] or xml_application_media_type.match(content.content_type) != None:
        try:
            from pyRdfa import pyRdfa
            from pyRdfa.options import Options
            options = Options()
            retval = pyRdfa(options).graph_from_source(content.data)
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_rdfa_vocab % (uri, value))
    else:
        options.add_warning(err_unrecognised_vocab_type %
                            (uri, content.content_type))

    return (retval, expiration_date)


############################################################################################
# Shorthand names for individual RDF / RDFS terms, looked up by local name in
# ns_rdf and ns_rdfs (both defined outside this view).
# NOTE: `type` shadows the builtin of the same name from this point on.
type = ns_rdf["type"]
Property = ns_rdf["Property"]
Class = ns_rdfs["Class"]
subClassOf = ns_rdfs["subClassOf"]
Exemple #18
0
args = parser.parse_args()

# Flatten args.example into a single list. Each entry is itself iterated,
# so this assumes args.example is a sequence of sequences - TODO confirm
# against the argument definition.
EXLIST = []
for ex in args.example:
    EXLIST += ex



import io
import re
import rdflib
from rdflib.serializer import Serializer
from rdflib.parser import Parser

from pyRdfa import pyRdfa
# Module-level RDFa processor instance, created once at import time.
# NOTE(review): the empty string is passed as the first positional argument -
# confirm which pyRdfa parameter that is meant to fill.
RDFaProcessor = pyRdfa("")


from schemaexamples import SchemaExamples, Example


def validate():
    """Load the "default" example files and print how many were loaded.

    NOTE(review): COUNT and ERRORCOUNT are initialised but never used in the
    visible body, so the function looks truncated here - confirm against
    the full file before relying on this description.
    """
    COUNT = 0
    ERRORCOUNT = 0

    SchemaExamples.loadExamplesFiles("default")
    print("Loaded %d examples " % (SchemaExamples.count()))

    print("Processing")

Exemple #19
0
 def test_file(self):
     """The serialization of the local sample page contains ``self.target2`` encoded as UTF-8."""
     g = pyRdfa().rdf_from_source('test/rdfa/oreilly.html')
     # assertIn replaces the deprecated assert_ alias (removed in Python
     # 3.12) and reports both operands on failure.
     self.assertIn(self.target2.encode('utf-8'), g)
Exemple #20
0
 def test_file(self):
     """The serialization of the local sample page contains ``self.target2``."""
     g = pyRdfa().rdf_from_source('test/rdfa/oreilly.html')
     # assertIn replaces the deprecated assert_ alias (removed in Python
     # 3.12) and reports both operands on failure.
     self.assertIn(self.target2, g)
    def test_templates_course_detail_rdfa(self):
        """
        Extract RDFa tags from the HTML markup and check that it is complete as expected.
        """
        # Create organizations
        main_organization = OrganizationFactory(page_title="Main org",
                                                fill_logo=True,
                                                should_publish=True)
        other_organization = OrganizationFactory(page_title="Other org",
                                                 fill_logo=True,
                                                 should_publish=True)

        # Create persons
        author1 = PersonFactory(page_title="François", fill_portrait=True)
        placeholder = author1.extended_object.placeholders.get(slot="bio")
        add_plugin(
            language="en",
            placeholder=placeholder,
            plugin_type="PlainTextPlugin",
            body="La bio de François",
        )
        author2 = PersonFactory(page_title="Jeanne",
                                fill_portrait=True,
                                should_publish=True)

        # Create a course with cover image, team and organizations
        licence_content, licence_participation = LicenceFactory.create_batch(2)
        course = CourseFactory(
            code="abcde",
            effort=[3, "hour"],
            page_title="Very interesting course",
            fill_cover=True,
            fill_organizations=[main_organization, other_organization],
            fill_team=[author1, author2],
            fill_licences=[
                ("course_license_content", licence_content),
                ("course_license_participation", licence_participation),
            ],
        )

        # Add an introduction to the course
        placeholder = course.extended_object.placeholders.get(
            slot="course_introduction")
        add_plugin(
            language="en",
            placeholder=placeholder,
            plugin_type="PlainTextPlugin",
            body="Introduction to interesting course",
        )

        # Create an ongoing open course run that will be published (created before
        # publishing the page)
        now = datetime(2030, 6, 15, tzinfo=timezone.utc)
        CourseRunFactory(
            direct_course=course,
            start=datetime(2030, 6, 30, tzinfo=timezone.utc),
            end=datetime(2030, 8, 1, tzinfo=timezone.utc),
            enrollment_start=datetime(2030, 6, 14, tzinfo=timezone.utc),
            enrollment_end=datetime(2030, 6, 16, tzinfo=timezone.utc),
            languages=["en", "fr"],
        )
        CourseRunFactory(
            direct_course=course,
            start=datetime(2030, 6, 1, tzinfo=timezone.utc),
            end=datetime(2030, 7, 10, tzinfo=timezone.utc),
            enrollment_start=datetime(2030, 6, 13, tzinfo=timezone.utc),
            enrollment_end=datetime(2030, 6, 20, tzinfo=timezone.utc),
            languages=["de"],
        )

        author1.extended_object.publish("en")
        course.extended_object.publish("en")

        url = course.extended_object.get_absolute_url()
        with mock.patch.object(timezone, "now", return_value=now):
            response = self.client.get(url)
        self.assertEqual(response.status_code, 200)

        processor = pyRdfa()
        content = str(response.content)
        parser = html5lib.HTMLParser(
            tree=html5lib.treebuilders.getTreeBuilder("dom"))
        dom = parser.parse(io.StringIO(content))
        graph = processor.graph_from_DOM(dom)

        # Retrieve the course top node (body)
        (subject, ) = graph.subjects(
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("https://schema.org/Course"),
        )
        self.assertEqual(len(list(graph.triples((subject, None, None)))), 38)

        # Opengraph
        self.assertTrue((
            subject,
            URIRef("http://ogp.me/ns#url"),
            Literal("http://example.com/en/very-interesting-course/"),
        ) in graph)
        self.assertTrue((subject, URIRef("http://ogp.me/ns#site_name"),
                         Literal("example.com")) in graph)
        self.assertTrue((subject, URIRef("http://ogp.me/ns#type"),
                         Literal("website")) in graph)
        self.assertTrue((subject, URIRef("http://ogp.me/ns#locale"),
                         Literal("en")) in graph)
        self.assertTrue((subject, URIRef("http://ogp.me/ns#determiner"),
                         Literal("")) in graph)
        self.assertTrue((
            subject,
            URIRef("http://ogp.me/ns#title"),
            Literal("Very interesting course"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("http://ogp.me/ns#description"),
            Literal("Introduction to interesting course"),
        ) in graph)

        (image_value, ) = graph.objects(subject,
                                        URIRef("http://ogp.me/ns#image"))
        pattern = (
            r"/media/filer_public_thumbnails/filer_public/.*cover\.jpg__"
            r"1200x630_q85_crop_replace_alpha-%23FFFFFF_subject_location")
        self.assertIsNotNone(re.search(pattern, str(image_value)))

        # Schema.org
        # - Course
        self.assertTrue((
            subject,
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("https://schema.org/Course"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/name"),
            Literal("Very interesting course"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/description"),
            Literal("Introduction to interesting course"),
        ) in graph)
        self.assertTrue((subject, URIRef("https://schema.org/courseCode"),
                         Literal("ABCDE")) in graph)
        self.assertTrue((subject,
                         URIRef("https://schema.org/isAccessibleForFree"),
                         Literal("true")) in graph)
        self.assertTrue((subject, URIRef("https://schema.org/timeRequired"),
                         Literal("PT3H")) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/stylesheet"),
            URIRef("/static/richie/css/main.css"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/shortcut"),
            URIRef("/static/richie/favicon/favicon.ico"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/icon"),
            URIRef("/static/richie/favicon/favicon.ico"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/icon"),
            URIRef("/static/richie/favicon/favicon-16x16.png"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/icon"),
            URIRef("/static/richie/favicon/favicon-32x32.png"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/apple-touch-icon"),
            URIRef("/static/richie/favicon/apple-touch-icon.png"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/mask-icon"),
            URIRef("/static/richie/favicon/safari-pinned-tab.svg"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/manifest"),
            URIRef("/static/richie/favicon/site.webmanifest"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/noreferrer"),
            URIRef("https://www.facebook.com/example"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/noopener"),
            URIRef("https://www.facebook.com/example"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/alternate"),
            URIRef("http://example.com/en/very-interesting-course/"),
        ) in graph)
        self.assertTrue((
            subject,
            URIRef("https://schema.org/alternate"),
            URIRef("http://example.com/fr/very-interesting-course/"),
        ) in graph)

        (image_value, ) = graph.objects(subject,
                                        URIRef("https://schema.org/image"))
        pattern = (
            r"/media/filer_public_thumbnails/filer_public/.*cover\.jpg__"
            r"300x170_q85_crop_replace_alpha-%23FFFFFF_subject_location")
        self.assertIsNotNone(re.search(pattern, str(image_value)))

        self.assertTrue((subject, URIRef("https://schema.org/license"),
                         URIRef(licence_content.url)) in graph)
        self.assertTrue((
            None,
            URIRef("https://schema.org/license"),
            URIRef(licence_participation.url),
        ) not in graph)
        # - Main organization (Provider)
        self.assertTrue((subject, URIRef("https://schema.org/provider"),
                         URIRef("/en/main-org/")) in graph)
        self.assertTrue((
            URIRef("/en/main-org/"),
            URIRef("https://schema.org/name"),
            Literal("Main org"),
        ) in graph)
        self.assertTrue((
            URIRef("/en/main-org/"),
            URIRef("https://schema.org/url"),
            Literal("http://example.com/en/main-org/"),
        ) in graph)

        (logo_value, ) = graph.objects(URIRef("/en/main-org/"),
                                       URIRef("https://schema.org/logo"))
        pattern = (r"/media/filer_public_thumbnails/filer_public/.*logo.jpg__"
                   r"200x113_q85_replace_alpha-%23FFFFFF_subject_location")
        self.assertIsNotNone(re.search(pattern, str(logo_value)))

        # - Organizations (Contributor)
        contributor_subjects = list(
            graph.objects(subject, URIRef("https://schema.org/contributor")))
        self.assertEqual(len(contributor_subjects), 2)

        self.assertTrue((
            contributor_subjects[0],
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("https://schema.org/CollegeOrUniversity"),
        ) in graph)
        self.assertTrue((
            contributor_subjects[1],
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("https://schema.org/CollegeOrUniversity"),
        ) in graph)

        self.assertTrue((
            URIRef("/en/main-org/"),
            URIRef("https://schema.org/name"),
            Literal("Main org"),
        ) in graph)

        self.assertTrue((
            URIRef("/en/other-org/"),
            URIRef("https://schema.org/name"),
            Literal("Other org"),
        ) in graph)

        self.assertTrue((
            URIRef("/en/main-org/"),
            URIRef("https://schema.org/url"),
            Literal("http://example.com/en/main-org/"),
        ) in graph)

        self.assertTrue((
            URIRef("/en/other-org/"),
            URIRef("https://schema.org/url"),
            Literal("http://example.com/en/other-org/"),
        ) in graph)

        pattern = (r"/media/filer_public_thumbnails/filer_public/.*logo.jpg__"
                   r"200x113_q85_replace_alpha-%23FFFFFF_subject_location")
        (logo_value, ) = graph.objects(URIRef("/en/main-org/"),
                                       URIRef("https://schema.org/logo"))
        self.assertIsNotNone(re.search(pattern, str(logo_value)))

        (logo_value, ) = graph.objects(URIRef("/en/other-org/"),
                                       URIRef("https://schema.org/logo"))
        self.assertIsNotNone(re.search(pattern, str(logo_value)))

        # - Team (Person)
        author_subjects = list(
            graph.objects(subject, URIRef("https://schema.org/author")))
        self.assertEqual(len(author_subjects), 2)

        self.assertTrue((
            author_subjects[0],
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("https://schema.org/Person"),
        ) in graph)
        self.assertTrue((
            author_subjects[1],
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("https://schema.org/Person"),
        ) in graph)

        for name in ["Fran\\xc3\\xa7ois", "Jeanne"]:
            (author_subject, ) = graph.subjects(
                URIRef("https://schema.org/name"), Literal(name))
            self.assertTrue(author_subject in author_subjects)

        (author_subject, ) = graph.subjects(
            URIRef("https://schema.org/description"),
            Literal("La bio de Fran\\xc3\\xa7ois"),
        )
        self.assertTrue(author_subject in author_subjects)

        for url in [
                "http://example.com/en/francois/",
                "http://example.com/en/jeanne/"
        ]:
            (author_subject, ) = graph.subjects(
                URIRef("https://schema.org/url"), Literal(url))
            self.assertTrue(author_subject in author_subjects)

        pattern = (
            r"/media/filer_public_thumbnails/filer_public/.*portrait.jpg__"
            r"200x200_q85_crop_replace_alpha-%23FFFFFF_subject_location")
        for author_subject in author_subjects:
            (portrait_value, ) = graph.objects(
                author_subject, URIRef("https://schema.org/image"))
            self.assertIsNotNone(re.search(pattern, str(portrait_value)))

        # - Course runs (CourseInstance)
        course_run_subjects = list(
            graph.objects(subject,
                          URIRef("https://schema.org/hasCourseInstance")))
        self.assertEqual(len(course_run_subjects), 2)

        self.assertTrue((
            course_run_subjects[0],
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("https://schema.org/CourseInstance"),
        ) in graph)
        self.assertTrue((
            course_run_subjects[0],
            URIRef("https://schema.org/courseMode"),
            Literal("online"),
        ) in graph)
        self.assertTrue((
            course_run_subjects[1],
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            URIRef("https://schema.org/CourseInstance"),
        ) in graph)
        self.assertTrue((
            course_run_subjects[1],
            URIRef("https://schema.org/courseMode"),
            Literal("online"),
        ) in graph)

        for start_date in ["2030-06-01", "2030-06-30"]:
            (subject, ) = graph.subjects(
                URIRef("https://schema.org/startDate"), Literal(start_date))
            self.assertTrue(subject in course_run_subjects)

        for end_date in ["2030-07-10", "2030-08-01"]:
            (subject, ) = graph.subjects(URIRef("https://schema.org/endDate"),
                                         Literal(end_date))
            self.assertTrue(subject in course_run_subjects)
Exemple #22
0
				output_processor_graph 	= True
			elif a == "processor,default" or a == "default,processor" :
				output_processor_graph 	= True
			elif a == "default" :				
				output_default_graph 	= True
				output_processor_graph 	= False			
		else :
			usage()
			sys.exit(1)
except :
	usage()
	sys.exit(1)

# Bundle the settings chosen earlier in the script into a single Options
# object for the RDFa processor.
options = Options(
	output_default_graph=output_default_graph,
	output_processor_graph=output_processor_graph,
	space_preserve=space_preserve,
	transformers=extras,
	embedded_rdf=embedded_rdf,
	vocab_expansion=vocab_expansion,
	vocab_cache=vocab_cache,
	vocab_cache_report=vocab_cache_report,
	refresh_vocab_cache=refresh_vocab_cache,
)

processor = pyRdfa(options, base)

# NOTE(review): `value` presumably holds the input sources left over after
# option parsing -- confirm against the parsing code above. When it is empty
# the document is read from standard input instead.
# print(...) with a single argument prints the same text as the Python 2
# print statement and is also valid syntax on Python 3.
if len(value) >= 1:
	print(processor.rdf_from_sources(value, outputFormat=format, rdfOutput=rdfOutput))
else:
	print(processor.rdf_from_source(sys.stdin, outputFormat=format, rdfOutput=rdfOutput))

Exemple #23
0
 def test_url(self):
     """Check that the RDFa output for a catalog URL contains target1.

     The page at the given http URL is run through
     ``pyRdfa().rdf_from_source`` and the UTF-8 encoded ``self.target1``
     must occur in the returned value.

     NOTE(review): the source is a live URL, so this test presumably needs
     network access and depends on the remote page's content -- confirm.
     """
     g = pyRdfa().rdf_from_source(
         'http://oreilly.com/catalog/9780596516499/')
     # assertIn replaces the deprecated assert_ alias (removed in
     # Python 3.12) and reports both operands when the check fails.
     self.assertIn(self.target1.encode('utf-8'), g)