def loadTerms():
    """Load the schema.org source triples exactly once.

    Uses the module-level LOADEDTERMS flag as a guard so repeated calls
    are no-ops after the first successful load.
    """
    global LOADEDTERMS
    if LOADEDTERMS:
        return
    LOADEDTERMS = True
    print("Loading triples files")
    SdoTermSource.loadSourceGraph("default")
    triple_count = len(SdoTermSource.sourceGraph())
    term_count = len(SdoTermSource.getAllTerms())
    print("loaded %s triples - %s terms" % (triple_count, term_count))
def exportrdf(exportType):
    """Export the vocabulary as RDF serialisation file(s).

    exportType is either "RDFExports" (emit every supported format) or
    "RDFExport.<format>" for a single format. Raises Exception on an
    unrecognised exportType. Lazily builds two module-level graphs on
    first call: `allGraph` (full vocabulary) and `currentGraph` (full
    vocabulary minus attic-housed terms).
    """
    global allGraph, currentGraph
    # One-time construction of the export graphs; subsequent calls reuse them.
    if not allGraph:
        allGraph = rdflib.Graph()
        allGraph.bind("schema",VOCABURI)
        currentGraph = rdflib.Graph()
        currentGraph.bind("schema",VOCABURI)

        allGraph += SdoTermSource.sourceGraph()

        protocol, altprotocol = protocols()

        # Drop any triples whose subject is not in the schema.org namespace.
        deloddtriples = """DELETE {?s ?p ?o} WHERE { ?s ?p ?o. FILTER (! strstarts(str(?s), "%s://schema.org") ). }""" % (protocol)
        allGraph.update(deloddtriples)
        currentGraph += allGraph

        desuperseded="""PREFIX schema: <%s://schema.org/> DELETE {?s ?p ?o} WHERE{ ?s ?p ?o; schema:supersededBy ?sup. }""" % (protocol)
        # Currently superseded terms are not suppressed from 'current' file dumps,
        # whereas they are suppressed from the UI.
        #currentGraph.update(desuperseded)

        # Attic-housed terms are excluded from the 'current' dumps.
        delattic="""PREFIX schema: <%s://schema.org/> DELETE {?s ?p ?o} WHERE{ ?s ?p ?o; schema:isPartOf <%s://attic.schema.org>. }""" % (protocol,protocol)
        currentGraph.update(delattic)

    formats = ["json-ld", "turtle", "nt", "nquads", "rdf"]
    # Strip the "RDFExport." prefix to get the single-format name (if any).
    extype = exportType[len("RDFExport."):]
    if exportType == "RDFExports":
        for format in sorted(formats):
            _exportrdf(format,allGraph,currentGraph)
    elif extype in formats:
        _exportrdf(extype,allGraph,currentGraph)
    else:
        raise Exception("Unknown export format: %s" % exportType)
TRIPLESFILESGLOB = ["data/*.ttl", "data/ext/*/*.ttl"] EXAMPLESFILESGLOB = ["data/*examples.txt", "data/ext/*/*examples.txt"] schema_path = './data/schema.ttl' examples_path = './data/examples.txt' andstr = "\n AND\n " TYPECOUNT_UPPERBOUND = 1500 TYPECOUNT_LOWERBOUND = 500 logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) SdoTermSource.loadSourceGraph("default") print("loaded %s triples - %s terms" % (len(SdoTermSource.sourceGraph()), len(SdoTermSource.getAllTerms()))) print("Loading examples files") SchemaExamples.loadExamplesFiles("default") print("Loaded %d examples" % SchemaExamples.count()) # Tests to probe the health of both schemas and code. # Note that known failings can be annotated with @unittest.expectedFailure or @skip("reason...") class BallparkCountTests(unittest.TestCase): def test_alltypes(self): # ballpark estimates. self.assertTrue( len(SdoTermSource.getAllTypes()) > TYPECOUNT_LOWERBOUND,
def loadGraph(self):
    """Run the listing routine over the full source graph."""
    graph = SdoTermSource.sourceGraph()
    self.list(graph)
def loadGraphs(self):
    """Load the default source graph and keep a handle to it on the instance."""
    SdoTermSource.loadSourceGraph("default")
    graph = SdoTermSource.sourceGraph()
    self.rdflib_data = graph
def createcontext():
    """Generate a basic JSON-LD context document for schema.org.

    Returns the context as a single string: one prefix entry per graph
    namespace, then one entry per vocabulary term (properties get a
    "@type" hint where their range warrants it). Reference terms are
    skipped, and term ids that are absolute http(s) URIs are collected
    but currently not emitted.
    """
    SCHEMAURI = "http://schema.org/"
    jsonldcontext = []
    jsonldcontext.append("{\n \"@context\": {\n")
    jsonldcontext.append(" \"type\": \"@type\",\n")
    jsonldcontext.append(" \"id\": \"@id\",\n")
    jsonldcontext.append(" \"HTML\": { \"@id\": \"rdf:HTML\" },\n")
    #jsonldcontext.append(" \"@vocab\": \"%s\",\n" % SdoTermSource.vocabUri())
    jsonldcontext.append(" \"@vocab\": \"%s\",\n" % SCHEMAURI)

    ns = SdoTermSource.sourceGraph().namespaces()
    done = []
    # BUGFIX: the original iterated `ns` inside itself (`for n in ns: for n in ns:`);
    # when namespaces() yields a generator, the first namespace was consumed by the
    # outer loop and silently dropped from the context. A single pass emits them all.
    for n in ns:
        pref, pth = n
        pref = str(pref)
        if not pref in done:
            done.append(pref)
            if pref == "schema":
                pth = SCHEMAURI  # Override vocab setting to maintain http compatibility
            jsonldcontext.append(" \"%s\": \"%s\",\n" % (pref, pth))

    datatypepre = "schema:"
    vocablines = ""
    externalines = ""
    typins = ""
    for t in SdoTermSource.getAllTerms(expanded=True, supressSourceLinks=True):
        if t.termType == SdoTerm.PROPERTY:
            # Renamed from `range` to avoid shadowing the builtin.
            ranges = t.rangeIncludes
            types = []
            # If Text is in the range, don't output a @type value at all.
            if not "Text" in ranges:
                if "URL" in ranges:
                    types.append("@id")
                if "Date" in ranges:
                    types.append("Date")
                # NOTE(review): schema.org's datatype is spelled "DateTime"; the
                # "Datetime" test below may never match — confirm against the
                # actual values in rangeIncludes before changing it.
                if "Datetime" in ranges:
                    types.append("DateTime")

            typins = ""
            for typ in types:
                typins += ", \"@type\": \"" + typ + "\""

            line = " \"" + t.id + "\": { \"@id\": \"" + prefixedIdFromUri(t.uri) + "\"" + typins + "},"
        elif t.termType == SdoTerm.REFERENCE:
            continue
        else:
            line = " \"" + t.id + "\": {\"@id\": \"" + prefixedIdFromUri(t.uri) + "\"},"

        if t.id.startswith("http:") or t.id.startswith("https:"):
            externalines += line
        else:
            vocablines += line

    jsonldcontext.append(vocablines)
    #jsonldcontext.append(externalines)
    jsonldcontext.append("}}\n")
    ret = "".join(jsonldcontext)
    # Post-process the raw concatenation into readable, newline-separated JSON.
    ret = ret.replace("},}}", "}\n }\n}")
    ret = ret.replace("},", "},\n")
    return ret