Ejemplo n.º 1
0
def loadTerms():
    """Load the "default" source graph into SdoTermSource, at most once.

    The module-level flag ``LOADEDTERMS`` records that loading has been
    started; later calls return immediately. The flag is raised *before* the
    load runs, so a load that fails is not retried by a subsequent call.
    A short progress report is printed to stdout.
    """
    global LOADEDTERMS

    # Already loaded (or loading was attempted): nothing to do.
    if LOADEDTERMS:
        return

    LOADEDTERMS = True
    print("Loading triples files")
    SdoTermSource.loadSourceGraph("default")

    triple_count = len(SdoTermSource.sourceGraph())
    term_count = len(SdoTermSource.getAllTerms())
    print ("loaded %s triples - %s terms" % (triple_count, term_count))
Ejemplo n.º 2
0
def exportrdf(exportType):
    """Export the vocabulary graphs in one, or all, RDF serialisations.

    While the module-level ``allGraph`` is falsy, two rdflib graphs are built
    and stored in the globals ``allGraph`` and ``currentGraph``:

    * ``allGraph`` is the SdoTermSource source graph with every triple removed
      whose subject does not start with ``<protocol>://schema.org``.
    * ``currentGraph`` is a copy of ``allGraph`` with the triples of subjects
      that are ``schema:isPartOf <protocol>://attic.schema.org`` removed.

    The graphs are then handed to ``_exportrdf`` once per requested format.

    Args:
        exportType: ``"RDFExports"`` to export every supported format, or a
            string whose text after the first ``len("RDFExport.")`` characters
            is one of the supported format names.

    Raises:
        Exception: if ``exportType`` selects no supported format.
    """
    global allGraph, currentGraph

    if not allGraph:
        allGraph = rdflib.Graph()
        currentGraph = rdflib.Graph()
        for graph in (allGraph, currentGraph):
            graph.bind("schema", VOCABURI)

        allGraph += SdoTermSource.sourceGraph()

        # Only the first value returned by protocols() is used here.
        protocol, _altprotocol = protocols()

        # Remove every triple whose subject lies outside schema.org.
        drop_foreign_query = """DELETE {?s ?p ?o}
            WHERE {
                ?s ?p ?o.
                FILTER (! strstarts(str(?s), "%s://schema.org") ).
            }""" % protocol
        allGraph.update(drop_foreign_query)
        currentGraph += allGraph

        # Superseded terms are currently kept in the 'current' file dumps
        # (although they are suppressed from the UI), so this query is built
        # but deliberately not applied to currentGraph.
        drop_superseded_query = """PREFIX schema: <%s://schema.org/>
        DELETE {?s ?p ?o}
        WHERE{
            ?s ?p ?o;
                schema:supersededBy ?sup.
        }""" % protocol

        # Remove attic terms from the 'current' graph.
        drop_attic_query = """PREFIX schema: <%s://schema.org/>
        DELETE {?s ?p ?o}
        WHERE{
            ?s ?p ?o;
                schema:isPartOf <%s://attic.schema.org>.
        }""" % (protocol, protocol)
        currentGraph.update(drop_attic_query)

    formats = ["json-ld", "turtle", "nt", "nquads", "rdf"]

    # "RDFExports" means: write every format, in sorted order.
    if exportType == "RDFExports":
        for fmt in sorted(formats):
            _exportrdf(fmt, allGraph, currentGraph)
        return

    # Otherwise the format name is whatever follows the "RDFExport." prefix.
    extype = exportType[len("RDFExport."):]
    if extype not in formats:
        raise Exception("Unknown export format: %s" % exportType)
    _exportrdf(extype, allGraph, currentGraph)
Ejemplo n.º 3
0
# Glob patterns matching the .ttl triples files under data/ and data/ext/*/.
TRIPLESFILESGLOB = ["data/*.ttl", "data/ext/*/*.ttl"]
# Glob patterns matching the *examples.txt files in the same two locations.
EXAMPLESFILESGLOB = ["data/*examples.txt", "data/ext/*/*examples.txt"]

# Paths to the main schema triples file and the main examples file.
schema_path = './data/schema.ttl'
examples_path = './data/examples.txt'

# Separator string; presumably used to join conditions in messages elsewhere
# in this file - TODO confirm against its callers.
andstr = "\n AND\n  "
# Ballpark limits for the number of types: test_alltypes asserts the type count
# exceeds the lower bound; the upper bound is presumably checked the same way.
TYPECOUNT_UPPERBOUND = 1500
TYPECOUNT_LOWERBOUND = 500

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Import-time side effect: load the "default" source graph once so that every
# test in this module works against the same data, then report its size.
SdoTermSource.loadSourceGraph("default")
print("loaded %s triples - %s terms" %
      (len(SdoTermSource.sourceGraph()), len(SdoTermSource.getAllTerms())))

# Import-time side effect: load the "default" examples files and report how
# many examples were found.
print("Loading examples files")
SchemaExamples.loadExamplesFiles("default")
print("Loaded %d examples" % SchemaExamples.count())

# Tests to probe the health of both schemas and code.
# Note that known failings can be annotated with @unittest.expectedFailure or @skip("reason...")


class BallparkCountTests(unittest.TestCase):
    def test_alltypes(self):

        # ballpark estimates.
        self.assertTrue(
            len(SdoTermSource.getAllTypes()) > TYPECOUNT_LOWERBOUND,
Ejemplo n.º 4
0
 def loadGraph(self):
     """Pass the current SdoTermSource source graph to ``self.list``."""
     source_graph = SdoTermSource.sourceGraph()
     self.list(source_graph)
Ejemplo n.º 5
0
 def loadGraphs(self):
     """Load the "default" source graph and keep it on ``self.rdflib_data``."""
     SdoTermSource.loadSourceGraph("default")
     loaded_graph = SdoTermSource.sourceGraph()
     self.rdflib_data = loaded_graph
Ejemplo n.º 6
0
def createcontext():
    """Generate a basic JSON-LD context file for schema.org.

    The context is assembled as text, in this order:

    1. fixed aliases (``type``, ``id``, ``HTML``) and ``@vocab``;
    2. one prefix entry per namespace bound on the SdoTermSource source graph
       (each prefix emitted once; ``schema`` is forced to the http URI);
    3. one entry per vocabulary term, skipping REFERENCE terms and terms whose
       id starts with ``http:`` or ``https:``.

    Property entries get a ``@type`` coercion derived from ``rangeIncludes``
    unless "Text" is in the range.

    Returns:
        str: the text of the context document.
    """
    # @vocab and the "schema" prefix are pinned to http to maintain http
    # compatibility, whatever vocabulary URI the source graph is bound to.
    SCHEMAURI = "http://schema.org/"

    jsonldcontext = [
        "{\n  \"@context\": {\n",
        "        \"type\": \"@type\",\n",
        "        \"id\": \"@id\",\n",
        "        \"HTML\": { \"@id\": \"rdf:HTML\" },\n",
        "        \"@vocab\": \"%s\",\n" % SCHEMAURI,
    ]

    # Single pass over the namespace bindings. The previous nested loop over
    # the same iterator consumed the first binding in the outer loop and only
    # processed the rest, so the first namespace never reached the context.
    seen_prefixes = set()
    for pref, pth in SdoTermSource.sourceGraph().namespaces():
        pref = str(pref)
        if pref in seen_prefixes:
            continue
        seen_prefixes.add(pref)
        if pref == "schema":
            pth = SCHEMAURI  # Override vocab setting to maintain http compatibility
        jsonldcontext.append("        \"%s\": \"%s\",\n" % (pref, pth))

    for t in SdoTermSource.getAllTerms(expanded=True, supressSourceLinks=True):
        if t.termType == SdoTerm.PROPERTY:
            expected = t.rangeIncludes

            types = []
            # If Text is in the range, don't output a @type value.
            if "Text" not in expected:
                if "URL" in expected:
                    types.append("@id")
                if "Date" in expected:
                    types.append("Date")
                # NOTE(review): this tests for "Datetime" while the emitted
                # type is "DateTime". Left as-is because correcting the
                # spelling could add a second "@type" key to properties that
                # accept both Date and DateTime - confirm the intent.
                if "Datetime" in expected:
                    types.append("DateTime")

            typins = "".join(", \"@type\": \"%s\"" % typ for typ in types)
            line = "        \"%s\": { \"@id\": \"%s\"%s}," % (
                t.id, prefixedIdFromUri(t.uri), typins)
        elif t.termType == SdoTerm.REFERENCE:
            continue
        else:
            line = "        \"%s\": {\"@id\": \"%s\"}," % (
                t.id, prefixedIdFromUri(t.uri))

        # Terms identified by a full http(s) URI are external and are not
        # written to the context.
        if t.id.startswith(("http:", "https:")):
            continue
        jsonldcontext.append(line)

    jsonldcontext.append("}}\n")
    ret = "".join(jsonldcontext)
    # Term lines are emitted without newlines: drop the trailing comma of the
    # last entry while closing the document, then break after every entry.
    ret = ret.replace("},}}", "}\n    }\n}")
    ret = ret.replace("},", "},\n")
    return ret