def buildequivs(format):
    """Build and serialize an OWL graph equating http:// and https:// terms.

    For every non-retired schema.org term, a pair of owl:equivalentClass
    triples (owl:equivalentProperty for properties) is added, linking the
    term's http URI and https URI in both directions.  The graph is then
    serialized in the first entry of the module-level ``exts`` table that
    matches *format* ("all" matches any) and returned as a string.
    """
    http_ns = "http://schema.org/"
    https_ns = "https://schema.org/"
    graph = rdflib.Graph()
    graph.bind("schema_p", http_ns)
    graph.bind("schema_s", https_ns)
    graph.bind("owl", OWL)

    for term in SdoTermSource.getAllTerms(expanded=True):
        if term.retired:  # drops non-schema terms and those in attic
            continue
        predicate = (OWL.equivalentProperty
                     if term.termType == SdoTerm.PROPERTY
                     else OWL.equivalentClass)
        http_uri = URIRef(http_ns + term.id)
        https_uri = URIRef(https_ns + term.id)
        graph.add((http_uri, predicate, https_uri))
        graph.add((https_uri, predicate, http_uri))

    for ftype in exts:
        if format not in ("all", ftype):
            continue
        # "rdf" is serialized through rdflib's pretty-xml serializer.
        serial_format = "pretty-xml" if ftype == "rdf" else ftype
        # NOTE: returns after the first matching format, even for "all".
        return graph.serialize(format=serial_format, auto_compact=True,
                               sort_keys=True)
Beispiel #2
0
def sitemap(page):
    """Render the sitemap XML body.

    Lists every locally-identified schema.org term page plus the static
    documentation pages, each stamped with the current release date as
    its <lastmod> value.  Returns the full XML document as a string.
    """
    entry = """ <url>
   <loc>https://schema.org/%s</loc>
   <lastmod>%s</lastmod>
 </url>
"""
    STATICPAGES = [
        "docs/schemas.html", "docs/full.html", "docs/gs.html",
        "docs/about.html", "docs/howwework.html", "docs/releases.html",
        "docs/faq.html", "docs/datamodel.html", "docs/developers.html",
        "docs/extension.html", "docs/meddocs.html", "docs/hotels.html"
    ]

    lastmod = getVersionDate(getVersion())
    parts = ["""<?xml version="1.0" encoding="utf-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"""]
    for term in SdoTermSource.getAllTerms(supressSourceLinks=True):
        # Skip terms identified by full external URIs.
        if term.startswith(("http://", "https://")):
            continue
        parts.append(entry % (term, lastmod))
    parts.extend(entry % (path, lastmod) for path in STATICPAGES)
    parts.append("</urlset>\n")
    return "".join(parts)
Beispiel #3
0
def exportcsv(page):
    """Export all schema.org terms as CSV files.

    Produces four files via writecsvout(): "current" and "all" variants for
    both properties and types (types here also cover enumerations, datatypes
    and enumeration values).  Retired terms appear only in the "all"
    variants.  *page* is unused; it matches the common page-handler
    signature used elsewhere in this file.
    """
    protocol, altprotocol = protocols()

    # Column orders for the two CSV layouts.
    typeFields = [
        "id", "label", "comment", "subTypeOf", "enumerationtype",
        "equivalentClass", "properties", "subTypes", "supersedes",
        "supersededBy", "isPartOf"
    ]
    propFields = [
        "id", "label", "comment", "subPropertyOf", "equivalentProperty",
        "subproperties", "domainIncludes", "rangeIncludes", "inverseOf",
        "supersedes", "supersededBy", "isPartOf"
    ]
    typedata = []      # non-retired types only
    typedataAll = []   # every type, retired included
    propdata = []      # non-retired properties only
    propdataAll = []   # every property, retired included
    terms = SdoTermSource.getAllTerms(expanded=True, supressSourceLinks=True)
    for term in terms:
        # Skip reference terms and terms identified by full external URIs.
        if term.termType == SdoTerm.REFERENCE or term.id.startswith(
                "http://") or term.id.startswith("https://"):
            continue
        row = {}
        row["id"] = term.uri
        row["label"] = term.label
        row["comment"] = term.comment
        row["supersedes"] = uriwrap(term.supersedes)
        row["supersededBy"] = uriwrap(term.supersededBy)
        #row["isPartOf"] = term.isPartOf
        row["isPartOf"] = ""
        if term.termType == SdoTerm.PROPERTY:
            row["subPropertyOf"] = uriwrap(term.supers)
            row["equivalentProperty"] = array2str(term.equivalents)
            row["subproperties"] = uriwrap(term.subs)
            row["domainIncludes"] = uriwrap(term.domainIncludes)
            row["rangeIncludes"] = uriwrap(term.rangeIncludes)
            row["inverseOf"] = uriwrap(term.inverse)
            propdataAll.append(row)
            if not term.retired:
                propdata.append(row)
        else:
            # Everything that is not a property is written to the type CSVs.
            row["subTypeOf"] = uriwrap(term.supers)
            if term.termType == SdoTerm.ENUMERATIONVALUE:
                row["enumerationtype"] = uriwrap(term.enumerationParent)
            else:
                row["properties"] = uriwrap(term.allproperties)
            row["equivalentClass"] = array2str(term.equivalents)
            row["subTypes"] = uriwrap(term.subs)
            typedataAll.append(row)
            if not term.retired:
                typedata.append(row)

    writecsvout("properties", propdata, propFields, "current", protocol,
                altprotocol)
    writecsvout("properties", propdataAll, propFields, "all", protocol,
                altprotocol)
    writecsvout("types", typedata, typeFields, "current", protocol,
                altprotocol)
    writecsvout("types", typedataAll, typeFields, "all", protocol, altprotocol)
Beispiel #4
0
def loadTerms():
    """Load the schema triples into SdoTermSource, once per process.

    Guarded by the module-level LOADEDTERMS flag, so calls after the
    first successful load are no-ops.
    """
    global LOADEDTERMS
    if LOADEDTERMS:
        return
    LOADEDTERMS = True
    print("Loading triples files")
    SdoTermSource.loadSourceGraph("default")
    triple_count = len(SdoTermSource.sourceGraph())
    term_count = len(SdoTermSource.getAllTerms())
    print("loaded %s triples - %s terms" % (triple_count, term_count))
Beispiel #5
0
def homePage(page):
    """Render the Home page, or its Pending/Attic variants.

    For the filtered variants the layer's terms are grouped by category
    into per-termType lists for the template.  Returns rendered HTML.
    """
    global STRCLASSVAL

    # Per-page overrides: (title suffix, template, layer filter, CSS class).
    variants = {
        "PendingHome": (" - Pending", "docs/PendingHome.j2", "pending",
                        'class="ext ext-pending"'),
        "AtticHome": (" - Retired", "docs/AtticHome.j2", "attic",
                      'class="ext ext-attic"'),
    }
    title = SITENAME
    template = "docs/Home.j2"
    filt = None
    overrideclassval = None
    if page in variants:
        suffix, template, filt, overrideclassval = variants[page]
        title += suffix

    sectionterms = {}
    termcount = 0
    if filt:
        terms = SdoTermSource.getAllTerms(layer=filt, expanded=True)
        terms.sort(key=lambda u: (u.category, u.id))
        current_cat = object()  # sentinel: never equal to a real category
        buckets = None
        for term in terms:
            if term.category != current_cat:
                # Sorted order means each category appears as one run;
                # start a fresh set of per-type buckets for it.
                current_cat = term.category
                buckets = {
                    SdoTerm.TYPE: [],
                    SdoTerm.PROPERTY: [],
                    SdoTerm.DATATYPE: [],
                    SdoTerm.ENUMERATION: [],
                    SdoTerm.ENUMERATIONVALUE: [],
                }
                sectionterms[current_cat] = buckets
            if term.termType == SdoTerm.REFERENCE:
                continue
            buckets[term.termType].append(term)
            termcount += 1

    # NOTE(review): `title` is computed but extra_vars passes SITENAME —
    # possible oversight, preserved as-is.
    extra_vars = {
        'home_page': "True",
        'title': SITENAME,
        'termcount': termcount,
        'sectionterms': sectionterms
    }
    # NOTE(review): STRCLASSVAL appears to be consumed during template
    # rendering — confirm against docsTemplateRender.
    STRCLASSVAL = overrideclassval
    ret = docsTemplateRender(template, extra_vars)
    STRCLASSVAL = None
    return ret
Beispiel #6
0
def buildTerms(terms):
    """Generate a documentation page file for each term in *terms*.

    If *terms* contains "ALL"/"All"/"all", every known term is built.
    Reference-type terms get no page.  Prints per-term diagnostics
    (count of newly created, non-cached term definitions and elapsed
    time) and a total elapsed time at the end.
    """
    import datetime  # (dropped unused `time` import)

    # Avoid shadowing the builtin `all` with the sentinel list.
    if any(token in terms for token in ("ALL", "All", "all")):
        terms = SdoTermSource.getAllTerms(supressSourceLinks=True)

    start = datetime.datetime.now()
    previous_total = 0
    if len(terms):
        print("\nBuilding term pages...\n")
    for name in terms:
        tic = datetime.datetime.now()  # diagnostics
        term = SdoTermSource.getTerm(name, expanded=True)
        if not term:
            print("No such term: %s\n" % name)
            continue

        if term.termType == SdoTerm.REFERENCE:  # Don't create pages for reference types
            continue
        examples = SchemaExamples.examplesForTerm(term.id)
        pageout = termtemplateRender(term, examples)
        # `with` guarantees the file is closed even if write() raises.
        with open(termFileName(term.id), "w") as outfile:
            outfile.write(pageout)

        # diagnostics ##########################################
        # (%d) = number of individual newly created (not cached) term
        # definitions needed to build this expanded definition, i.e. all
        # Properties associated with a Type, etc.
        total_cached = len(SdoTermSource.termCache())
        newly_created = total_cached - previous_total
        previous_total = total_cached
        print("Term: %s (%d) - %s" %
              (name, newly_created,
               str(datetime.datetime.now() - tic)))

    if len(terms):
        print()
        print("All terms took %s seconds" %
              str(datetime.datetime.now() - start))
Beispiel #7
0
# Glob patterns locating the vocabulary triples and example definition files.
TRIPLESFILESGLOB = ["data/*.ttl", "data/ext/*/*.ttl"]
EXAMPLESFILESGLOB = ["data/*examples.txt", "data/ext/*/*examples.txt"]

# Canonical single-file locations for the core schema and examples.
schema_path = './data/schema.ttl'
examples_path = './data/examples.txt'

andstr = "\n AND\n  "
# Ballpark bounds used by sanity tests to detect a grossly wrong number
# of loaded types.
TYPECOUNT_UPPERBOUND = 1500
TYPECOUNT_LOWERBOUND = 500

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Import-time side effect: load the full source graph and all examples so
# the test cases defined later can interrogate them.
SdoTermSource.loadSourceGraph("default")
print("loaded %s triples - %s terms" %
      (len(SdoTermSource.sourceGraph()), len(SdoTermSource.getAllTerms())))

print("Loading examples files")
SchemaExamples.loadExamplesFiles("default")
print("Loaded %d examples" % SchemaExamples.count())

# Tests to probe the health of both schemas and code.
# Note that known failings can be annotated with @unittest.expectedFailure or @skip("reason...")


class BallparkCountTests(unittest.TestCase):
    def test_alltypes(self):

        # ballpark estimates.
        self.assertTrue(
            len(SdoTermSource.getAllTypes()) > TYPECOUNT_LOWERBOUND,
Beispiel #8
0
def homePage(page):
    """Render the Home page or one of its section variants.

    *page* selects template, layer filter and CSS class override
    ("PendingHome", "AtticHome", "AutoHome", "BibHome",
    "Health-lifesciHome", "MetaHome"); any other value renders the plain
    Home page with no term listing.  For filtered variants the layer's
    terms are grouped by category — for "pending", by the schemaorg
    GitHub issue the term originated from.  Returns rendered HTML.
    """
    global STRCLASSVAL
    title = SITENAME
    template = "docs/Home.j2"
    filt = None
    overrideclassval = None
    if page == "PendingHome":
        title += " - Pending"
        template = "docs/PendingHome.j2"
        filt = "pending"
        overrideclassval = 'class="ext ext-pending"'
    elif page == "AtticHome":
        title += " - Retired"
        template = "docs/AtticHome.j2"
        filt = "attic"
        overrideclassval = 'class="ext ext-attic"'
    elif page == "AutoHome":
        title += " - Auto Section"
        template = "docs/AutoHome.j2"
        filt = "auto"
        overrideclassval = 'class="ext"'
    elif page == "BibHome":
        title += " - Bib Section"
        template = "docs/BibHome.j2"
        filt = "bib"
        overrideclassval = 'class="ext"'
    elif page == "Health-lifesciHome":
        title += " - Health-lifesci Section"
        template = "docs/Health-lifesciHome.j2"
        filt = "health-lifesci"
        overrideclassval = 'class="ext"'
    elif page == "MetaHome":
        title += " - Meta"
        template = "docs/MetaHome.j2"
        filt = "meta"
        overrideclassval = 'class="ext"'
    sectionterms = {}
    termcount = 0
    if filt:
        terms = SdoTermSource.getAllTerms(layer=filt, expanded=True)
        # Assign a grouping category to each term.  Pending terms are
        # categorised by the schemaorg GitHub issue found in their sources;
        # everything else shares the empty category.
        for t in terms:
            t.cat = ""
            if filt == "pending":
                for s in t.sources:
                    if "schemaorg/issue" in s:
                        t.cat = "issue-" + os.path.basename(s)
                        break
        terms.sort(key=lambda u: (u.cat, u.id))

        # Sorted order means each category forms one contiguous run: start
        # fresh per-termType buckets whenever the category changes.
        first = True
        cat = None
        for t in terms:
            if first or t.cat != cat:
                first = False
                cat = t.cat
                ttypes = {}
                sectionterms[cat] = ttypes
                ttypes[SdoTerm.TYPE] = []
                ttypes[SdoTerm.PROPERTY] = []
                ttypes[SdoTerm.DATATYPE] = []
                ttypes[SdoTerm.ENUMERATION] = []
                ttypes[SdoTerm.ENUMERATIONVALUE] = []
            if t.termType == SdoTerm.REFERENCE:
                continue
            ttypes[t.termType].append(t)
            termcount += 1

    # Present sections in alphabetical category order.
    sectionterms = dict(sorted(sectionterms.items()))

    # NOTE(review): `title` is computed above but extra_vars passes
    # SITENAME — possible oversight; preserved as-is.
    extra_vars = {
        'home_page': "True",
        'title': SITENAME,
        'termcount': termcount,
        'sectionterms': sectionterms
    }
    # NOTE(review): STRCLASSVAL appears to be consumed during template
    # rendering — confirm against docsTemplateRender.
    STRCLASSVAL = overrideclassval
    ret = docsTemplateRender(template, extra_vars)
    STRCLASSVAL = None
    return ret
    default="all",
    choices=['xml', 'rdf', 'nquads', 'nt', 'json-ld', 'turtle', 'csv'])
parser.add_argument("-o", "--output", required=True, help="output file")
args = parser.parse_args()

# Supported serialization formats and their output file extensions.
exts = {"rdf": ".rdf", "nt": ".nt", "json-ld": ".jsonld", "turtle": ".ttl"}

from rdflib.namespace import OWL
# Build a graph equating the http:// and https:// forms of every term.
s_p = "http://schema.org/"
s_s = "https://schema.org/"
outGraph = rdflib.Graph()
outGraph.bind("schema_p", s_p)
outGraph.bind("schema_s", s_s)
outGraph.bind("owl", OWL)

for t in SdoTermSource.getAllTerms(expanded=True):
    if not t.retired:  #drops non-schema terms and those in attic
        # Properties are equated with owl:equivalentProperty, everything
        # else with owl:equivalentClass — in both directions.
        eqiv = OWL.equivalentClass
        if t.termType == SdoTerm.PROPERTY:
            eqiv = OWL.equivalentProperty

        p = URIRef(s_p + t.id)
        s = URIRef(s_s + t.id)
        outGraph.add((p, eqiv, s))
        outGraph.add((s, eqiv, p))
        #log.info("%s " % t.uri)

# Serialize the graph for each requested format ("all" selects every one).
for ftype in exts:
    if args.format != "all" and args.format != ftype:
        continue
    ext = exts[ftype]
Beispiel #10
0
def createcontext():
    """Generates a basic JSON-LD context file for schema.org.

    Returns the context as a string: fixed keyword aliases, a prefix
    declaration for every namespace bound in the source graph, then one
    entry per local schema.org term mapping its id to a prefixed URI
    (with an @type coercion for properties whose range calls for one).
    """
    SCHEMAURI = "http://schema.org/"

    jsonldcontext = []
    jsonldcontext.append("{\n  \"@context\": {\n")
    jsonldcontext.append("        \"type\": \"@type\",\n")
    jsonldcontext.append("        \"id\": \"@id\",\n")
    jsonldcontext.append("        \"HTML\": { \"@id\": \"rdf:HTML\" },\n")
    #jsonldcontext.append("        \"@vocab\": \"%s\",\n" % SdoTermSource.vocabUri())
    jsonldcontext.append("        \"@vocab\": \"%s\",\n" % SCHEMAURI)
    # One declaration per distinct prefix.  (BUGFIX: the original nested two
    # `for n in ns:` loops over the same namespaces() iterator, which both
    # shadowed the outer variable and silently dropped the first namespace
    # when namespaces() is a generator; a single loop visits them all.)
    done = []
    for pref, pth in SdoTermSource.sourceGraph().namespaces():
        pref = str(pref)
        if pref in done:
            continue
        done.append(pref)
        if pref == "schema":
            pth = SCHEMAURI  # Override vocab setting to maintain http compatibility

        jsonldcontext.append("        \"%s\": \"%s\",\n" % (pref, pth))

    vocablines = ""
    externalines = ""
    for t in SdoTermSource.getAllTerms(expanded=True, supressSourceLinks=True):
        if t.termType == SdoTerm.REFERENCE:
            continue
        if t.termType == SdoTerm.PROPERTY:
            # `ranges` renamed from `range` to stop shadowing the builtin.
            ranges = t.rangeIncludes

            types = []

            # If Text is in the range, don't output a @type value at all.
            if not "Text" in ranges:
                if "URL" in ranges:
                    types.append("@id")
                if "Date" in ranges:
                    types.append("Date")
                # BUGFIX: was `"Datetime" in range` — the schema.org type is
                # spelled "DateTime", so the old test could never match.
                if "DateTime" in ranges:
                    types.append("DateTime")

            typins = ""
            for typ in types:
                typins += ", \"@type\": \"" + typ + "\""

            line = "        \"" + t.id + "\": { \"@id\": \"" + prefixedIdFromUri(
                t.uri) + "\"" + typins + "},"
        else:
            line = "        \"" + t.id + "\": {\"@id\": \"" + prefixedIdFromUri(
                t.uri) + "\"},"

        # Entries for externally-identified terms are collected but (per the
        # commented-out append below) deliberately not emitted.
        if t.id.startswith("http:") or t.id.startswith("https:"):
            externalines += line
        else:
            vocablines += line

    jsonldcontext.append(vocablines)
    #jsonldcontext.append(externalines)
    jsonldcontext.append("}}\n")
    ret = "".join(jsonldcontext)
    # Fix up the trailing entry and put each entry on its own line.
    ret = ret.replace("},}}", "}\n    }\n}")
    ret = ret.replace("},", "},\n")
    return ret