コード例 #1
0
ファイル: cns.py プロジェクト: qingyouzhuying/cnschema
    def _copy_data(self, v, plist=PLIST_BASIC, simplify=False):
        """Return a reduced copy of node ``v`` limited to the properties in ``plist``.

        Args:
            v: Source node (a mapping from property name to value).
            plist: Property names to copy; names missing from ``v`` are
                skipped.
            simplify: When true, each output key is ``os.path.basename`` of
                the property name instead of the full name.

        Returns:
            A new dict. Properties listed in ``PLIST_DOMAIN_RANGE`` become a
            list of ``PLIST_REF`` copies of the referenced nodes, with the
            last element flagged ``"_last": True``. Properties listed in
            ``PLIST_OBJ`` become a single ``PLIST_REF`` copy of the referenced
            node. Every other value is stored as-is (same object, not copied).

        References whose ``"@id"`` is not present in ``self.map_id_schemaorg``
        are logged and skipped rather than recursed into with ``None``.
        """
        ret = {}
        for p in plist:
            if p not in v:
                continue

            pX = os.path.basename(p) if simplify else p

            if p in PLIST_DOMAIN_RANGE:
                refs = []
                for rX in json_get_list(v, p):
                    r = self.map_id_schemaorg.get(rX["@id"])
                    if r is None:
                        # Recursing with None would fail on ``p in v``.
                        logging.warning(
                            "unresolved reference %s for property %s",
                            rX["@id"], p)
                        continue
                    refs.append(self._copy_data(r, PLIST_REF))
                if refs:
                    refs[-1]["_last"] = True
                ret[pX] = refs
            elif p in PLIST_OBJ:
                r = self.map_id_schemaorg.get(v[p]["@id"])
                if r is None:
                    logging.warning(
                        "unresolved reference %s for property %s",
                        v[p]["@id"], p)
                    continue
                ret[pX] = self._copy_data(r, PLIST_REF)
            else:
                ret[pX] = v[p]

        return ret
コード例 #2
0
ファイル: cns.py プロジェクト: qingyouzhuying/cnschema
    def convert_extend2mustach(self, node):
        """Build the rendering context dict for one schema node.

        The context (presumably fed to a mustache template, going by the
        method name) starts as ``self._copy_data(node, PLIST_PROP,
        simplify=True)`` and is extended with helper keys:

        * ``_node_label`` and ``_group_<group>`` for every node.
        * ``_sourceAck`` / ``_source``: one item per
          ``http://purl.org/dc/terms/source`` reference, depending on whether
          the referenced id is found in ``self.map_id_schemaorg``.
        * ``_paths``: paths collected by ``self._gen_path`` along ``_super``
          (for ``property`` nodes each path is prefixed with Thing and a
          synthetic "Property" entry; for ``other`` nodes the path starts
          from the node's ``@type`` and ``_is_instance`` is set).
        * ``_pTree`` (``type`` nodes only): for the node and each of its
          ``_super`` ancestors, the ``isDomainOf`` properties sorted by
          ``@id``; ancestors without properties are omitted.
        * ``_pRange`` (``type`` nodes only): the node's ``isRangeOf``
          properties sorted by ``@id``.
        * ``_sub`` / ``_super``: ``PLIST_REF`` copies of related nodes.
        * ``_sitename``, ``_version``, ``_url_root``, ``_url_schema``.

        Finally, the last element of every non-empty list whose first element
        is a ``dict``/``defaultdict`` is flagged ``"_last": True``.

        Args:
            node: Schema node; must carry ``@id``, ``rdfs:label`` and
                ``_group``.

        Returns:
            The context dict described above.
        """
        xid = node["@id"]
        entry = self._copy_data(node, PLIST_PROP, simplify=True)

        entry["_node_label"] = node["rdfs:label"]
        entry["_group_{}".format(node["_group"])] = True

        # source
        sourceList = json_get_list(node, "http://purl.org/dc/terms/source")
        for sourceRef in sourceList:
            # Exact-type check: only plain dict instances are accepted;
            # anything else is logged and skipped.
            if type(sourceRef) is not dict:
                logging.error(sourceList)
                continue

            source = self.map_id_schemaorg.get(sourceRef["@id"])
            if source:
                json_append(entry, "_sourceAck", source)
            else:
                # Unknown source: synthesize a stub that links to the raw id.
                # NOTE(review): the id is interpolated into HTML unescaped.
                temp = {
                    "@id": sourceRef["@id"],
                    "rdfs:label": sourceRef["@id"],
                    "rdfs:comment": '<a href="{}">{}</a>'.format(
                        sourceRef["@id"], sourceRef["@id"]),
                }
                json_append(entry, "_source", temp)

        if node["_group"] == "property":
            result = []
            self._gen_path("_super", node, [node], result)

            # Common prefix for every property path. The same two dict
            # objects are shared by all generated paths.
            rootPath = [
                self._copy_data(
                    self.map_id_schemaorg["http://cnschema.org/Thing"],
                    PLIST_REF),
                {
                    "rdfs:label": "Property",
                    "@id": "http://meta.cnschema.org/Property",
                },
            ]
            entry["_paths"] = []
            for onePath in result:
                temp = list(rootPath)
                temp.extend(onePath["_path"])
                temp[-1]["_lastone"] = True
                entry["_paths"].append({"_path": temp})

        if node["_group"] == "other":
            result = []
            typeNode = self.map_id_schemaorg.get(node["@type"])
            self._gen_path("_super", typeNode, [typeNode], result)
            entry["_paths"] = result
            entry["_is_instance"] = True

        if node["_group"] == "type":
            # path
            result = []
            self._gen_path("_super", node, [node], result)
            entry["_paths"] = result

            # domain: walk the node and its ancestors level by level
            entry["_pTree"] = []
            seedList = [xid]
            # An ancestor reachable through several parents must be emitted
            # only once; the visited set also guarantees termination if the
            # "_super" links ever form a cycle.
            visited = set()
            while seedList:
                newSeedList = []
                for seedId in seedList:
                    if seedId in visited:
                        continue
                    visited.add(seedId)

                    seed = self.map_id_schemaorg.get(seedId)

                    treeItem = self._copy_data(seed)
                    treeItem["_properties"] = []

                    for v in sorted(seed.get("isDomainOf", []),
                                    key=lambda x: x["@id"]):
                        # Superseded properties are left out of the tree.
                        if "http://cnschema.org/supersededBy" in v:
                            continue

                        prop = self._copy_data(v,
                                               plist=PLIST_PROP,
                                               simplify=True)
                        treeItem["_properties"].append(prop)

                    if treeItem["_properties"]:
                        entry["_pTree"].append(treeItem)

                    newSeedList.extend(seed.get("_super", []))
                seedList = newSeedList
            if entry["_pTree"]:
                entry["_pTree"][-1]["_last"] = True

            # range
            rangeProps = node.get("isRangeOf", [])
            if rangeProps:
                entry["_pRange"] = [
                    self._copy_data(v, plist=PLIST_PROP, simplify=True)
                    for v in sorted(rangeProps, key=lambda x: x["@id"])
                ]

        # super and sub
        for p in ["_sub", "_super"]:
            related = node.get(p, [])
            if related:
                entry[p] = [
                    self._copy_data(self.map_id_schemaorg.get(v), PLIST_REF)
                    for v in related
                ]

        entry["_sitename"] = self.site

        entry["_version"] = self.version
        entry["_url_root"] = "."
        entry["_url_schema"] = "http://{}".format(self.site)

        # Flag the last item of every list of dicts so the consumer can tell
        # where each list ends.
        for v in entry.values():
            if type(v) == list and v and type(
                    v[0]) in [dict, collections.defaultdict]:
                v[-1]["_last"] = True

        return entry
コード例 #3
0
ファイル: schemaorg.py プロジェクト: zxlzr/cnschema
    def _init_schema(self):
        """Download ``schema.jsonld`` and index its graph nodes by ``@id``.

        The file is fetched from
        ``<self.url_base>/data/releases/<self.version>/schema.jsonld``.
        Every node of its ``@graph`` is stored in ``self.map_id_node`` and
        annotated in place with helper keys: ``xtype``, ``_group``,
        ``_layer``, ``_usage``, ``_examples``, ``_instances``, ``_super``,
        ``_sub``, the inverse domain/range keys from
        ``INVERSE_DOMAIN_RANGE``, and ``_supersede``.

        Reads ``self.map_id_stat2015`` and ``self.map_id_examples``, which
        must therefore be populated before this method runs.
        """
        # the main json-ld
        self.map_id_node = {}
        filename = "schema.jsonld"
        url = '{}/data/releases/{}/{}'.format(self.url_base, self.version,
                                              filename)
        #logging.info(url)
        # NOTE(review): no timeout and no HTTP status check on this request.
        r = requests.get(url)
        data_jsonld = json.loads(r.content)
        logging.info(len(data_jsonld))

        # Normalisation pass: make "@type" handling uniform and derive "xtype".
        for node in data_jsonld["@graph"]:
            # Ids that do not mention schema.org are only logged at debug
            # level; the ``pass`` is a no-op and the node is processed anyway.
            if "schema.org" not in node["@id"]:
                logging.debug(node["@id"])
                # node.get("@type")
                pass

            type_list = node.get("@type", [])
            if not type(type_list) == list:
                # A scalar "@type" is wrapped locally; node["@type"] itself is
                # left untouched.
                type_list = [type_list]
            else:
                node["@type"] = sorted(type_list)
            # "xtype" joins type_list in its original order; the sorted copy
            # above is only stored under "@type".
            node["xtype"] = ','.join(type_list)

        # First pass: index every node and attach per-node annotations.
        for node in data_jsonld["@graph"]:
            xid = node["@id"]
            self.map_id_node[xid] = node

            # Group: "type" for rdfs:Class, "property" for rdf:Property,
            # otherwise "other".
            xtypeList = json_get_list(node, "@type")
            if "rdfs:Class" in xtypeList:
                node["_group"] = "type"
            elif "rdf:Property" in xtypeList:
                node["_group"] = "property"
            else:
                node["_group"] = "other"

            node["_layer"] = "core"

            # Usage statistics and examples are both looked up by rdfs:label.
            usage_id = self.map_id_stat2015.get(node.get("rdfs:label"))
            node["_usage"] = get_usage_str(usage_id)

            examples = self.map_id_examples.get(node.get("rdfs:label"))
            if examples:
                node["_examples"] = examples

        # Second pass: cross-link nodes; needs the complete index built above.
        for node in data_jsonld["@graph"]:
            xid = node["@id"]

            # Instances: when one of the node's types is itself a node in the
            # graph, record a PLIST_REF copy of this node on that type node.
            for xtype in json_get_list(node, "@type"):
                the_node = self.map_id_node.get(xtype)
                if the_node:
                    json_append(the_node, "_instances",
                                self._copy_node(node, PLIST_REF))
                    # The list is re-sorted by label after every append.
                    the_node["_instances"] = sorted(
                        the_node["_instances"], key=lambda x: x["rdfs:label"])
            # Subclass / subproperty relation, stored in both directions as
            # ids; parents that are not in the index are ignored.
            for p in ["rdfs:subClassOf", "rdfs:subPropertyOf"]:
                for v in json_get_list(node, p):
                    node_id = v["@id"]
                    if node_id not in self.map_id_node:
                        continue

                    the_node = self.map_id_node[node_id]
                    json_append(node, "_super", node_id)
                    json_append(the_node, "_sub", xid)

            # Domain / range: store the property node itself (not a copy) on
            # each referenced node under the inverse key.
            if node["_group"] == "property":
                for p in PLIST_DOMAIN_RANGE:
                    for the_node in json_get_list(node, p):
                        refxid = the_node["@id"]
                        pX = INVERSE_DOMAIN_RANGE[p]
                        # Direct indexing: raises KeyError when the referenced
                        # node is not in the index.
                        json_append(self.map_id_node[refxid], pX, node)
                        self.map_id_node[refxid][pX] = sorted(
                            self.map_id_node[refxid][pX],
                            key=lambda x: x["rdfs:label"])

            # http://schema.org/supersededBy: record on the replacement node
            # a PLIST_REF copy of the node it supersedes.
            p = "http://schema.org/supersededBy"
            if p in node:
                # NOTE(review): assumes the value is a single {"@id": ...}
                # object rather than a list - confirm against the data.
                node_id = node[p]["@id"]
                the_node = self.map_id_node[node_id]
                the_node["_supersede"] = self._copy_node(node, PLIST_REF)