def _copy_data(self, v, plist=PLIST_BASIC, simplify=False):
    """Return a new dict holding the properties of ``v`` listed in ``plist``.

    Args:
        v: source node (a dict). Properties it does not contain are skipped.
        plist: iterable of property names to copy.
        simplify: when true, each output key is ``os.path.basename`` of the
            property name; otherwise the property name is used unchanged.

    Returns:
        A fresh dict. Properties in ``PLIST_DOMAIN_RANGE`` become a list of
        ``PLIST_REF`` copies of the referenced nodes, with ``"_last"`` set
        on the final item. Properties in ``PLIST_OBJ`` become one such copy.
        Every other property value is stored as-is (same object, not copied).
    """

    def copy_ref(ref):
        # Resolve a {"@id": ...} reference into a PLIST_REF copy of its target.
        ref_id = ref["@id"]
        target = self.map_id_schemaorg.get(ref_id)
        if target is None:
            # A dangling reference used to crash the recursive call with
            # "argument of type 'NoneType' is not iterable". Keep the id
            # visible instead, using the same stub shape that
            # convert_extend2mustach builds for unresolved sources.
            logging.warning("unresolved reference: %s", ref_id)
            return {"@id": ref_id, "rdfs:label": ref_id}
        return self._copy_data(target, PLIST_REF)

    ret = {}
    for p in plist:
        if p not in v:
            continue

        pX = os.path.basename(p) if simplify else p

        if p in PLIST_DOMAIN_RANGE:
            # json_get_list presumably normalises the value to a list.
            ret[pX] = [copy_ref(rX) for rX in json_get_list(v, p)]
            if ret[pX]:
                ret[pX][-1]["_last"] = True
        elif p in PLIST_OBJ:
            ret[pX] = copy_ref(v[p])
        else:
            ret[pX] = v[p]

    return ret
def convert_extend2mustach(self, node):
    """Build the template context dict ("entry") for one schema node.

    Args:
        node: node dict; must contain ``"@id"``, ``"rdfs:label"`` and
            ``"_group"`` (``"type"``, ``"property"`` or ``"other"``).

    Returns:
        A dict seeded from ``self._copy_data(node, PLIST_PROP,
        simplify=True)`` and extended with underscore-prefixed keys:
        ``_node_label``, ``_group_<group>``, ``_source`` / ``_sourceAck``,
        ``_paths``, ``_pTree`` and ``_pRange`` (type nodes), ``_sub`` /
        ``_super``, ``_sitename``, ``_version``, ``_url_root`` and
        ``_url_schema``. For every list value whose last item is a dict,
        that item gets ``"_last": True``.
    """
    xid = node["@id"]
    group = node["_group"]

    entry = self._copy_data(node, PLIST_PROP, simplify=True)
    entry["_node_label"] = node["rdfs:label"]
    entry["_group_{}".format(group)] = True

    # source
    source_list = json_get_list(node, "http://purl.org/dc/terms/source")
    for source_ref in source_list:
        if not isinstance(source_ref, dict):
            logging.error(source_list)
            continue
        source_id = source_ref["@id"]
        source = self.map_id_schemaorg.get(source_id)
        if source:
            # Append a shallow copy: the "_last" marking at the end of this
            # method would otherwise write into the shared node stored in
            # map_id_schemaorg and leak into every other entry citing it.
            json_append(entry, "_sourceAck", dict(source))
        else:
            json_append(entry, "_source", {
                "@id": source_id,
                "rdfs:label": source_id,
                "rdfs:comment": '<a href="{}">{}</a>'.format(
                    source_id, source_id),
            })

    if group == "property":
        result = []
        self._gen_path("_super", node, [node], result)
        root_path = [
            self._copy_data(
                self.map_id_schemaorg["http://cnschema.org/Thing"],
                PLIST_REF),
            {
                "rdfs:label": "Property",
                "@id": "http://meta.cnschema.org/Property"
            },
        ]
        entry["_paths"] = []
        for one_path in result:
            path = list(root_path)
            path.extend(one_path["_path"])
            # Flag a copy of the final element so a dict that may be shared
            # with other paths or with the node map is not modified.
            path[-1] = dict(path[-1])
            path[-1]["_lastone"] = True
            entry["_paths"].append({"_path": path})

    elif group == "other":
        result = []
        # "@type" may be a single id or a list of ids; a list used to raise
        # TypeError (unhashable) when passed straight to dict.get().
        # json_get_list presumably normalises both shapes to a list.
        for type_id in json_get_list(node, "@type"):
            type_node = self.map_id_schemaorg.get(type_id)
            if type_node is None:
                logging.warning("unknown @type %s on %s", type_id, xid)
                continue
            self._gen_path("_super", type_node, [type_node], result)
        entry["_paths"] = result
        entry["_is_instance"] = True

    elif group == "type":
        # path
        result = []
        self._gen_path("_super", node, [node], result)
        entry["_paths"] = result

        # domain: walk this type and then its ancestors level by level,
        # collecting the non-superseded properties attached to each.
        p = "isDomainOf"
        entry["_pTree"] = []
        seed_list = [xid]
        while seed_list:
            new_seed_list = []
            for seed_id in seed_list:
                seed = self.map_id_schemaorg.get(seed_id)
                if seed is None:
                    logging.warning("unknown type id %s", seed_id)
                    continue
                tree_item = self._copy_data(seed)
                tree_item["_properties"] = [
                    self._copy_data(v, plist=PLIST_PROP, simplify=True)
                    for v in sorted(seed.get(p, []), key=lambda x: x["@id"])
                    if "http://cnschema.org/supersededBy" not in v
                ]
                if tree_item["_properties"]:
                    entry["_pTree"].append(tree_item)
                new_seed_list.extend(seed.get("_super", []))
            seed_list = new_seed_list
        if entry["_pTree"]:
            entry["_pTree"][-1]["_last"] = True

        # range
        p = "isRangeOf"
        range_props = node.get(p, [])
        if range_props:
            entry["_pRange"] = [
                self._copy_data(v, plist=PLIST_PROP, simplify=True)
                for v in sorted(range_props, key=lambda x: x["@id"])
            ]

    # super and sub
    for p in ["_sub", "_super"]:
        rel_ids = node.get(p, [])
        if not rel_ids:
            continue
        entry[p] = []
        for rel_id in rel_ids:
            rel_node = self.map_id_schemaorg.get(rel_id)
            if rel_node is None:
                # Same stub shape as the unresolved-source fallback above.
                logging.warning("unknown %s id %s on %s", p, rel_id, xid)
                entry[p].append({"@id": rel_id, "rdfs:label": rel_id})
            else:
                entry[p].append(self._copy_data(rel_node, PLIST_REF))

    entry["_sitename"] = self.site
    entry["_version"] = self.version
    entry["_url_root"] = "."
    entry["_url_schema"] = "http://{}".format(self.site)

    # Mark the last item of every list of dicts, presumably so the template
    # can tell where a list ends.
    for value in entry.values():
        if isinstance(value, list) and value and isinstance(value[-1], dict):
            value[-1]["_last"] = True

    return entry
def _init_schema(self):
    """Download the release's ``schema.jsonld`` and index and annotate it.

    Fetches ``<url_base>/data/releases/<version>/schema.jsonld``, stores
    every ``@graph`` node in ``self.map_id_node`` keyed by ``@id``, and
    annotates the nodes in place:

    * normalisation: a list-valued ``@type`` is sorted; ``xtype`` is the
      comma-joined type list
    * first pass: ``_group`` (``type`` / ``property`` / ``other``),
      ``_layer``, ``_usage`` and, when available, ``_examples``
    * second pass: ``_instances``, ``_super`` / ``_sub``, the inverse
      domain/range lists named by ``INVERSE_DOMAIN_RANGE``, and
      ``_supersede``

    Reads ``self.url_base``, ``self.version``, ``self.map_id_stat2015`` and
    ``self.map_id_examples``.

    Raises:
        requests.RequestException: the download timed out or returned an
            HTTP error status.
    """
    # the main json-ld
    self.map_id_node = {}
    filename = "schema.jsonld"
    url = '{}/data/releases/{}/{}'.format(self.url_base, self.version,
                                          filename)
    # Without a timeout requests can block forever; without the status
    # check an error page would surface later as a confusing JSON error.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    data_jsonld = json.loads(r.content)
    logging.info(len(data_jsonld))
    graph = data_jsonld["@graph"]

    for node in graph:
        if "schema.org" not in node["@id"]:
            logging.debug(node["@id"])

        type_list = node.get("@type", [])
        if isinstance(type_list, list):
            node["@type"] = sorted(type_list)
        else:
            type_list = [type_list]
        # "xtype" keeps the document's original order, not the sorted one.
        node["xtype"] = ','.join(type_list)

    # first pass: index every node and attach per-node annotations
    for node in graph:
        xid = node["@id"]
        self.map_id_node[xid] = node

        # group
        xtype_list = json_get_list(node, "@type")
        if "rdfs:Class" in xtype_list:
            node["_group"] = "type"
        elif "rdf:Property" in xtype_list:
            node["_group"] = "property"
        else:
            node["_group"] = "other"

        node["_layer"] = "core"

        # usage statistics and examples are looked up by label
        label = node.get("rdfs:label")
        node["_usage"] = get_usage_str(self.map_id_stat2015.get(label))
        examples = self.map_id_examples.get(label)
        if examples:
            node["_examples"] = examples

    # second pass: cross-link nodes now that the index is complete
    for node in graph:
        xid = node["@id"]

        # instances
        for xtype in json_get_list(node, "@type"):
            the_node = self.map_id_node.get(xtype)
            if the_node:
                json_append(the_node, "_instances",
                            self._copy_node(node, PLIST_REF))
                the_node["_instances"] = sorted(
                    the_node["_instances"], key=lambda x: x["rdfs:label"])

        # subclass relation
        for p in ["rdfs:subClassOf", "rdfs:subPropertyOf"]:
            for v in json_get_list(node, p):
                node_id = v["@id"]
                if node_id not in self.map_id_node:
                    continue
                the_node = self.map_id_node[node_id]
                json_append(node, "_super", node_id)
                json_append(the_node, "_sub", xid)

        # domain range
        if node["_group"] == "property":
            for p in PLIST_DOMAIN_RANGE:
                pX = INVERSE_DOMAIN_RANGE[p]
                for ref in json_get_list(node, p):
                    refxid = ref["@id"]
                    target = self.map_id_node.get(refxid)
                    if target is None:
                        # Skip ids outside this graph, as the subclass pass
                        # does, instead of raising KeyError.
                        logging.warning("%s: unknown %s target %s", xid, p,
                                        refxid)
                        continue
                    json_append(target, pX, node)
                    target[pX] = sorted(
                        target[pX], key=lambda x: x["rdfs:label"])

        # http://schema.org/supersededBy
        p = "http://schema.org/supersededBy"
        if p in node:
            node_id = node[p]["@id"]
            the_node = self.map_id_node.get(node_id)
            if the_node is None:
                logging.warning("%s: unknown supersededBy target %s", xid,
                                node_id)
            else:
                the_node["_supersede"] = self._copy_node(node, PLIST_REF)