def generate_page_entity_detail(self):
    """Write one HTML page and one JSON data file per schema item.

    Reads the ``page.mustache`` template located relative to this module,
    then walks ``self.map_id_schemaorg`` in sorted id order. For every id
    that does not contain ``#`` the item is converted with
    ``self.convert_extend2mustach``, rendered through the template, and
    written under ``self.dir_output`` as ``<rdfs:label>.html`` and
    ``data/<rdfs:label>.json``.
    """
    # Load the template once; it is reused for every item.
    template_path = os.path.join(
        os.path.dirname(__file__), "../templates/page.mustache")
    with codecs.open(template_path, encoding="utf-8") as f:
        template_page = f.read()

    for xid in sorted(self.map_id_schemaorg):
        if "#" in xid:
            # logging.warn is a deprecated alias; use logging.warning.
            logging.warning("skip {}".format(xid))
            continue

        item = self.map_id_schemaorg[xid]
        entry = self.convert_extend2mustach(item)
        label = entry["rdfs:label"]

        # HTML page rendered from the mustache template.
        html = pystache.render(template_page, entry)
        html_path = os.path.join(self.dir_output, "{}.html".format(label))
        create_dir_if_not_exist(html_path)
        with codecs.open(html_path, "w", encoding="utf-8") as f:
            f.write(html)

        # The same entry, dumped as JSON next to the page.
        json_path = os.path.join(
            self.dir_output, "data/{}.json".format(label))
        create_dir_if_not_exist(json_path)
        json2file(entry, json_path)
def task_cns_make_html(args=None):
    """Build the cnschema.org website from the cns-core release data.

    Steps:
      1. Read the cns-core JSON-LD items and index them by ``schemaorgUrl``.
      2. Load the schema.org data and copy the cnschema properties selected
         by ``MAP_CNSCHEMA`` onto each entry and onto the targets listed
         under its ``isDomainOf`` / ``isRangeOf`` properties.
      3. Pass the merged map through ``schemaorg2cnschema``.
      4. Save the merged map as ``cns-core.extend.json`` and run
         ``WebsiteV1`` on it.

    ``args`` is accepted but not used.
    """
    name = "cns-core"
    version = "3.2"
    site = "cnschema.org"

    items = read_cns_core_jsonld(version, path="data")
    map_id_cnschema = {item["schemaorgUrl"]: item for item in items}

    # update map_id_schemaorg with cnschema properties
    so = Schemaorg(version)
    map_id_schemaorg = so.load_data()
    for entry in map_id_schemaorg.values():
        entry.update(
            json_dict_copy(map_id_cnschema.get(entry["@id"], {}),
                           MAP_CNSCHEMA))

        for p in ["isDomainOf", "isRangeOf"]:
            for target in entry.get(p, []):
                target.update(
                    json_dict_copy(map_id_cnschema.get(target["@id"], {}),
                                   MAP_CNSCHEMA))

    # rewrite map_id_schemaorg schema.org => cnschema.org
    # NOTE: the original also ran schemaorg2cnschema on an undefined
    # ``items_new`` local, which raised UnboundLocalError on every call;
    # that stray statement has been dropped.
    map_id_schemaorg = schemaorg2cnschema(map_id_schemaorg)

    # Resolves to ../local/releases/3.2/cns-core.extend.json
    filename = "../local/releases/{}/{}.extend.json".format(version, name)
    filename = file2abspath(filename, __file__)
    json2file(map_id_schemaorg, filename)

    website = WebsiteV1(version, site, map_id_schemaorg)
    website.run()
def write_cns_core(items, version, formats=("excel", "jsonld")):
    """Write the cns-core items to the release directory.

    Args:
        items: non-empty list of dicts; the first one must contain every
            expected column key (checked with ``assert``).
        version: release version, used as the output sub-directory name.
        formats: which outputs to produce; any of ``"excel"`` and
            ``"jsonld"``. Only membership is tested, so any container works.

    Side effects:
        When ``"jsonld"`` is requested, each item's ``alternateName`` is
        replaced in place by the result of ``split_string_by_comma``.
    """
    name = "cns-core"

    # column order of the excel sheet
    keys = [
        "category", "@id", "name", "description", "supersededBy", "nameZh",
        "descriptionZh", "alternateName", "wikidataName", "wikidataUrl",
        "wikipediaUrl", "schemaorgUrl"
    ]

    # validation
    for key in keys:
        assert key in items[0].keys()

    # write excel
    if "excel" in formats:
        filename = "../data/releases/{}/{}.xls".format(version, name)
        filename = file2abspath(filename, __file__)
        json2excel(items, keys, filename)

    # write json-ld
    if "jsonld" in formats:
        p = "alternateName"
        for item in items:
            item[p] = split_string_by_comma(item.get(p, ""))

        filename = "../data/releases/{}/{}.jsonld".format(version, name)
        filename = file2abspath(filename, __file__)
        output = {
            "@context": {
                "@vocab": "http://cnschema.org/"
            },
            "@graph": items
        }
        # json2file takes (data, filename); the original call had the two
        # arguments swapped.
        json2file(output, filename)
def generate_page_vocab(self):
    """Dump the class tree and the property list as JSON under ``data2/``.

    Writes ``data2/classes.json`` (first element of the tree built from
    ``http://cnschema.org/Thing``) and ``data2/properties.json`` (a trimmed
    copy of every item whose ``_group`` is ``"property"``, in sorted id
    order) inside ``self.dir_output``.
    """
    class_tree = self._recusive_tree2json(["http://cnschema.org/Thing"])
    # The DataType tree is built as in the original flow, but this method
    # does not write it anywhere.
    type_tree = self._recusive_tree2json(["http://cnschema.org/DataType"])

    wanted_fields = [
        "rdfs:label", "rdfs:comment", "nameZh", "descriptionZh", "_supersede"
    ]
    trimmed_properties = []
    for xid in sorted(self.map_id_schemaorg):
        node = self.map_id_schemaorg[xid]
        if node["_group"] == "property":
            # keep only the whitelisted fields that are actually present
            trimmed_properties.append(
                {field: node[field] for field in wanted_fields
                 if field in node})

    data_json = {
        "classes": class_tree,
        "types": type_tree,
        "properties": trimmed_properties,
    }

    classes_path = os.path.join(self.dir_output, "data2/classes.json")
    create_dir_if_not_exist(classes_path)
    json2file(data_json["classes"][0], classes_path)

    properties_path = os.path.join(self.dir_output, "data2/properties.json")
    json2file(data_json["properties"], properties_path)
def load_data(self):
    """Return the schema node map, using a JSON cache file when present.

    If ``schemaorg.json`` exists in ``self.dir_output`` its parsed content
    is returned directly. Otherwise the examples, the 2015 word-count stats
    and the schema are initialised in that order, ``self.map_id_node`` is
    written to the cache file, and ``self.map_id_node`` is returned.
    """
    cache_path = os.path.join(self.dir_output, "schemaorg.json")

    if not os.path.exists(cache_path):
        # examples first, then the 2015 word count stats
        self._init_examples()
        self._init_stat2015()
        # the schema is initialised with information from examples and stats
        self._init_schema()

        json2file(self.map_id_node, cache_path)
        return self.map_id_node

    return file2json(cache_path)
def generate_page_vocab(self):
    """Render the vocabulary overview page and its JSON companions.

    Builds two views of the vocabulary (classes, types, properties): an
    HTML-oriented one fed to the ``vocab.mustache`` template, and a
    JSON-oriented one. Outputs, all under ``self.dir_output``:
    ``docs/vocab.htm``, ``docs/vocab.json``, ``docs/classes.json`` and
    ``docs/properties.json``.
    """
    thing_root = ["http://cnschema.org/Thing"]
    datatype_root = ["http://cnschema.org/DataType"]

    # classes
    class_lines = []
    self._recusive_tree2li(thing_root, class_lines)
    classes_html = u"\n".join(class_lines)
    classes_tree = self._recusive_tree2json(["http://cnschema.org/Thing"])

    # types
    type_lines = []
    self._recusive_tree2li(datatype_root, type_lines)
    types_html = u"\n".join(type_lines)
    types_tree = self._recusive_tree2json(["http://cnschema.org/DataType"])

    # properties: full items for the template, trimmed copies for the JSON
    wanted_fields = [
        "rdfs:label", "rdfs:comment", "nameZh", "descriptionZh", "_supersede"
    ]
    property_items = []
    for xid in sorted(self.map_id_schemaorg):
        node = self.map_id_schemaorg[xid]
        if node["_group"] == "property":
            property_items.append(node)
    property_summaries = [
        {field: node[field] for field in wanted_fields if field in node}
        for node in property_items
    ]

    content_json = {
        "classes": classes_html,
        "types": types_html,
        "properties": property_items,
    }
    data_json = {
        "classes": classes_tree,
        "types": types_tree,
        "properties": property_summaries,
    }

    # load template page
    template_path = os.path.join(
        os.path.dirname(__file__), "../templates/vocab.mustache")
    with codecs.open(template_path, encoding="utf-8") as f:
        template_page = f.read()

    # apply template
    html = pystache.render(template_page, content_json)
    html_path = os.path.join(self.dir_output, "docs/vocab.htm")
    create_dir_if_not_exist(html_path)
    with codecs.open(html_path, "w", encoding="utf-8") as f:
        f.write(html)

    vocab_path = os.path.join(self.dir_output, "docs/vocab.json")
    create_dir_if_not_exist(vocab_path)
    json2file(data_json, vocab_path)

    classes_path = os.path.join(self.dir_output, "docs/classes.json")
    json2file(data_json["classes"][0], classes_path)

    properties_path = os.path.join(self.dir_output, "docs/properties.json")
    json2file(data_json["properties"], properties_path)
def task_superclasses(args):
    """Derive a class -> [class, ancestors...] map from the taxonomy file.

    Reads ``schema_taxonomy.json``, lets ``loadmapping`` collect ``pairs``
    (dicts read here through their ``"to"`` and ``"path"`` keys), and writes
    ``schema.superclass.json``. Each list starts with the class itself,
    followed by every entry found in the ``"path"`` of its pairs, in
    first-seen order and without duplicates.

    ``args`` is accepted but not used.
    """
    filename = "../local/releases/3.2/schema_taxonomy.json"
    filename = file2abspath(filename, __file__)
    data = file2json(filename)

    pairs = []
    loadmapping(data, [], pairs)
    logging.info(json.dumps(pairs, indent=4, ensure_ascii=False))

    mapping = collections.defaultdict(list)
    for pair in pairs:
        key = pair["to"]
        lineage = mapping[key]
        # Seed each list with the class itself exactly once. The original
        # appended it once per pair, so a class appearing in several pairs
        # ended up with repeated self entries.
        if not lineage:
            lineage.append(key)
        for parent in pair["path"]:
            if parent not in lineage:
                lineage.append(parent)

    logging.info(json.dumps(mapping, indent=4, ensure_ascii=False))

    filename = "../data/releases/3.2/schema.superclass.json"
    filename = file2abspath(filename, __file__)
    json2file(mapping, filename)