def task_cns_template(args=None):
    """Load the organization template workbook and log its property bindings.

    Reads ``../local/201707/cns-t-organization.xls`` (resolved relative to
    this file), groups the rows of the first non-empty sheet by their
    ``domain`` and ``name`` fields, and logs the resulting mapping.

    Args:
        args: Unused; accepted so the function matches the task signature.

    Returns:
        None. The bindings are only written to the log.

    Raises:
        KeyError: If a row lacks the ``domain``, ``name`` or ``range`` field.
    """
    # Field name -> spreadsheet column header(s). Not consulted below; kept
    # as a record of the expected template columns.
    column_labels = {
        "version": [u"版本"],
        "domain": [u"直属分类"],
        "name": [u"规范属性名"],
        "nameZh": [u"cnschema属性名"],
        "alternateName": [u"中文属性名"],
        "nameSchemaorg": [u"schema.org属性名"],
        "nameWikidata": [u"wikidata属性名"],
        "descriptionWikipedia": [u"wikipedia定义"],
        "range": [u"预期的属性类型"],
        "value": [u"example value"],
        "jsonld": [u"example json-ld"],
    }

    workbook_path = file2abspath("../local/201707/cns-t-organization.xls", __file__)
    workbook = excel2json(workbook_path)

    bindings = collections.defaultdict(dict)
    for rows in workbook["data"].values():
        if not rows:
            # Skip empty sheets.
            continue

        logging.info(len(rows))
        for row in rows:
            logging.info(json.dumps(row, ensure_ascii=False))
            domain = row["domain"]
            prop_name = row[u"name"]
            # Value is unused; the lookup is kept so that a row without a
            # "range" field still raises KeyError.
            range_list = row[u"range"]
            bindings[domain][prop_name] = row

        # NOTE(review): only the first non-empty sheet is processed. The
        # nesting level of this ``break`` was reconstructed from collapsed
        # source -- confirm it belongs to the sheet loop, not the row loop.
        break

    logging.info(bindings)
def read_cns_core(version):
    """Read and clean the ``cns-core`` workbook for a release.

    Loads ``../local/releases/<version>/cns-core.xls`` (resolved relative to
    this file) and returns the rows of the first entry of the workbook's
    ``"data"`` mapping (presumably one entry per sheet -- confirm against
    ``excel2json``), after cleanup.

    Cleanup performed on every row:
      * fields whose name starts with ``#`` are treated as commented out
        and dropped;
      * the ``description`` field is passed through
        ``clean_schemaorg_description``.

    Args:
        version: Release identifier used as the directory name.

    Returns:
        A new list of new dicts; the rows returned by ``excel2json`` are
        not modified.

    Raises:
        IndexError: If the workbook's ``"data"`` mapping is empty.
    """
    name = "cns-core"
    filename = "../local/releases/{}/{}.xls".format(version, name)
    filename = file2abspath(filename, __file__)
    workbook = excel2json(filename)

    # dict.values() is a non-subscriptable view on Python 3; materialize it
    # so taking the first entry works on both Python 2 and Python 3.
    items = list(workbook["data"].values())[0]
    logging.info(len(items))

    cleaned_items = []
    for item in items:
        cleaned = {}
        for field, value in item.items():
            # Skip commented-out fields.
            if field.startswith("#"):
                continue
            if field == "description":
                value = clean_schemaorg_description(value)
            cleaned[field] = value
        cleaned_items.append(cleaned)

    return cleaned_items
def read_cns_core_excel(version, path="local"):
    """Read the raw rows of the ``cns-core`` workbook for a release.

    Loads ``../<path>/releases/<version>/cns-core.xls`` (resolved relative
    to this file) and returns the first entry of the workbook's ``"data"``
    mapping without any cleanup.

    NOTE(review): this name is defined again further down in this file with
    a different default ``path``; the later definition wins at import time.
    That later version delegates to ``read_cns_core_excel_v1`` for version
    "3.2" -- presumably this function was meant to carry that name; confirm.

    Args:
        version: Release identifier used as the directory name.
        path: Top-level directory (relative to this file's parent) that
            contains the ``releases`` folder.

    Returns:
        The list of rows as produced by ``excel2json``.

    Raises:
        IndexError: If the workbook's ``"data"`` mapping is empty.
    """
    name = "cns-core"
    filename = "../{}/releases/{}/{}.xls".format(path, version, name)
    filename = file2abspath(filename, __file__)
    workbook = excel2json(filename)

    # dict.values() is a non-subscriptable view on Python 3; materialize it
    # so taking the first entry works on both Python 2 and Python 3.
    items = list(workbook["data"].values())[0]
    logging.info(len(items))
    return items
def read_cns_core_excel(version, path="data"):
    """Read the translated schema.org rows for a release.

    Version "3.2" is delegated to ``read_cns_core_excel_v1``. For any other
    version, ``../<path>/releases/<version>/schemaorg_translate.xlsx``
    (resolved relative to this file) is loaded, the entry of its ``"data"``
    mapping keyed by ``version`` is selected, and every row gets an ``@id``
    of the form ``http://cnschema.org/<name>``.

    NOTE(review): an identically named function follows this one in the
    file and replaces it at import time.

    Args:
        version: Release identifier; used both as the directory name and as
            the key into the workbook's ``"fields"`` / ``"data"`` mappings.
        path: Top-level directory (relative to this file's parent) that
            contains the ``releases`` folder.

    Returns:
        The list of row dicts, modified in place to include ``@id``.

    Raises:
        KeyError: If the workbook has no entry for ``version`` or a row has
            no ``name`` field.
    """
    if version == "3.2":
        return read_cns_core_excel_v1(version, path)

    workbook_name = "schemaorg_translate"
    relative_path = "../{}/releases/{}/{}.xlsx".format(path, version, workbook_name)
    workbook = excel2json(file2abspath(relative_path, __file__))

    # The field list is not used, but the lookup is kept so a workbook
    # without an entry for this version fails here, before the data lookup.
    field_names = workbook["fields"][version]
    rows = workbook["data"][version]

    # Enhance every row with its cnschema @id.
    for row in rows:
        row["@id"] = "http://cnschema.org/{}".format(row["name"])

    logging.info(len(rows))
    return rows
def read_cns_core_excel(version, path="data"):
    """Load the ``schemaorg_translate`` workbook rows for a release.

    Version "3.2" is handed off to ``read_cns_core_excel_v1``. Otherwise
    ``../<path>/releases/<version>/schemaorg_translate.xlsx`` (resolved
    relative to this file) is read, the entry of its ``"data"`` mapping
    keyed by ``version`` is taken, and each row is given an ``@id`` built
    from its ``name`` under ``http://cnschema.org/``.

    Args:
        version: Release identifier; also the key used to index the
            workbook's ``"fields"`` and ``"data"`` mappings.
        path: Top-level directory (relative to this file's parent) holding
            the ``releases`` folder.

    Returns:
        The list of row dicts, each updated in place with an ``@id`` key.

    Raises:
        KeyError: If the workbook has no entry for ``version`` or a row has
            no ``name`` field.
    """
    if version == "3.2":
        return read_cns_core_excel_v1(version, path)

    # Source spreadsheet:
    # https://docs.google.com/spreadsheets/d/1mpiBxI5rK_qs86IpbXgN1xbhrxS_VYF0XjI_fcRpl00/edit#gid=364353024
    sheet_file = file2abspath(
        "../{}/releases/{}/{}.xlsx".format(path, version, "schemaorg_translate"),
        __file__,
    )
    parsed = excel2json(sheet_file)

    # Looked up only for its KeyError on a missing version; value is unused.
    header_fields = parsed["fields"][version]
    records = parsed["data"][version]

    # Enhance with @id.
    id_template = "http://cnschema.org/{}"
    for record in records:
        record["@id"] = id_template.format(record["name"])

    logging.info(len(records))
    return records