Exemple #1
0
def task_cns_template(args=None):
    """Index the rows of the organization template workbook and log them.

    The workbook path ``../local/201707/cns-t-organization.xls`` is passed
    through ``file2abspath`` together with ``__file__`` and loaded with
    ``excel2json``.  Only the first sheet under ``"data"`` that contains rows
    is processed: every row is logged as JSON and stored as
    ``bindings[row["domain"]][row["name"]] = row``.  The resulting index is
    logged at the end; the function returns nothing.

    ``args`` is accepted but never read.
    """
    # Appears to pair each row key with its spreadsheet header label; the
    # table is not consulted anywhere below.
    header_labels = {
        "version": [u"版本"],
        "domain": [u"直属分类"],
        "name": [u"规范属性名"],
        "nameZh": [u"cnschema属性名"],
        "alternateName": [u"中文属性名"],
        "nameSchemaorg": [u"schema.org属性名"],
        "nameWikidata": [u"wikidata属性名"],
        "descriptionWikipedia": [u"wikipedia定义"],
        "range": [u"预期的属性类型"],
        "value": [u"example value"],
        "jsonld": [u"example json-ld"],
    }

    workbook = excel2json(
        file2abspath("../local/201707/cns-t-organization.xls", __file__))
    bindings = collections.defaultdict(dict)

    # Empty sheets are skipped silently; the first sheet with rows is the
    # only one that gets indexed.
    first_sheet = next(
        (rows for rows in workbook["data"].values() if len(rows) != 0),
        None)
    if first_sheet is not None:
        logging.info(len(first_sheet))
        for row in first_sheet:
            logging.info(json.dumps(row, ensure_ascii=False))
            domain = row["domain"]
            prop_name = row[u"name"]
            # The value is unused, but the lookup still requires every row
            # to carry a "range" column.
            _ = row[u"range"]
            bindings[domain][prop_name] = row

    logging.info(bindings)
Exemple #2
0
def read_cns_core(version):
    """Load the ``cns-core`` workbook for *version* and return cleaned rows.

    The path ``../local/releases/<version>/cns-core.xls`` is passed through
    ``file2abspath`` together with ``__file__`` and loaded with
    ``excel2json``.  The rows of the first sheet under ``"data"`` are copied
    into new dicts with this clean-up applied:

    * keys that start with ``#`` (commented-out fields) are dropped;
    * the ``description`` value is passed through
      ``clean_schemaorg_description``.

    Returns the list of cleaned row dicts.  Raises ``IndexError`` when the
    workbook has no sheets.
    """
    filename = file2abspath(
        "../local/releases/{}/{}.xls".format(version, "cns-core"), __file__)
    workbook = excel2json(filename)

    # dict.values() is a non-subscriptable view on Python 3, so materialize
    # it before taking the first sheet (works unchanged on Python 2).
    items = list(workbook["data"].values())[0]
    logging.info(len(items))

    cleaned = []
    for item in items:
        item_new = {}
        for key, value in item.items():
            # skip commented fields
            if key.startswith("#"):
                continue
            if key == "description":
                value = clean_schemaorg_description(value)
            item_new[key] = value
        cleaned.append(item_new)

    return cleaned
Exemple #3
0
def read_cns_core_excel(version, path="local"):
    """Return the rows of the first sheet of the ``cns-core`` workbook.

    The path ``../<path>/releases/<version>/cns-core.xls`` is passed through
    ``file2abspath`` together with ``__file__`` and loaded with
    ``excel2json``.  The number of rows is logged before they are returned
    unmodified.

    Raises ``IndexError`` when the workbook has no sheets.
    """
    filename = file2abspath(
        "../{}/releases/{}/{}.xls".format(path, version, "cns-core"),
        __file__)
    workbook = excel2json(filename)

    # dict.values() is a non-subscriptable view on Python 3, so materialize
    # it before taking the first sheet (works unchanged on Python 2).
    items = list(workbook["data"].values())[0]
    logging.info(len(items))

    return items
Exemple #4
0
def read_cns_core_excel(version, path="data"):
    """Return the rows of the translation sheet named after *version*.

    Version ``"3.2"`` is delegated to ``read_cns_core_excel_v1``.  For any
    other version the path
    ``../<path>/releases/<version>/schemaorg_translate.xlsx`` is passed
    through ``file2abspath`` together with ``__file__`` and loaded with
    ``excel2json``; the sheet whose key equals *version* is used.  Each row
    gets an ``"@id"`` of the form ``http://cnschema.org/<name>`` written into
    it in place, the row count is logged, and the rows are returned.
    """
    if version == "3.2":
        return read_cns_core_excel_v1(version, path)

    relative_path = "../{}/releases/{}/{}.xlsx".format(
        path, version, "schemaorg_translate")
    workbook = excel2json(file2abspath(relative_path, __file__))

    # The field list itself is unused; the lookup is kept so a workbook
    # without a "fields" entry for this version fails the same way.
    _ = workbook["fields"][version]
    rows = workbook["data"][version]

    # enhance with @id
    for row in rows:
        row["@id"] = "http://cnschema.org/{}".format(row["name"])
    logging.info(len(rows))

    return rows
Exemple #5
0
def read_cns_core_excel(version, path="data"):
    """Load the ``schemaorg_translate`` sheet for *version* and tag its rows.

    Version ``"3.2"`` is delegated to ``read_cns_core_excel_v1``.  Otherwise
    the path ``../<path>/releases/<version>/schemaorg_translate.xlsx`` is
    passed through ``file2abspath`` together with ``__file__`` and loaded
    with ``excel2json``.  The sheet keyed by *version* supplies the rows;
    each one is given an ``"@id"`` of ``http://cnschema.org/<name>`` in
    place.  The row count is logged and the rows are returned.
    """
    if version == "3.2":
        return read_cns_core_excel_v1(version, path)

    # Spreadsheet referenced by the original author:
    # https://docs.google.com/spreadsheets/d/1mpiBxI5rK_qs86IpbXgN1xbhrxS_VYF0XjI_fcRpl00/edit#gid=364353024
    sheet_file = file2abspath(
        "../{}/releases/{}/{}.xlsx".format(
            path, version, "schemaorg_translate"),
        __file__)
    workbook = excel2json(sheet_file)

    # The field list itself is unused; the lookup is kept so a workbook
    # without a "fields" entry for this version fails the same way.
    _ = workbook["fields"][version]
    records = workbook["data"][version]

    # enhance with @id
    for record in records:
        record["@id"] = "http://cnschema.org/{}".format(record["name"])
    logging.info(len(records))

    return records