コード例 #1
0
ファイル: cns_convert.py プロジェクト: dingmaotu/kgtool
def task_convert(args):
    """Convert each record of the input JSON file and log the results.

    The schema file ``../schema/cns_top_v2.0.jsonld`` is passed through
    ``file2abspath`` together with ``__file__`` (presumably to resolve it
    relative to this module -- confirm against ``file2abspath``) and then
    imported into a fresh ``CnsSchema``.

    Every record read from ``args["input_file"]`` is handed to
    ``run_convert`` with:

    * its types: the record's ``"mainType"`` followed by ``"Thing"``;
    * its primary keys: a one-element list holding the record's index.

    Each converted record is logged, and the data accumulated in the shared
    ``CnsBugReport`` is logged once after all records have been processed.

    Args:
        args: mapping of task options; ``"input_file"`` is required.
    """
    logging.info("called task_convert")

    schema_path = file2abspath("../schema/cns_top_v2.0.jsonld", __file__)
    schema = CnsSchema()
    schema.import_jsonld(schema_path)

    records = file2json(args["input_file"])
    bug_report = CnsBugReport()

    for position, record in enumerate(records):
        converted = run_convert(
            schema,
            record,
            [record["mainType"], "Thing"],
            [position],
            bug_report,
        )
        logging.info(json4debug(converted))
        # NOTE: validating the converted record is currently disabled.

    logging.info(json4debug(bug_report.data))
コード例 #2
0
def task_graphviz(args):
    """Render the schema in ``args["input_file"]`` to Graphviz ``.dot`` files.

    The schema is imported into a ``CnsSchema`` together with whatever
    ``preload_schema(args)`` returns.  ``run_graphviz`` is then called with
    a graph name derived from the input file's base name (text before the
    first ``.``, with ``-`` replaced by ``_``).  For every ``key`` in its
    result, one file named ``<name>_<key>.dot`` is written into
    ``args["debug_dir"]``.

    Args:
        args: mapping of task options; ``"input_file"`` and ``"debug_dir"``
            are read here, and the mapping is also passed to
            ``preload_schema``.
    """
    filename = args["input_file"]
    loaded_schema = CnsSchema()
    preloaded_schema_list = preload_schema(args)
    loaded_schema.import_jsonld(filename, preloaded_schema_list)

    # The loaded JSON is not used below; the call is kept so that an input
    # file ``file2json`` cannot load still fails at this point.
    # NOTE(review): drop it if import_jsonld already covers that case.
    file2json(filename)

    # Plain str.replace instead of re.sub(ur"-", ...): the ``ur`` string
    # prefix is a syntax error on Python 3, and no regex is needed for a
    # literal dash.
    name = os.path.basename(filename).split(u".")[0]
    name = name.replace("-", "_")

    ret = run_graphviz(loaded_schema, name)
    for key, lines in ret.items():
        xdebug_file = os.path.join(args["debug_dir"],
                                   name + "_" + key + u".dot")
        # Each value is wrapped in a one-element list for lines2file.
        lines2file([lines], xdebug_file)
コード例 #3
0
def _init_template_report(report, loaded_schema):
    """Seed ``report[XTEMPLATE]`` with template counters and class parents."""
    report[XTEMPLATE] = collections.Counter()
    templates = loaded_schema.metadata["template"]

    # One zero-valued counter per template, so that templates which are
    # never hit still show up in the report.
    for template in templates:
        d = template["refClass"]
        p = template["refProperty"]
        key_cp = u"cp_{}_{}_{}".format(d, d, p)
        report[XTEMPLATE][key_cp] += 0
    logging.info(json4debug(report[XTEMPLATE]))

    # Record the rdfs:subClassOf entry of every class referenced by a
    # template ...
    for template in templates:
        d = template["refClass"]
        key_cp = u"parent_{}".format(d)
        report[XTEMPLATE][key_cp] = (
            loaded_schema.index_inheritance["rdfs:subClassOf"].get(d))

    # ... and of every definition typed as rdfs:Class.
    for definition in loaded_schema.definition.values():
        if "rdfs:Class" in definition["@type"]:
            d = definition["name"]
            key_cp = u"parent_{}".format(d)
            report[XTEMPLATE][key_cp] = (
                loaded_schema.index_inheritance["rdfs:subClassOf"].get(d))


def _entity_csv_line(json_data):
    """Build the ``main_type,name,"alt1,alt2"`` listing line for an entity."""
    entity_simple = [
        json_data["@type"][0],
        json_data.get("name", ""),
        "\"" + u",".join(json_data.get("alternateName", [])) + "\"",
    ]
    return u",".join(entity_simple)


def _report_json_path(csv_path):
    """Return the JSON report path that sits next to ``csv_path``.

    Only the file extension is swapped.  A blanket
    ``csv_path.replace("csv", "json")`` would also rewrite directory names
    or file stems that contain "csv", and would leave an upper-case
    ``.CSV`` path unchanged so the JSON report overwrote the CSV one.
    Paths without a ``.csv`` extension keep the previous substring
    replacement for backward compatibility.
    """
    root, ext = os.path.splitext(csv_path)
    if ext.lower() == ".csv":
        return root + ".json"
    return csv_path.replace("csv", "json")


def task_validate(args):
    """Validate data files against a schema and write the resulting reports.

    Steps:

    1. Import the schema named by ``args["input_schema"]`` (default
       ``"schema/cns_top.jsonld"``) together with the result of
       ``preload_schema(args)``.
    2. Initialise the report and seed its ``XTEMPLATE`` section.
    3. Validate every existing file matching the glob pattern
       ``args["input_file"]``.  With ``args["option"] == "jsons"`` a file
       is read through ``file2iter`` and each item is parsed as one JSON
       document; otherwise the whole file is loaded and validated at once.
    4. Write the sorted entity listing (jsons mode only; records whose
       ``"@type"`` contains ``"CnsLink"`` are excluded) with a header line
       to ``args["output_validate_entity"]``.
    5. Log the report, write it via ``write_csv_report`` and dump it as
       JSON next to ``args["output_validate_report"]``.

    Args:
        args: mapping of task options; the keys named above are read here
            and the mapping is also passed to ``preload_schema`` and
            ``write_csv_report``.
    """
    logging.info("called task_validate")
    schema_filename = args.get("input_schema")
    if not schema_filename:
        schema_filename = "schema/cns_top.jsonld"

    preload_schema_list = preload_schema(args)
    loaded_schema = CnsSchema()
    loaded_schema.import_jsonld(schema_filename, preload_schema_list)

    filename_list = glob.glob(args["input_file"])
    report = init_report()
    _init_template_report(report, loaded_schema)

    # validate
    lines = []
    is_jsons = args.get("option") == "jsons"

    for filename in filename_list:
        logging.info(filename)
        if not os.path.exists(filename):
            continue

        if not is_jsons:
            run_validate(loaded_schema, file2json(filename), report)
            continue

        for idx, line in enumerate(file2iter(filename)):
            # Periodic progress output with a snapshot of the report.
            if idx % 10000 == 0:
                logging.info(idx)
                logging.info(json4debug(report))
            json_data = json.loads(line)
            run_validate(loaded_schema, json_data, report)
            stat_kg_report_per_item(json_data, None, report["stats"])

            # Collect the entity listing; link records are excluded.
            if "CnsLink" not in json_data["@type"]:
                lines.append(_entity_csv_line(json_data))

    # write the entity listing, sorted, below a header line
    filename = args["output_validate_entity"]
    logging.info(filename)
    lines = sorted(lines)
    fields = ["main_type", "name", "alternateName"]
    lines.insert(0, u",".join(fields))
    lines2file(lines, filename)

    # display report
    logging.info(json4debug(report))

    # write report csv
    write_csv_report(args, report, loaded_schema)

    # write report json alongside the csv report
    filename = _report_json_path(args["output_validate_report"])
    logging.info(filename)
    json2file(report, filename)