def task_convert(args):
    """Convert every item in the input JSON file to a CNS item.

    Loads the bundled cns_top v2.0 schema, then runs run_convert on each
    entry of args["input_file"], logging each converted item and the final
    bug report.
    """
    logging.info("called task_convert")

    # Load the fixed top-level schema shipped next to this module.
    schema_path = file2abspath("../schema/cns_top_v2.0.jsonld", __file__)
    the_schema = CnsSchema()
    the_schema.import_jsonld(schema_path)

    input_items = file2json(args["input_file"])
    bug_report = CnsBugReport()
    for index, entity in enumerate(input_items):
        # Every converted item is typed as its declared mainType plus "Thing";
        # the list index serves as the primary key.
        type_list = [entity["mainType"], "Thing"]
        converted = run_convert(the_schema, entity, type_list, [index], bug_report)
        logging.info(json4debug(converted))
        #the_schema.run_validate(converted, bug_report)
    logging.info(json4debug(bug_report.data))
def task_graphviz(args):
    """Render the schema in args["input_file"] as graphviz .dot files.

    Imports the schema jsonld (with any preloaded dependency schemas),
    builds one dot-source per graph flavor via run_graphviz, and writes
    each to args["debug_dir"]/<name>_<key>.dot.
    """
    #logging.info( "called task_graphviz" )
    filename = args["input_file"]
    loaded_schema = CnsSchema()
    preloaded_schema_list = preload_schema(args)
    loaded_schema.import_jsonld(filename, preloaded_schema_list)

    # Fix: the original also did `jsonld_input = file2json(filename)` here
    # ("validate if we can reproduce the same jsonld"), but the result was
    # never used — dead file I/O removed.

    # Graph name = input basename without extension; dashes are not valid
    # in dot identifiers, so map them to underscores.
    name = os.path.basename(args["input_file"]).split(u".")[0]
    # Fix: was re.sub(ur"-", "_", name) — a regex (with the Python-2-only
    # `ur` prefix) for a literal one-character substitution; str.replace
    # is equivalent here because "-" has no regex metacharacters.
    name = name.replace(u"-", "_")

    ret = run_graphviz(loaded_schema, name)
    for key, lines in ret.items():
        xdebug_file = os.path.join(args["debug_dir"], name + "_" + key + u".dot")
        lines2file([lines], xdebug_file)
def task_validate(args):
    """Validate data file(s) against a CNS schema and emit reports.

    Reads the schema from args["input_schema"] (default "schema/cns_top.jsonld"),
    validates every file matching the args["input_file"] glob, writes an entity
    listing CSV to args["output_validate_entity"], and writes the validation
    report both as CSV (via write_csv_report) and as JSON.
    """
    logging.info( "called task_validate" )
    schema_filename = args.get("input_schema")
    if not schema_filename:
        schema_filename = "schema/cns_top.jsonld"
    preloadSchemaList = preload_schema(args)
    loaded_schema = CnsSchema()
    loaded_schema.import_jsonld(schema_filename, preloadSchemaList)

    # args["input_file"] may be a glob pattern matching several data files.
    filepath = args["input_file"]
    filename_list = glob.glob(filepath)
    report = init_report()

    # init xtemplate: pre-seed a counter entry (value 0) for every
    # class/property template so unreferenced templates still show up.
    report[XTEMPLATE] = collections.Counter()
    for template in loaded_schema.metadata["template"]:
        d = template["refClass"]
        p = template["refProperty"]
        # NOTE(review): d is formatted twice here ("cp_<class>_<class>_<prop>") —
        # possibly intended as u"cp_{}_{}".format(d, p); confirm against the
        # key format produced inside run_validate / write_csv_report.
        key_cp = u"cp_{}_{}_{}".format(d, d, p)
        report[XTEMPLATE][key_cp] += 0
    logging.info(json4debug(report[XTEMPLATE]))

    # init class path dependency: record each class's rdfs:subClassOf parent,
    # first for classes referenced by templates, then for all rdfs:Class
    # definitions in the schema.
    for template in loaded_schema.metadata["template"]:
        d = template["refClass"]
        key_cp = u"parent_{}".format(d)
        report[XTEMPLATE][key_cp] = loaded_schema.index_inheritance["rdfs:subClassOf"].get(d)
    for definition in loaded_schema.definition.values():
        if "rdfs:Class" in definition["@type"]:
            d = definition["name"]
            key_cp = u"parent_{}".format(d)
            report[XTEMPLATE][key_cp] = loaded_schema.index_inheritance["rdfs:subClassOf"].get(d)

    #validate
    lines = []  # entity listing CSV rows, accumulated across all input files
    for filename in filename_list:
        logging.info(filename)
        if not os.path.exists(filename):
            continue
        if args.get("option") == "jsons":
            # JSON-lines mode: one JSON object per line, validated as a stream.
            for idx, line in enumerate(file2iter(filename)):
                if idx % 10000 ==0:
                    # periodic progress log every 10k lines
                    logging.info(idx)
                    logging.info(json4debug(report))
                json_data = json.loads(line)
                run_validate(loaded_schema, json_data, report)
                stat_kg_report_per_item(json_data, None, report["stats"])

                # collection entity listing: skip CnsLink edge items, keep
                # real entities as [type, name, quoted alternateNames].
                if "CnsLink" not in json_data["@type"]:
                    # NOTE(review): only the alternateName field is quoted;
                    # a comma inside name would break the CSV row — confirm
                    # whether input names can contain commas.
                    entity_simple = [
                        json_data["@type"][0],
                        json_data.get("name",""),
                        "\""+u",".join(json_data.get("alternateName",[]))+"\""
                    ]
                    lines.append(u",".join(entity_simple))
        else:
            # default mode: whole file is a single JSON document
            jsondata = file2json(filename)
            run_validate(loaded_schema, jsondata, report)

    #out: sorted entity listing with a header row
    filename = args["output_validate_entity"]
    logging.info(filename)
    lines = sorted(lines)
    fields = ["main_type","name","alternateName"]
    lines.insert(0, u",".join(fields))
    lines2file(lines, filename)

    #display report
    logging.info(json4debug(report))

    #write report csv
    write_csv_report(args, report, loaded_schema)
    # NOTE(review): replaces the FIRST occurrence of "csv" anywhere in the
    # path (e.g. a directory named "csv_out" would be mangled); a suffix-only
    # replacement would be safer — confirm the configured path shape.
    filename = args["output_validate_report"].replace("csv","json")
    logging.info(filename)
    json2file(report, filename)