def resource_ttl(operator):
    """Return the turtle resource for *operator*: its label (default 'Unknown') with spaces removed."""
    label = labels.get(operator.lower(), 'Unknown')
    return Rsc.operator_ttl(label.replace(' ', ''))
class Validate:
    """
    Static terminology resources for link-validation flags.

    Each flag (ACCEPTED, REJECTED, UNCERTAIN, UNCHECKED, DISPUTED) gets a
    turtle resource, label(s), a human-readable description and a ready-made
    block of turtle triples documenting that flag.
    """
    # NOTE(review): `global` in a class body is superfluous for *reading* a
    # module-level name — kept as-is to preserve the original tokens.
    global RSC_SPACE
    # Indentation budget for description templates (two levels deeper than resources).
    TPL_SPACE = 2 + RSC_SPACE

    # header = F"\n\n{'#' * 110}\n#{'VALIDATION TERMINOLOGY':^108}#\n{'#' * 110}\n\n"

    # -------------------------------------- #
    #                ACCEPTED                #
    # -------------------------------------- #

    accepted = Rsc.ga_resource_ttl('Accepted')
    accepted_label = "Accepted"
    accepted_desc = F"""
{space * TPL_SPACE}A validation process that results in the link under scrutiny being flagged as ACCEPTED.
{space * TPL_SPACE}This, with the intent of notifying that the link has undergone and PASSED a user-defined set
{space * TPL_SPACE}of checks which gives ground to CONFIRM the rightful creation of the context dependent link."""

    # Turtle triples documenting the ACCEPTED flag resource.
    accepted_doc = F"""
{space * RSC_SPACE}### VALIDATED AS ACCEPTED
{space * RSC_SPACE}{accepted}
{space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
{space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, Literal(accepted_label).n3(), line=False)}
{space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(accepted_desc).n3(), line=False, end=True)}
"""

    # -------------------------------------- #
    #                REJECTED                #
    # -------------------------------------- #

    rejected = Rsc.ga_resource_ttl('Rejected')
    rejected_label = ["Not Accepted", "Rejected"]
    rejected_desc = F"""
{space * TPL_SPACE}A validation process that results in the link under scrutiny being flagged as REJECTED.
{space * TPL_SPACE}This, with the intent of notifying that the link has undergone and FAILED a user-defined
{space * TPL_SPACE}set of checks which gives ground to REFUTE the creation of the context dependent link."""

    rejected_doc = F"""
{space * RSC_SPACE}### VALIDATED AS REJECTED
{space * RSC_SPACE}{rejected}
{space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
{space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, valueList(rejected_label, newLine=False), line=False)}
{space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(rejected_desc).n3(), line=False, end=True)}
"""

    # -------------------------------------- #
    #          UNCERTAIN - NOT SURE          #
    # -------------------------------------- #

    unsure = Rsc.ga_resource_ttl('Uncertain')
    unsure_label = ["Not Sure", "Unsure", "Uncertain"]
    unsure_desc = F"""
{space * TPL_SPACE}A validation process that results in the link under scrutiny being flagged as UNCERTAIN.
{space * TPL_SPACE}This flag reveals the lack of confidence in confirming or refuting the creation of
{space * TPL_SPACE}the context dependent link."""

    unsure_doc = F"""
{space * RSC_SPACE}### VALIDATED AS UNCERTAIN
{space * RSC_SPACE}{unsure}
{space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
{space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, valueList(unsure_label, newLine=False), line=False)}
{space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(unsure_desc).n3(), line=False, end=True)}
"""

    # -------------------------------------- #
    #        UNCHECKED - NOT VALIDATED       #
    # -------------------------------------- #

    unchecked = Rsc.ga_resource_ttl('Unchecked')
    unchecked_label = ["Not Validated", "Not Checked", "Unchecked"]
    unchecked_desc = F"""
{space * TPL_SPACE}Flagging a link as UNCHECKED literally highlights that it has not undergone any user-defined scrutiny
{space * TPL_SPACE}such that it could be flagged as ACCEPTED, REJECTED or UNCERTAIN"""

    unchecked_doc = F"""
{space * RSC_SPACE}### VALIDATED AS UNCHECKED
{space * RSC_SPACE}{unchecked}
{space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
{space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, valueList(unchecked_label, newLine=False), line=False)}
{space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(unchecked_desc).n3(), line=False, end=True)}
"""

    # -------------------------------------- #
    #    MIXED - DISPUTED - CONTRADICTION    #
    # -------------------------------------- #

    mixed = Rsc.ga_resource_ttl('Disputed')
    mixed_label = ["Contradictory", "Disputed"]
    mixed_desc = F"""
{space * TPL_SPACE}A validation process that results in the link under scrutiny being flagged as DISPUTED.
{space * TPL_SPACE}This, with the intent of notifying that the link has undergone MULTIPLE user-defined set of checks
{space * TPL_SPACE}which result in a contradiction. In other words, the same link has been flagged with contradicting
{space * TPL_SPACE}labels such as for example ACCEPTED, ACCEPTED, UNSURE AND UNCHECKED"""

    mixed_doc = F"""
{space * RSC_SPACE}### VALIDATED AS DISPUTED
{space * RSC_SPACE}{mixed}
{space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
{space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, valueList(mixed_label, newLine=False), line=False)}
{space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(mixed_desc).n3(), line=False, end=True)}
"""

    # Generic description of what accepting / rejecting a link entails.
    generic_desc = F"""
{space * TPL_SPACE}Unless explicitly stated in a specific validation resource,
{space * TPL_SPACE}a. An accepted link entails that the validator agrees with the alignment (matching) in general and
{space * TPL_SPACE}with the specific correspondence under scrutiny.
{space * TPL_SPACE}b. A rejected link entails that the validator agrees with the alignment (matching) in general but
{space * TPL_SPACE}disagrees with the specific correspondence under scrutiny. Together, the values of the selected
{space * TPL_SPACE}properties provided by the validator justify her disagreement.
{space * TPL_SPACE}c. Rejecting an alignment entails that the validator disagrees with the alignment in general and
{space * TPL_SPACE}may have selected a set of properties for which the values justify her disagreement."""

    # Flag name (as found in validation data) -> flag resource.
    get_resource = {
        'accepted': accepted,
        'rejected': rejected,
        'not_sure': unsure,
        'not_validated': unchecked,
        'mixed': mixed
    }

    # Flag name -> turtle documentation block for that flag.
    get_triples = {
        'accepted': accepted_doc,
        'rejected': rejected_doc,
        'not_sure': unsure_doc,
        'not_validated': unchecked_doc,
        'mixed': mixed_doc
    }

    def terminology(self):
        # Concatenate the header and every flag's documentation block.
        return F"{header('LINK VALIDATION TERMINOLOGY')}" \
               F"{self.accepted_doc}" \
               F"{self.rejected_doc}" \
               F"{self.unsure_doc}" \
               F"{self.unchecked_doc}" \
               F"{self.mixed_doc}"
def csv2Linkset(csv_linkset_file: str, link_type: str, auto_prefixes: dict):
    """
    Convert a CSV linkset file into a Turtle file with standard reification.

    The first CSV row is treated as the header; columns beyond the first two
    (source, target) that match CSV_HEADERS become annotation predicates on
    the reified link.

    :param csv_linkset_file : path of the CSV file to convert.
    :param link_type        : URI of the linking predicate.
    :param auto_prefixes    : dictionary of automatically collected prefixes.
    :return                 : path of the generated ``.ttl`` file.
    """
    errors = ""
    ttl_file = F"{csv_linkset_file}.ttl"

    # Annotation predicates (beyond source/target) mapped to their CSV column index.
    vars_dic = defaultdict(int)
    vars_size = 0

    with open(ttl_file, "w") as writer, open(csv_linkset_file, "r") as csv_file:

        # Dissect the link-type for prefix - namespace - name and turtle.
        predicate_data = uri2ttl(
            reconstructTurtle(link_type, auto_prefixes=auto_prefixes),
            auto_prefixes=auto_prefixes)

        for count, row in enumerate(csv_reader(csv_file)):

            # THE FIRST LINE IS ASSUMED TO BE THE HEADER:
            # map the recognised columns (starting at position 2) to predicates.
            if count == 0:
                for column in range(2, len(row)):
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row[column]]] = column
                        vars_size += 1
                continue

            # GET THE SOURCE AND TARGET URIS
            src_data = uri2ttl(uri=row[0], auto_prefixes=auto_prefixes)
            trg_data = uri2ttl(row[1], auto_prefixes)

            # Skip rows for which a turtle form could not be produced.
            if not (src_data and trg_data
                    and src_data[3] is not None and trg_data[3] is not None):
                continue

            # GENERATION OF THE LINK
            writer.write(F"\n\t### LINK Nbr: {count}\n")
            link = F"\t{src_data[3]} {predicate_data[3]} {trg_data[3]} .\n"
            writer.write(link)

            # STANDARD REIFICATION
            code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}")
            writer.write(F"\n\t### STANDARD REIFICATION Nbr: {count}")
            writer.write(F"\n\t{code}\n")
            writer.write(F"\t{preVal('a', 'rdf:Statement')}")
            writer.write(F"\t{preVal('rdf:predicate', predicate_data[3])}")
            writer.write(F"\t{preVal('rdf:subject', src_data[3])}")
            writer.write(F"\t{preVal('rdf:object', trg_data[3])}")

            # ANNOTATION OF THE LINK USING THE REIFIED CODE
            for counter, (predicate, index) in enumerate(vars_dic.items()):
                end = ".\n" if counter == vars_size - 1 else ";"
                writer.write(
                    F"\t\t{predicate:{Vars.PRED_SIZE}}"
                    F" {Literal(row[index]).n3() if not Grl.isDecimalLike(row[index]) else round(float(row[index]), 5)} {end}\n")

    return ttl_file
def rdfStarLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0):
    """
    Yield RDF-star (Turtle-star) triples for a batch of CSV link rows.

    The first row is assumed to be the header; columns beyond the first two
    (source, target) that match CSV_HEADERS become annotation predicates on
    the quoted triple.

    :param link_predicate : a turtle representation of the linking predicate.
    :param result_batch   : an iterable of CSV rows (first row = header).
    :param offset         : integer added to the link numbering in comments.
    """
    errors = ""
    vars_size = 0                  # number of annotation columns found in the header
    buffer = Buffer()              # accumulates one link's triples before each yield
    vars_dic = defaultdict(int)    # annotation predicate -> CSV column index

    for count, row in enumerate(result_batch):
        # NOTE(review): `if True:` looks like the remnant of a disabled
        # try/except (siblings use try/except here) — kept verbatim.
        if True:
            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE AND TARGET URIS
                src_data, trg_data = row[0], row[1]

                # GENERATION OF THE LINK
                if src_data and trg_data:
                    # The RDF-star subject: a quoted triple when annotations
                    # follow, otherwise a plain terminated triple.
                    buffer.write(F"{space}### LINK Nbr: {count + offset}\n"
                                 F"{space}<<<{src_data}> {link_predicate} <{trg_data}>>>\n"
                                 if len(vars_dic) > 0
                                 else F"{space}<{src_data}> {link_predicate} <{trg_data}> .\n")

                    # ANNOTATION OF THE LINK
                    # e.g. ll_val:has-link-validation "not_validated" .
                    for counter, (predicate, index) in enumerate(vars_dic.items()):
                        # The last annotation terminates the statement with ".".
                        end = ".\n" if counter == vars_size - 1 else ";"

                        # APPENDING THE CLUSTER SIZE
                        # if clusters and predicate == VoidPlus.cluster_ID_ttl and int(row[index]) in clusters:
                        #     buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}"
                        #                  F"{Literal(clusters[int(row[index])]['size']).n3(MANAGER)} ;\n")

                        # APPENDING THE VALIDATION FLAG
                        # if predicate == VoidPlus.has_validation_flag_ttl:
                        #     triple_value = validate.get_resource[row[index]]

                        # APPENDING THE VALIDATION FLAG RESOURCE:
                        # the validation resource key is the deterministic hash of the
                        # lexicographically ordered pair of URIs plus the predicate.
                        if predicate == VoidPlus.has_validation_ttl:
                            small = src_data if src_data < trg_data else trg_data
                            big = trg_data if small == src_data else src_data
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key)
                            # buffer.write(F"{space * 2}{VoidPlus.has_validation_ttl:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif predicate == VoidPlus.cluster_ID_ttl:
                            cluster_id = int(row[index])
                            triple_value = Rsc.cluster_ttl(cluster_id)
                            # clusters[cluster_id]['item'].extend([src_data, trg_data])

                        # APPENDING ANYTHING ELSE: decimals rounded to 5 places,
                        # everything else as a plain literal.
                        else:
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(row[index]) \
                                else Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:
                # THE CSV HEADER: map recognised columns (starting at
                # position 2) to their annotation predicates.
                for column in range(2, len(row)):
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row[column]]] = column
                        vars_size += 1
def validationGraphGenerator(validationset, linksetStats, auto_prefixes, setGraph, set_id, created, isLinkset: bool):
    """
    Generate the turtle named-graph holding validation metadata and the
    validation resources of a linkset or lens.

    :param validationset : dict with 'items' (key -> validation dict) and optional
                           'creator' / 'publisher' entries.
    :param linksetStats  : dict of statistics (accepted, rejected, ... counts).
    :param auto_prefixes : dictionary of automatically collected prefixes (mutated
                           with the lens prefix when *isLinkset* is False).
    :param setGraph      : turtle resource of the validated linkset/lens graph.
    :param set_id        : identifier appended to the validationset graph name.
    :param created       : creation timestamp for dcterms:created.
    :param isLinkset     : True for a linkset, False for a lens.
    :return              : the serialised graph as a string (None when
                           *validationset* is empty, as before).
    """
    # THE LAST STATUS MUST ALWAYS HAVE A VALUE SO THAT IT DETERMINES THE LAST TRIPLE
    predicate_map = {
        "Motivation": VoidPlus.motivation_ttl,
        "Status": VoidPlus.has_validation_status_ttl
    }

    if isLinkset is False:
        auto_prefixes[Rsc.lens] = "lens"

    if validationset:

        validationset_graph = F"{Rsc.validationset_ttl(Grl.deterministicHash(validationset))}-{set_id}"
        writer = Buffer()

        # APPENDING ALL NAMESPACES
        writer.write(
            linksetNamespaces(
                auto_prefixes,
                isValidated=True,
                isClustered=Vars.clusters in linksetStats and linksetStats[Vars.clusters] > 0
            ))

        # VALIDATION METADATA
        writer.write(F'{header("LINK VALIDATION METADATA")}\n\n')
        writer.write(F"{validationset_graph}\n")
        writer.write(preVal('a', VoidPlus.Validationset_ttl))
        writer.write(preVal(VoidPlus.hasTarget_ttl, setGraph))

        if "creator" in validationset and len(validationset["creator"].strip()) > 0:
            writer.write(preVal(Sns.DCterms.creator_ttl, Literal(validationset["creator"]).n3()))

        if "publisher" in validationset and len(validationset["publisher"].strip()) > 0:
            writer.write(preVal(Sns.DCterms.publisher_ttl, Literal(validationset["publisher"]).n3()))

        # CREATED — FIX: the datatype was XSD.dateTi (a non-existent XSD term);
        # xsd:dateTime is the valid datatype for a timestamp literal.
        writer.write(preVal(Sns.DCterms.created_ttl, Literal(created, datatype=XSD.dateTime).n3(MANAGER)))

        # EXPORT TIMESTAMP
        writer.write(preVal(VoidPlus.exportDate_ttl, Grl.getXSDTimestamp()))

        # VALIDATION STATS — each count is written only when present and >= 0.
        writer.write(F"\n{space}### VOID+ VALIDATION STATS\n")

        # THE TOTAL AMOUNT OF LINKS ACCEPTED
        if Vars.accepted in linksetStats and linksetStats[Vars.accepted] > -1:
            writer.write(preVal(VoidPlus.accepted_ttl, Rsc.literal_resource(linksetStats[Vars.accepted])))

        # THE TOTAL AMOUNT OF LINKS REJECTED
        if Vars.rejected in linksetStats and linksetStats[Vars.rejected] > -1:
            writer.write(preVal(VoidPlus.rejected_ttl, Rsc.literal_resource(linksetStats[Vars.rejected])))

        # THE TOTAL AMOUNT OF LINKS WITH AN UNCERTAIN VALIDATION FLAG
        if Vars.not_sure in linksetStats and linksetStats[Vars.not_sure] > -1:
            writer.write(preVal(VoidPlus.uncertain_ttl, Rsc.literal_resource(linksetStats[Vars.not_sure])))

        # THE TOTAL AMOUNT OF LINKS NOT VALIDATED
        if Vars.notValidated in linksetStats and linksetStats[Vars.notValidated] > -1:
            writer.write(preVal(VoidPlus.unchecked_ttl, Rsc.literal_resource(linksetStats[Vars.notValidated])))

        writer.write("\n")
        writer.write(preVal(Sns.DCterms.description_ttl, Rsc.literal_resource(validate.generic_desc), end=True))

        # VALIDATION TERMS
        writer.write(validate.terminology())

        # VALIDATIONSET
        writer.write(F'{header("VALIDATIONSET")}\n\n')
        writer.write(F"{validationset_graph}\n{{")

        # VALIDATIONS
        for key, validation in validationset['items'].items():

            writer.write(F'\n\t{Rsc.validation_ttl(key)}\n')
            writer.write(preVal('a', VoidPlus.Validation_ttl, position=2))

            # The dict value IS the predicate — no need for a second lookup.
            for index, (val_header, curr_feature) in enumerate(predicate_map.items()):
                end = index == len(predicate_map) - 1

                if curr_feature:
                    # ACCEPTED | REJECTED | NOT-VALIDATED | UNSURE | MIXED
                    if curr_feature == VoidPlus.has_validation_status_ttl:
                        writer.write(preVal(VoidPlus.has_validation_status_ttl,
                                            validate.get_resource[validation[val_header]],
                                            end=end, position=2))

                    elif validation[val_header]:
                        writer.write(preVal(curr_feature,
                                            Literal(validation[val_header]).n3(MANAGER),
                                            end=end, position=2))

        writer.write("}")
        return writer.getvalue()
def clusterGraphGenerator(clusters, stats, auto_prefixes, linksetGraph, created, linkset_id):
    """
    Generate the turtle named-graph partitioning the matched resources into
    annotated clusters.

    :param clusters      : dict of cluster-id -> cluster data ('nodes', 'links',
                           'values', size/extension features).
    :param stats         : dict of linkset statistics ('largest_size',
                           'largest_count', validation counts, triples).
    :param auto_prefixes : dictionary of automatically collected prefixes.
    :param linksetGraph  : turtle resource of the clustered linkset graph.
    :param created       : creation timestamp for dcterms:created.
    :param linkset_id    : identifier appended to the clusterset graph name.
    :return              : the serialised graph as a string (None when
                           *clusters* is empty, as before).
    """
    node_count = 0     # total number of distinct clustered nodes (patched in at the end)
    validated = 0      # number of clusters whose links are all validated
    clusterset_graph = F"{Rsc.clusterset_ttl(Grl.deterministicHash(clusters))}-{linkset_id}"

    if clusters:

        writer = Buffer()

        # Cluster-data feature name -> VoidPlus predicate.
        predicate_map = {
            # SET OF NODES
            # "nodes": VoidPlus.size_ttl,
            "extended": VoidPlus.extended_ttl,
            "id": VoidPlus.intID_ttl,
            "hash_id": VoidPlus.hashID_ttl,
            # VALIDATIONS
            # "links": VoidPlus.links_ttl,
            "reconciled": VoidPlus.reconciled_ttl,
            "size": VoidPlus.size_ttl,
            "accepted": VoidPlus.accepted_ttl,
            "rejected": VoidPlus.rejected_ttl,
            "not_sure": VoidPlus.uncertain_ttl,
            "mixed": VoidPlus.contradictions_ttl,
            "not_validated": VoidPlus.unchecked_ttl,
            'network_id': VoidPlus.network_ID_ttl
        }

        # APPENDING ALL NAMESPACES
        writer.write(
            linksetNamespaces(
                auto_prefixes,
                isClustered=clusters and len(clusters) > 0,
                isValidated=(Vars.notValidated in stats and stats[Vars.notValidated] < stats[Vars.triples]) is True
            ))

        # THE CLUSTER METADATA — the node and validation counts are not known
        # yet, so placeholders are written and substituted at the end.
        writer.write(F'{header("RESOURCE PARTITIONING METADATA")}\n\n')
        writer.write(F"{clusterset_graph}\n")
        writer.write(preVal('a', VoidPlus.Clusterset_ttl))
        writer.write(preVal(VoidPlus.clusters_ttl, Literal(len(clusters)).n3(MANAGER)))
        writer.write(preVal(Sns.VoID.entities_ttl, "###NodeCounts"))
        writer.write(preVal(VoidPlus.validations_ttl, "###VALIDATED"))
        writer.write(preVal(VoidPlus.largestNodeCount_ttl, Rsc.literal_resource(stats['largest_size'])))
        writer.write(preVal(VoidPlus.largestLinkCount_ttl, Rsc.literal_resource(stats['largest_count'])))
        writer.write(preVal(VoidPlus.hasTarget_ttl, linksetGraph))
        writer.write(preVal(VoidPlus.method_ttl, Algorithm.simple_clustering_ttl))

        # EXPORT TIMESTAMP
        writer.write(preVal(VoidPlus.exportDate_ttl, Grl.getXSDTimestamp()))

        # CREATED TIMESTAMP — FIX: the datatype was XSD.dateTi (a non-existent
        # XSD term); xsd:dateTime is the valid datatype for a timestamp literal.
        writer.write(preVal(Sns.DCterms.created_ttl, Literal(created, datatype=XSD.dateTime).n3(MANAGER), end=True))

        # DESCRIPTION OF THE CLUSTERING ALGORITHM
        writer.write(F'\n\n{Algorithm.simple_clustering_ttl}\n')
        writer.write(preVal('a', VoidPlus.ClusteringAlgorithm_ttl))
        writer.write(preVal(Sns.DCterms.description_ttl, Literal(Algorithm.simple_clustering_short_description).n3(MANAGER)))
        writer.write(preVal(Sns.RDFS.seeAlso_ttl, Rsc.ga_resource_ttl("https://doi.org/10.3233/SW-200410"), end=True))

        # THE PARTITION OF CO-REFERENT MATCHED RESOURCES
        writer.write(F'{header("ANNOTATED CO-REFERENT RESOURCES")}\n\n')
        writer.write(F"{clusterset_graph}\n{{\n")

        for cid, cluster_data in clusters.items():

            temp = Buffer()

            # A CLUSTER RESOURCE
            writer.write(F"\n\t{Rsc.cluster_ttl(cid)}\n")
            writer.write(preVal('a', VoidPlus.Cluster_ttl, position=2))

            for feature, value in cluster_data.items():

                # CLUSTERED RESOURCES
                if feature == 'nodes':
                    if value:
                        nodes = set(value)
                        node_count += len(nodes)
                        temp.write(
                            preVal(
                                VoidPlus.hasItem_ttl,
                                F" ,\n{space * 2}{' ' * Vars.PRED_SIZE}".join(Rsc.ga_resource_ttl(elt) for elt in nodes),
                                position=2
                            )
                        )

                # VALIDATION FLAGS — a cluster counts as validated when none
                # of its links is left unchecked.
                elif feature == "links":
                    if value and value['not_validated'] == 0:
                        validated += 1
                    for flag, integer in value.items():
                        temp.write(preVal(predicate_map[flag], Literal(integer).n3(MANAGER), position=2))

                elif feature in ["values"]:
                    pass

                # ABOUT THE CLUSTER'S SIZE, Extension, Reconciliation, intID
                else:
                    temp.write(preVal(predicate_map[feature], Literal(value).n3(MANAGER), position=2))

            # Drop the trailing " ;"-style separator and terminate the cluster.
            writer.write(F"{temp.getvalue()[:-2]}.\n")

        # Substitute the placeholders now that the totals are known;
        # the closing "}" ends the named graph.
        result = writer.getvalue().replace('###NodeCounts', Literal(node_count).n3(MANAGER))
        return F"{result.replace('###VALIDATED', Literal(validated).n3(MANAGER))}}}"
def standardLinkGenerator2(link_predicate: str, result_batch, namespace, clusters=None, offset=0):
    """
    Yield turtle triples (with standard reification) for a batch of CSV link rows.

    :param offset         : an integer to increment the counting of the links.
    :param link_predicate : a turtle representation of a URI (e.g. owl:sameAs).
    :param namespace      : a dictionary for namespaces.
    :param result_batch   : an iterable object with link results (first row = header).
    :param clusters       : a dictionary providing the size of the clusters' links.
    :return               : yields a string as a set of triples.
    """
    errors = ""
    vars_size = 0                  # number of annotation columns found in the header
    buffer = Buffer()              # accumulates one link's triples before each yield
    vars_dic = defaultdict(int)    # annotation predicate -> CSV column index

    for count, row in enumerate(result_batch):
        try:
            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE AND TARGET URIS
                src_data, trg_data, predicate = uri2ttl(row[0], namespace)["short"], \
                                                uri2ttl(row[1], namespace)["short"], \
                                                uri2ttl(link_predicate, namespace)["short"]
                # FIX: removed leftover debug `print(src_data)`.

                # GENERATION OF THE LINK
                if src_data and trg_data:

                    buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                                 F"{space}{src_data} {Rsc.ga_resource_ttl(predicate)} {trg_data} .\n")

                    # STANDARD REIFICATION
                    link = F"{space}{src_data} {Rsc.ga_resource_ttl(predicate)} {trg_data} .\n"
                    code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}")
                    buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}"
                                 F"\n{space}{code}\n"
                                 F"{space}{preVal('a', 'rdf:Statement')}"
                                 F"{space}{preVal('rdf:predicate', predicate)}"
                                 F"{space}{preVal('rdf:subject', F'{src_data}')}"
                                 F"{space}{preVal('rdf:object', F'{trg_data}')}")

                    # ANNOTATION OF THE LINK USING THE REIFIED CODE
                    for counter, (predicate, index) in enumerate(vars_dic.items()):
                        end = ".\n" if counter == vars_size - 1 else ";"

                        # APPENDING THE CLUSTER SIZE
                        if clusters and predicate == VoidPlus.cluster_ID_ttl and row[index] in clusters:
                            buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}"
                                         F"{Literal(clusters[row[index]]).n3(MANAGER)} ;\n")

                        # APPENDING THE VALIDATION FLAG
                        if predicate == VoidPlus.has_validation_status_ttl:
                            triple_value = validate.get_resource[row[index]]

                        # APPENDING ANYTHING ELSE
                        else:
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(row[index]) \
                                else Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:
                # THE CSV HEADER: map recognised columns (starting at
                # position 2) to their annotation predicates.
                # FIX: removed the unused `header = row` local.
                for column in range(2, len(row)):
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row[column]]] = column
                        vars_size += 1

        except Exception as err:
            errors += F">>>> [ERROR FROM csv_2_linkset] {row}, {err}"

    # FIX: only report when something actually went wrong (the original
    # unconditionally printed, emitting a blank line on success).
    if errors:
        print(errors)
def standardLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):
    """
    Yield turtle triples (with standard reification) for a batch of link dicts.

    :param mappings       : dictionary of namespaces as keys and prefixes as values.
    :param offset         : an integer to increment the counting of the links.
    :param link_predicate : a turtle representation of a URI (e.g. owl:sameAs).
    :param result_batch   : an iterable of link dicts with 'source' and 'target'
                            keys plus optional annotation features (JSON_HEADERS).
    :return               : yields a string as a set of triples.
    """
    buffer = Buffer()    # accumulates one link's triples before each yield

    def ns_modification(uri):
        # Shorten the URI with the first matching namespace prefix;
        # any remaining full URI is wrapped in angle brackets.
        for ns in mappings:
            if uri.startswith(ns):
                uri = uri.replace(ns, F"{mappings[ns]}:")
                break
        if "://" in uri:
            uri = F"<{uri}>"
        return uri

    for count, link in enumerate(result_batch):
        # FIX: removed the dead `if True:` scaffold and the unused `errors` local.

        # GET THE SOURCE AND TARGET URIS
        src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

        # GENERATION OF THE LINK
        if src_data and trg_data:

            buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                         F"{space}{src_data} {Rsc.ga_resource_ttl(link_predicate)} {trg_data} .\n")

            # STANDARD REIFICATION
            reification = F"{space}{src_data} {Rsc.ga_resource_ttl(link_predicate)} {trg_data} .\n"
            code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(reification)}")
            buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}"
                         F"\n{space}{code}\n"
                         F"{space}{preVal('a', 'rdf:Statement')}"
                         F"{space}{preVal('rdf:predicate', link_predicate)}"
                         F"{space}{preVal('rdf:subject', F'{src_data}')}"
                         F"{space}{preVal('rdf:object', F'{trg_data}')}")

            # ANNOTATION OF THE LINK USING THE REIFIED CODE
            for counter, (feature, value) in enumerate(link.items()):
                end = ".\n" if counter == len(link) - 1 else ";"
                cur_predicate = JSON_HEADERS.get(feature, None)

                if cur_predicate:

                    # APPENDING THE VALIDATION FLAG RESOURCE: the validation key
                    # is the deterministic hash of the ordered URI pair + predicate.
                    if cur_predicate == VoidPlus.has_validation_ttl:
                        small = link['source'] if link['source'] < link['target'] else link['target']
                        big = link['target'] if small == link['source'] else link['source']
                        key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                        triple_value = Rsc.validation_ttl(key) if key is not None else key

                    # APPENDING THE CLUSTER ID AS A RESOURCE
                    elif cur_predicate == VoidPlus.cluster_ID_ttl:
                        triple_value = Rsc.cluster_ttl(value) if value is not None else value

                    # APPENDING THE NETWORK ID
                    # FIX: removed leftover debug print("+++…>>>").
                    elif cur_predicate == VoidPlus.network_ID_ttl:
                        triple_value = Literal(value).n3(MANAGER) if value is not None else value

                    # APPENDING ANYTHING ELSE
                    else:
                        if cur_predicate == VoidPlus.cluster_Int_ID_ttl:
                            triple_value = None
                        elif value is not None:
                            triple_value = Literal(round(float(value), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(value) \
                                else Literal(value).n3(MANAGER)
                        else:
                            triple_value = None

                    if triple_value is not None:
                        buffer.write(F"{space * 2}{cur_predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

            yield buffer.getvalue()
            clearBuffer(buffer)
def rdfStarLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):
    """
    Yield RDF-star (Turtle-star) triples for a batch of link dicts.

    :param mappings       : dictionary of namespaces as keys and prefixes as values.
    :param link_predicate : a turtle representation of a URI (e.g. owl:sameAs).
    :param result_batch   : an iterable of link dicts with 'source' and 'target'
                            keys plus optional annotation features (JSON_HEADERS).
    :param offset         : an integer to increment the counting of the links.
    :return               : yields a string as a set of triples.
    """
    errors = ""
    buffer = Buffer()    # accumulates one link's triples before each yield

    def ns_modification(uri):
        # Shorten the URI with the first matching namespace prefix;
        # any remaining full URI is wrapped in angle brackets.
        for ns in mappings:
            if uri.startswith(ns):
                uri = uri.replace(ns, F"{mappings[ns]}:")
                break
        if "://" in uri:
            uri = F"<{uri}>"
        return uri

    for count, link in enumerate(result_batch):
        try:
            # GET THE SOURCE AND TARGET URIS
            src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

            # GENERATION OF THE LINK
            if src_data and trg_data:

                # The RDF-star quoted triple acting as subject of the annotations.
                buffer.write(F"{space}### LINK Nbr: {count + offset}\n"
                             F"{space}<<{src_data} {link_predicate} {trg_data}>>\n")

                # ANNOTATION OF THE LINK
                # e.g. ll_val:has-link-validation "not_validated" .
                for counter, (feature, value) in enumerate(link.items()):
                    end = ".\n" if counter == len(link) - 1 else ";"
                    current_property = JSON_HEADERS.get(feature, None)

                    if current_property:

                        # APPENDING THE VALIDATION FLAG RESOURCE: the validation key
                        # is the deterministic hash of the ordered URI pair + predicate.
                        if current_property == VoidPlus.has_validation_ttl:
                            small = link['source'] if link['source'] < link['target'] else link['target']
                            big = link['target'] if small == link['source'] else link['source']
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key) if key is not None else key

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif current_property == VoidPlus.cluster_ID_ttl:
                            triple_value = Rsc.cluster_ttl(value) if value is not None else value

                        # APPENDING ANYTHING ELSE (the cluster int id is skipped)
                        else:
                            if current_property == VoidPlus.cluster_Int_ID_ttl:
                                triple_value = None
                            elif value is not None:
                                triple_value = Literal(round(float(value), 5)).n3(MANAGER) \
                                    if Grl.isDecimalLike(value) \
                                    else Literal(value).n3(MANAGER)
                            else:
                                triple_value = value

                        if triple_value is not None:
                            buffer.write(F"{space * 2}{current_property:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                yield buffer.getvalue()
                clearBuffer(buffer)

        except Exception as err:
            errors += F">>>> [ERROR FROM AnnotatedLinkset_Generic/rdfStarLinkGenerator] {link}, {err}"

    # FIX: the accumulated errors were silently dropped; report them like the
    # sibling generators do.
    if errors:
        print(errors)
def standardLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0):
    """
    :param offset         : an integer to increment the counting of tghe links
    :param link_predicate : a turtle representation of a URI (e.i: owl:sameAs).
    :param result_batch   : an iterable object with link results.
    :param clusters       : a dictionary proving the size of the clusters links.
    :return               : Yields a string as set of triples.
    """
    errors = ""
    vars_size = 0                  # number of annotation columns found in the header
    buffer = Buffer()              # accumulates one link's triples before each yield
    vars_dic = defaultdict(int)    # annotation predicate -> CSV column index
    # print(clusters)

    for count, row in enumerate(result_batch):
        try:
            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE AND TARGET URIS
                src_data, trg_data = row[0], row[1]

                # GENERATION OF THE LINK
                if src_data and trg_data:

                    buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                                 F"{space}<{src_data}> {Rsc.ga_resource_ttl(link_predicate)} <{trg_data}> .\n")

                    # STANDARD REIFICATION
                    # NOTE(review): `link` (hash input) omits the angle brackets
                    # written above — presumably intentional; verify against the
                    # other reification code generators.
                    link = F"{space}{src_data} {Rsc.ga_resource_ttl(link_predicate)} {trg_data} .\n"
                    code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}")
                    buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}"
                                 F"\n{space}{code}\n"
                                 F"{space}{preVal('a', 'rdf:Statement')}"
                                 F"{space}{preVal('rdf:predicate', link_predicate)}"
                                 F"{space}{preVal('rdf:subject', F'<{src_data}>')}"
                                 F"{space}{preVal('rdf:object', F'<{trg_data}>')}")

                    # ANNOTATION OF THE LINK USING THE REIFIED CODE
                    for counter, (predicate, index) in enumerate(vars_dic.items()):
                        # The last annotation terminates the statement with ".".
                        end = ".\n" if counter == vars_size - 1 else ";"

                        # APPENDING THE CLUSTER SIZE
                        # if clusters and predicate == VoidPlus.cluster_ID_ttl and int(row[index]) in clusters:
                        #     buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}"
                        #                  F"{Literal(clusters[int(row[index])]['size']).n3(MANAGER)} ;\n")

                        # APPENDING THE VALIDATION FLAG
                        # if predicate == VoidPlus.has_validation_flag_ttl:
                        #     triple_value = validate.get_resource[row[index]]

                        # APPENDING THE VALIDATION FLAG RESOURCE:
                        # the validation key is the deterministic hash of the
                        # lexicographically ordered URI pair plus the predicate.
                        if predicate == VoidPlus.has_validation_ttl:
                            small = src_data if src_data < trg_data else trg_data
                            big = trg_data if small == src_data else src_data
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key)
                            # buffer.write(F"{space * 2}{VoidPlus.has_validation_ttl:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif predicate == VoidPlus.cluster_ID_ttl:
                            cluster_id = int(row[index])
                            triple_value = Rsc.cluster_ttl(cluster_id)
                            # clusters[cluster_id]['item'].extend([src_data, trg_data])

                        # APPENDING ANYTHING ELSE: decimals rounded to 5 places,
                        # everything else as a plain literal.
                        else:
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(row[index]) \
                                else Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:
                # THE CSV HEADER
                # MAPPING THE CSV HEADERS: recognised columns (starting at
                # position 2) become annotation predicates.
                row_header = row
                # print(header, len(header))
                for column in range(2, len(row_header)):
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row_header[column]]] = column
                        # print('--->', CSV_HEADERS[header[column]], header[column], column)
                        vars_size += 1

        except Exception as err:
            errors += F">>>> [ERROR FROM AnnotatedLinkset_Generic/standardLinkGenerator] \n\t{row} \n\t{err}"

    print(errors)