def fetch_test_data():
    # Python 2/3 compatible imports.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen
    try:
        from io import BytesIO as Buffer
    except ImportError:
        from StringIO import StringIO as Buffer
    import zipfile
    # closing and numpy may already be imported at module level; imported here
    # so the helper stays self-contained.
    from contextlib import closing
    import numpy as np
    from imageio import volread

    im_url = "https://imagej.nih.gov/ij/images/t1-head-raw.zip"
    with closing(urlopen(im_url)) as response:
        # getcode() exists on both the Python 2 and Python 3 response objects.
        if response.getcode() != 200:
            raise RuntimeError(
                "Test data could not be found at {}, status code {}".format(
                    im_url, response.getcode()))
        zip_buffer = Buffer(response.read())
        with zipfile.ZipFile(zip_buffer) as zf:
            tif_buffer = Buffer(zf.read('JeffT1_le.tif'))
        return np.asarray(volread(tif_buffer, format='tif'), dtype=np.uint8)
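# Hedged usage sketch (not part of the original module, needs network access
# plus numpy/imageio): fetch_test_data() downloads the ImageJ "T1 Head" sample
# and returns it as a 3-D uint8 array.
if __name__ == "__main__":
    head = fetch_test_data()
    print(head.shape, head.dtype)  # a 3-D shape, e.g. (129, 256, 256), dtype uint8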
def wrapper(*args, **kwargs):
    buffer = Buffer()
    plot = func(*args, **kwargs)
    triage(plot, buffer)
    buffer.seek(0)
    image = Image.open(buffer)
    return image
def log_buffer(caplog):
    buf = Buffer()
    _formatter = NewRelicContextFormatter("", datefmt="ISO8601")
    _handler = logging.StreamHandler(buf)
    _handler.setFormatter(_formatter)
    _logger.addHandler(_handler)
    caplog.set_level(logging.INFO, logger=__name__)
    yield buf
    _logger.removeHandler(_handler)
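# Hedged usage sketch (assumes log_buffer is registered as a pytest fixture and
# _logger is this module's logger): a test can inspect the formatted records
# straight from the buffer the fixture yields.
def test_log_buffer_captures_message(log_buffer):
    _logger.info("hello")
    assert "hello" in log_buffer.getvalue()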
def growl_raw_image(image):
    """Convert image for Growl"""
    b = Buffer()
    image.save(b, 'PNG')
    return b.getvalue()
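# Minimal usage sketch (not part of the original module, assumes Pillow is
# installed): growl_raw_image() just re-encodes a PIL image as in-memory PNG
# bytes for the Growl notifier.
def example_growl_icon():
    from PIL import Image
    icon_png = growl_raw_image(Image.new("RGB", (48, 48), "white"))
    assert icon_png.startswith(b"\x89PNG")
    return icon_png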
def display_matrix(matrix, spacing=50, limit=100, output=False, line_feed='.', is_activated=False):

    table = Buffer()
    message = """
    ####################################################################################
    TABLE OF {} Row(S) AND {} Columns LIMIT={}
    ####################################################################################
    """.format(0, 0, limit)

    if is_activated is True:

        # THE HORIZONTAL SEPARATOR LINE USED UNDER THE HEADER ROW
        line = ""
        for space in range(spacing):
            line += "#"

        # logger.info(display_result)
        my_format = "{{:{}<{}}}".format(line_feed, spacing)
        my_format2 = "{{:<{}}}".format(spacing)

        if matrix[St.message] == "NO RESPONSE":
            # print(Ec.ERROR_CODE_1)
            return message

        if matrix[St.result] is None:
            # logger.warning("\nTHE MATRIX IS EMPTY\n")
            print(message)
            return message

        message = """
    ####################################################################################
    TABLE OF {} Row(S) AND {} Columns LIMIT={}
    ####################################################################################
    """.format(len(matrix[St.result]) - 1, len(matrix[St.result][0]), limit)

        table.write(message)

        count = 0
        for r in range(len(matrix[St.result])):

            count += 1
            row = ""

            # HEADER ROW
            if r == 0:
                for c in range(len(matrix[St.result][0])):
                    # formatted = my_format2.format(to_bytes(matrix[St.result][r][c]))
                    formatted = my_format2.format(matrix[St.result][r][c])
                    row = "{}{} ".format(row, formatted)

            # HEADER SEPARATOR LINE
            elif r == 1:
                for c in range(len(matrix[St.result][0])):
                    formatted = my_format2.format(line)
                    row = "{}{} ".format(row, formatted)
                row += "\n\t"

            # DATA ROWS
            if r >= 1:
                for c in range(len(matrix[St.result][0])):
                    # formatted = my_format.format(to_bytes(matrix[St.result][r][c]))
                    formatted = my_format.format(str(matrix[St.result][r][c]))
                    row = "{}{} ".format(row, formatted)

            table.write("\n\t{}".format(row))

            if count == limit + 1:
                break

        if output is False:
            print(table.getvalue())
        else:
            return table.getvalue()
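# Hedged sketch of the matrix shape display_matrix() expects (St.message and
# St.result are the constant names used above; the URIs are made up). The first
# row of the result is treated as the header, every following row as data.
def example_display_matrix():
    example_matrix = {
        St.message: "OK",
        St.result: [
            ["subject", "object"],                                   # header row
            ["<http://example.org/s1>", "<http://example.org/o1>"],  # data row
        ],
    }
    display_matrix(example_matrix, spacing=30, limit=10, is_activated=True)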
def get_resource_value(resources, targets):

    """
    :param resources : LIST OF RESOURCE URIs FOR WHICH DATA NEEDS TO BE EXTRACTED
    :param targets   : A LIST OF DICTIONARIES WITH THE FOLLOWING KEYS
    :return          : DESCRIPTION OF THE PROPERTIES FOR THE NODE'S LABEL VISUALISATION OBJECT
    ------------------------------------------------------------------
    targets =
    [
        {
            graph : THE DATASET URI
            data :
            [
                {
                    entity_type : THE ENTITY TYPE OF INTEREST
                    properties  : THE PROPERTIES SELECTED BY THE USER FOR THE ABOVE TYPE
                }
            ]
        },
        ...
    ]
    """

    # BUILD THE LIST OF RESOURCES FOR THE VALUES CLAUSE
    rsc_builder = Buffer()
    if type(resources) is str:
        rsc_builder.write("\t\t{}\n".format(Ut.to_nt_format(resources)))
    else:
        for i in range(0, len(resources)):
            if i == 0:
                rsc_builder.write("\t{}\n".format(Ut.to_nt_format(resources[i])))
            else:
                rsc_builder.write("\t\t\t{}\n".format(Ut.to_nt_format(resources[i])))

    # GRAPH PATTERN TEMPLATE FOR ONE (DATASET, ENTITY TYPE, PROPERTY) COMBINATION
    i_format = """
    {{
        GRAPH <{0}>
        {{
            BIND("{2}" AS ?property)
            BIND(<{0}> AS ?dataset)
            ?resource a <{1}> .
            ?resource {2} ?value .
        }}
    }}"""

    query = Buffer()
    empty = True

    for dictionary in targets:

        graph = dictionary[St.graph]
        data = dictionary[St.data]

        for types in data:

            data_type = types[St.entity_type]
            properties = types[St.properties]

            for i_property in properties:

                p_formatted = Ut.to_nt_format(i_property)

                # THE FIRST GRAPH PATTERN IS PRECEDED BY THE VALUES CLAUSE
                if empty is True:
                    query.write(
                        "\t\tVALUES ?resource \n\t\t{{\n\t\t {} \t\t}}".format(
                            rsc_builder.getvalue()))
                    query.write(i_format.format(graph, data_type, p_formatted))
                    empty = False
                else:
                    query.write("\tUNION" + i_format.format(graph, data_type, p_formatted))

    end_format = F"""
    SELECT ?resource ?dataset ?property ?value
    {{\n{query.getvalue()}}}
    """

    return end_format
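# Hedged sketch of the "targets" shape get_resource_value() expects (St.graph,
# St.data, St.entity_type and St.properties are the constant names used above;
# the dataset/type/property URIs mirror the commented Ecartico example further
# down, and the resource URI is hypothetical).
def example_get_resource_value():
    targets = [{
        St.graph: "http://goldenagents.org/datasets/Ecartico",
        St.data: [{
            St.entity_type: "http://www.vondel.humanities.uva.nl/ecartico/ontology/Person",
            St.properties: ["http://www.vondel.humanities.uva.nl/ecartico/ontology/full_name"],
        }],
    }]
    return get_resource_value(["http://example.org/resource/1"], targets)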
def metric(graph, strengths=None, alt_keys=None, hyper_parameter_1=0.1, hyper_parameter_2=0.25, has_evidence=False): # print "\n- STRENGTHS:", strengths # print "- ALT KEYS:", alt_keys """ :param graph: THE GRAPH TO EVALUATE IT IS THE LIST OF EDGES MODELLED AS TUPLES :param strengths: STRENGTH OF THE GRAPHS'S EDGES IT IS A DICTIONARY WHERE THE KEY IS PROVIDED WITH THE FUNCTION get_key(node_1, node_2) FROM THE UTILITY FILE :param alt_keys: IF GIVEN, IS THE KEY MAPPING OF GHE COMPUTED KEY HERE AND THE REAL KEY. AN EXAMPLE CA BE FOUND IN THE FUNCTION cluster_d_test IN THIS CODE :param hyper_parameter_1: = THE THRESHOLD FOR AN ILN TO BE FLAGGED GOOD :param hyper_parameter_2: THE GRAY ARRAY INTERVAL :param has_evidence: A BOOLEAN ATTRIBUTE FOR NOTIFYING WHETHER THE GRAPH HAS ASSOCIATION :return: """ analysis_builder = Buffer() # def get_key(node_1, node_2): # strength_key = "key_{}".format(str(hash((node_1, node_2))).replace("-", "N")) if node_1 < node_2 \ # else "key_{}".format(str(hash((node_2, node_1))).replace("-", "N")) # return strength_key # CREATE THE NETWORKS GRAPH OBJECT g = nx.Graph() """"""""""""""""""""""""""""""""""""""" LOADING THE NETWORK GRAPH OBJECT... """"""""""""""""""""""""""""""""""""""" # ADD NODE TO THE GRAPH OBJECT # nodes = set([n1 for n1, n2 in graph] + [n2 for n1, n2 in graph]) nodes = set([data[0] for data in graph] + [data[1] for data in graph]) for node in nodes: g.add_node(node) # ADD EDGES TO THE GRAPH OBJECT for edge in graph: # print edge[0], edge[1],"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" # RECOMPOSE THE KEY FOR EXTRACTING THE WEIGHT OF AN EDGE # print("edge:", edge) strength_key = get_key(edge[0], edge[1]) if alt_keys is not None: strength_key = alt_keys[strength_key] # MAXIMUM WEIGHT FROM THE LIST OF WEIGHTS AVAILABLE FOR THE CURRENT EDGE if strength_key in strengths: # print "strengths[strength_key] = ", strengths[strength_key] strength_value = max(strengths[strength_key]) # g.add_edges_from([(edge[0], edge[1], {'capacity': 12, 'weight': 2 - float(strength_value)})]) # ADDING THE EDGE, THE EDGE'S WEIGHT AND CAPACITY # print edge[0], edge[1] g.add_edge(edge[0], edge[1], capacity=2, weight=float(strength_value)) else: problem(text="THE LINK KEY IS INCORRECT") # g.add_edge(edge[0], edge[1], capacity=2, weight=float(1)) """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 1. ORIGINAL METRIC COMPUTATIONS WITHOUT WEIGHT """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" bridges_list = [] nb_used, nd_used, nc_used = "na", "na", "na" edge_discovered, node_count, edge_derived, bridges = 0, 0, 0, 0 try: # TOTAL NUMBER OF NODES IN THE GRAPH node_count = len(nodes) # TOTAL NUMBER OF DISCOVERED EDGES edge_discovered = len(graph) # TOTAL NUMBER OF DERIVED EDGES (POSSIBLE EDGES) edge_derived = node_count * (node_count - 1) / 2 # A LIST OF BRIDGES (BRIDGE EDGE) bridges_list = list(nx.bridges(g)) # TOTAL NUMBER OF BRIDGES IN THE NETWORK bridges = 0 if has_evidence is True and node_count == 2 else len(bridges_list) # print "BRIDGES:", bridges "NEW CODE FOR PRINTING THE NETWORK" # nodes_print = list(dict(g.nodes(data=True)).keys()) # edge_print = list(g.edges(data=True)) # specs = { # # # THE ACTIVATED DATA STORE # "data_store": "STARDOG", # # # 1. 
THE SERIALISED DATA # "serialised": '_PHDemoClusters_', # # # THE CLUSTER ID # "cluster_id": "123", # # # MORE INFORMATION ON THE CLUSTER TO VISUALISE # "cluster_data": { # # "nodes": nodes_print, # # 'strengths': strengths, # # "links": edge_print # }, # # # THE PROPERTIES SELECTED BY THE USER # "properties": [ # # # MARRIAGE # {"dataset": "http://goldenagents.org/datasets/Marriage003", # "entity_type": "http://goldenagents.org/uva/SAA/ontology/Person", # "property": "http://goldenagents.org/uva/SAA/ontology/full_name"}, # # # ECARTICO # {"dataset": "http://goldenagents.org/datasets/Ecartico", # "entity_type": "http://www.vondel.humanities.uva.nl/ecartico/ontology/Person", # "property": "http://www.vondel.humanities.uva.nl/ecartico/ontology/full_name"}, # # # BAPTISM # {"dataset": "http://goldenagents.org/datasets/Baptism002", # "entity_type": "http://goldenagents.org/uva/SAA/ontology/Person", # "property": "http://goldenagents.org/uva/SAA/ontology/full_name"}, # # # BURIAL # {"dataset": "http://goldenagents.org/datasets/Burial008", # "entity_type": "http://goldenagents.org/uva/SAA/ontology/Person", # "property": "http://goldenagents.org/uva/SAA/ontology/full_name"}, # # ] # } # # vis = cluster_vis_input(specs, visualisation_obj=None, resources_obj=None, # dataset_obj=None, sub_clusters=None, root=None, investigated=True, activated=True) # from ll.org.LLData.Validation import CLUSTER_VISUALISATION_DIR # with open(join(CLUSTER_VISUALISATION_DIR, "eQ.json"), mode='w') as file: # json.dump(vis, file) "END NEW CODE FOR PRINTING THE NETWORK" # THE NETWORK DIAMETER diameter = nx.diameter(g) # NETWORK METRIC ELEMENTS # try: normalised_closure = float(edge_discovered) / float(edge_derived) if edge_derived != 0 else 0 normalised_bridge = float(bridges / float(len(nodes) - 1)) if node_count > 1 else 0 normalised_diameter = (float(diameter - 1) / float(len(nodes) - 2)) \ if len(nodes) > 2 else (float(diameter - 1) if diameter >= 1 else 0) # except: # print "AN ERROR WITH THE COMPUTATION OF THE METRIC...." # FINAL NORMALISATION (NORMALISATION USED FOR NON WEIGHTED METRIC COMPUTATION) nb_used = sigmoid(bridges) if sigmoid(bridges) > normalised_bridge else normalised_bridge nd_used = sigmoid(diameter - 1) if sigmoid(diameter - 1) > normalised_diameter else normalised_diameter nc_used = 1 - normalised_closure # THE METRICS NEGATIVE IMPACTS impact = (nb_used + nd_used + nc_used) / float(3) # THE METRIC QUALITY EVALUATION estimated_quality = round(1 - impact, 3) except nx.NetworkXError as error: impact = 1 estimated_quality = 0 diameter = node_count - 1 print("GRAPH:{}\nNODES: {}\nEDGE DISCOVERED: {}".format(g, node_count, edge_discovered)) print(error) """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 2. 
WEIGHTED METRIC COMPUTATIONS OPTION 1: AVERAGE AND MINIMUM """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" max_strengths = {} min_strength = 1 average_strength = 1 if strengths is not None: # MAXIMUM WEIGHT FOR EACH EDGE for key, val in strengths.items(): max_strengths[key] = float(max(val)) # MINIMUM WEIGHT IN THE CLUSTER min_strength = 0 if len(strengths.items()) > 0: min_strength = min(strengths.items(), key=lambda strength_tuple: max(strength_tuple[1])) min_strength = float(max(min_strength[1])) average_strength = 0 if len(max_strengths) > 0: average_strength = sum(max_strengths.values()) / float(len(max_strengths)) weighted_eq = round(estimated_quality * min_strength, 3), round(estimated_quality * average_strength, 3) if len(str(estimated_quality)) > 5: print("BIGGER", estimated_quality) """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 3. WEIGHTED METRIC COMPUTATIONS OPTION 2: ALL INTEGRATED / COMPLETE """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" biggest_cost = 0 weighted_bridges = 0 weighted_edge_discovered = 0 weighted_nb_used, weighted_nd_used, weighted_nc_used = "na", "na", "na" try: # LIST OF NODES WITH AN ECCENTRICITY EQUAL TO THE NETWORK DIAMETER periphery = nx.periphery(g) # POSSIBLE PAIRWISE COMBINATIONS OF PERIPHERY NODES FOR COMPUTING BIGGEST COST OF SHORTEST PATH link_combinations = combinations(periphery) # COMPUTING THE WEIGHTED BRIDGE if has_evidence is True and node_count == 2: weighted_bridges = 0 else: for node_1, node_2 in bridges_list: weighted_bridges += (g.get_edge_data(node_1, node_2))['weight'] # COMPUTING THE WEIGHTED DIAMETER for start, end in link_combinations: # A TUPLE WHERE THE FIRST ELEMENT IS THE COST OF THE SHORTEST PATH FOUND (SECOND ELEMENT) shortest_path = nx.single_source_dijkstra(g, start, target=end, weight="weight") # UPDATING THE SHORTEST PATH COST WITH THE INVERTED WEIGHT PENALTY curr_cost = 2 * (len(shortest_path[1]) - 1) - shortest_path[0] # UPDATING THE BIGGEST COST biggest_cost = curr_cost if curr_cost > biggest_cost else biggest_cost # THE WEIGHTED DIAMETER IS THEN THE BIGGEST SHORTEST PATH COST weighted_diameter = biggest_cost # NORMALISING THE DIAMETER if len(nodes) > 2: weighted_normalised_diameter = (float(weighted_diameter - 1) / float(len(nodes) - 2)) else: weighted_normalised_diameter = float(weighted_diameter - 1) weighted_normalised_diameter = 1 if weighted_normalised_diameter > 1 else weighted_normalised_diameter # FIRST NORMALISATION weighted_normalised_bridge = float(weighted_bridges / float(len(nodes) - 1)) if node_count > 1 else 0 weighted_edge_discovered = g.size(weight="weight") # print("\tEDGE = {} WEIGHTED EDGE = {} AND DIAMETER = {} ON A GRAPH OF {} NODES".format( # g.size(), weighted_edge_discovered, diameter, len(nodes))) weighted_closure = float(weighted_edge_discovered) / float(edge_derived) if edge_derived != 0 else 0 # SECOND NORMALISATION weighted_nb_used = sigmoid(weighted_bridges) \ if sigmoid(weighted_bridges) > weighted_normalised_bridge else weighted_normalised_bridge weighted_nd_used = sigmoid(weighted_diameter - 1) \ if sigmoid(weighted_diameter - 1) > weighted_normalised_diameter else weighted_normalised_diameter weighted_nc_used = round(1 - weighted_closure, 2) # WEIGHTED IMPACT weighted_impact = (weighted_nb_used + weighted_nd_used + weighted_nc_used) / float(3) weighted_eq_2 = round(1 - weighted_impact, 3) # print "\t>>> biggest_cost", biggest_cost # print "\t>>> weight:", g.size(weight="weight") # print "\t>>> bridge weight:", 
weighted_bridges # print "\t>>> Quality [{}] Weighted-Min [{}] Weighted-Avg [{}] Weighted-eQ [{}]".format( # estimated_quality, weighted_eq[0], weighted_eq[1], weighted_eq_2) except nx.NetworkXError: weighted_impact = 1 weighted_eq_2 = 0 """"""""""""""""""""""""""""""""""""""" 4. PRINTING MATRIX COMPUTATIONS """"""""""""""""""""""""""""""""""""""" test = "[MIN: {}] | [AVERAGE: {}] | [COMPLETE: {}]".format( str(weighted_eq[0]), str(weighted_eq[1]), weighted_eq_2) # analysis_builder.write( # # "\nMETRICS READING: THE CLOSER TO ZERO, THE BETTER\n" # # "\n\tAverage Degree [{}] \nBridges [{}] normalised to [{}] {}\nDiameter [{}] normalised to [{}] {}" # # "\nClosure [{}/{}][{}] normalised to [{}]\n\n>>> Decision Support [{}] {} <<<". # # format(average_node_connectivity, bridges, normalised_bridge, nb_used, # # diameter, normalised_diameter, nd_used, # # edge_discovered, edge_derived, closure, normalised_closure, interpretation, estimated_quality)) # # ">>>\tESTIMATED QUALITY [{} | {}]\t<<<" # "\n\tBridges [{}] Diameter [{}] Closure [{}/{}] -> [{}]". # format(estimated_quality, test, nb_used, nd_used, edge_discovered, # edge_derived, normalised_closure)) analysis_builder.write("\t{:25} : [{}] | {}\t".format("ESTIMATED QUALITY", estimated_quality, test)) # if ratio == 1: # analysis_builder.write("\n\nDiagnose: VERY GOOD") # elif average_node_connectivity == 2 or bridges == 0: # analysis_builder.write("\n\nDiagnose: ACCEPTABLE") # elif bridges > 0: # analysis_builder.write("\n\nDiagnose : NEED BRIDGE INVESTIGATION") # AUTOMATED DECISION FOR WEIGHTED IMPACT RESULT # ********************************************* weighted_quality = {} # hyper_parameter_1 = 0.1 # hyper_parameter_2 = 0.25 # WEIGHT USING MINIMUM STRENGTH if 1 - weighted_eq[0] <= hyper_parameter_1: weighted_quality["min"] = "GOOD [{}]".format(weighted_eq[0]) else: weighted_quality["min"] = "BAD [{}]".format(weighted_eq[0]) # WEIGHT USING AVERAGE STRENGTH if 1 - weighted_eq[1] <= hyper_parameter_1: weighted_quality["avg"] = "GOOD [{}]".format(weighted_eq[1]) else: weighted_quality["avg"] = "BAD [{}]".format(weighted_eq[1]) # WEIGHT USING BRIDGE-CLOSURE-DIAMETER STRENGTH if 1 - weighted_eq_2 <= hyper_parameter_1: weighted_quality["bcd"] = "GOOD [{}]".format(weighted_eq_2) else: weighted_quality["bcd"] = "BAD [{}]".format(weighted_eq_2) # AUTOMATED DECISION FOR NON WEIGHTED IMPACT RESULT # ************************************************* if impact <= hyper_parameter_1: # analysis_builder.write("\n\nInterpretation: GOOD") auto_decision = "GOOD [{}]".format(estimated_quality) analysis_builder.write("\n{:25} : The network is a GOOD representation of a unique real world object".format( "INTERPRETATION")) elif (bridges == 0) and (diameter < 3): auto_decision = "ACCEPTABLE [{}]".format(estimated_quality) analysis_builder.write("\n{:25} : The network is an ACCEPTABLE representation of a unique real world object". format("INTERPRETATION")) elif ((impact > hyper_parameter_1) and (impact < hyper_parameter_2)) or (bridges == 0): # analysis_builder.write("\n\nInterpretation: UNCERTAIN") auto_decision = "UNCERTAIN [{}]".format(estimated_quality) analysis_builder.write("\n{:25} : We are UNCERTAIN whether the network represents a unique real world object". 
format("INTERPRETATION")) else: # analysis_builder.write("\n\nInterpretation: THE NETWORK IS NOT A GOOD REPRESENTATION OF A SINGLE RESOURCE") auto_decision = "BAD [{}]".format(estimated_quality) analysis_builder.write( "\n{:25} : The network is NOT A GOOD representation of a unique real world object".format("INTERPRETATION")) # DECISION SUPPORT EXPLAINING WHY A DECISION IS TAKEN # *************************************************** if bridges > 0: # analysis_builder.write("\n\nEvidence: NEED BRIDGE INVESTIGATION") analysis_builder.write(" BECAUSE it needs a bridge investigation") if diameter > 2: # analysis_builder.write("\n\nEvidence: TOO MANY INTERMEDIATES") adding_2 = "and " if bridges > 0 else "" adding_1 = "\n" if bridges > 0 else "" analysis_builder.write(" {}{:>36}BECAUSE it has too many intermediates".format(adding_1, adding_2)) if bridges == 0 and diameter <= 2: # analysis_builder.write("\n\nEvidence: LESS INTERMEDIATES AND NO BRIDGE") analysis_builder.write(" and BECAUSE there are less intermediate(s) and no bridge") analysis_builder.write("\n{:33} : Bridges [{}] Diameter [{}] Closure [{}/{} = {}]".format( "NON WEIGHTED NETWORK METRICS USED", nb_used, nd_used, edge_discovered, edge_derived, nc_used)) analysis_builder.write("\n{:33} : Bridges [{}] Diameter [{}] Closure [{}/{} = {}]" " impact: [{}] quality: [{}]". format("WEIGHTED NETWORK METRICS USED", weighted_nb_used, weighted_nd_used, round(weighted_edge_discovered, 3), edge_derived, weighted_nc_used, weighted_impact, 1 - weighted_impact)) return {'message': analysis_builder.getvalue(), 'decision': impact, 'AUTOMATED_DECISION': auto_decision, 'WEIGHTED_DECISION': weighted_quality}
def validationGraphGenerator(validationset, linksetStats, auto_prefixes, setGraph, set_id, created, isLinkset: bool): # THE LAST STATUS MUST ALWAYS HAVE A VALUE DO THAT IT DETERMINES THE LAST TRIPLE predicate_map = { "Motivation": VoidPlus.motivation_ttl, "Status": VoidPlus.has_validation_status_ttl } if isLinkset is False: auto_prefixes[Rsc.lens] = "lens" if validationset: validationset_graph = F"{Rsc.validationset_ttl(Grl.deterministicHash(validationset))}-{set_id}" writer = Buffer() # ADDING THE CLUSTER NAMESPACE # auto_prefixes[Rsc.validationset] = "validationset" # APPENDING ALL NAMESPACES writer.write( linksetNamespaces( auto_prefixes, # isValidated=validationset and len(validationset['items']) > 0, isValidated=True, isClustered=Vars.clusters in linksetStats and linksetStats[Vars.clusters] > 0 )) # VALIDATION METADATA writer.write(F'{header("LINK VALIDATION METADATA")}\n\n') writer.write(F"{validationset_graph}\n") writer.write(preVal('a', VoidPlus.Validationset_ttl)) writer.write(preVal(VoidPlus.hasTarget_ttl, setGraph)) if "creator" in validationset and len(validationset["creator"].strip()) > 0: writer.write(preVal(Sns.DCterms.creator_ttl, Literal(validationset["creator"]).n3())) if "publisher" in validationset and len(validationset["publisher"].strip()) > 0: writer.write(preVal(Sns.DCterms.publisher_ttl, Literal(validationset["publisher"]).n3())) # CREATED writer.write(preVal(Sns.DCterms.created_ttl, Literal(created, datatype=XSD.dateTi).n3(MANAGER))) # EXPORT TIMESTAMP writer.write(preVal(VoidPlus.exportDate_ttl, Grl.getXSDTimestamp())) # VALIDATION STATS # THE TOTAL AMOUNT OF LINKS ACCEPTED writer.write(F"\n{space}### VOID+ VALIDATION STATS\n") if Vars.accepted in linksetStats and linksetStats[Vars.accepted] > -1: writer.write(preVal(VoidPlus.accepted_ttl, Rsc.literal_resource(linksetStats[Vars.accepted]))) # THE TOTAL AMOUNT OF LINKS REJECTED if Vars.rejected in linksetStats and linksetStats[Vars.rejected] > -1: writer.write(preVal(VoidPlus.rejected_ttl, Rsc.literal_resource(linksetStats[Vars.rejected]))) # THE TOTAL AMOUNT OF LINKS WITH AN UNCERTAIN VALIDATION FLAG if Vars.not_sure in linksetStats and linksetStats[Vars.not_sure] > -1: writer.write(preVal(VoidPlus.uncertain_ttl, Rsc.literal_resource(linksetStats[Vars.not_sure]))) # THE TOTAL AMOUNT OF LINKS NOT VALIDATED if Vars.notValidated in linksetStats and linksetStats[Vars.notValidated] > -1: writer.write( preVal(VoidPlus.unchecked_ttl, Rsc.literal_resource(linksetStats[Vars.notValidated]))) writer.write("\n") writer.write(preVal(Sns.DCterms.description_ttl, Rsc.literal_resource(validate.generic_desc), end=True)) # VALIDATION TERMS writer.write(validate.terminology()) # VALIDATIONSET writer.write(F'{header("VALIDATIONSET")}\n\n') writer.write(F"{validationset_graph}\n{{") # VALIDATIONS for key, validation in validationset['items'].items(): # print(validation) writer.write(F'\n\t{Rsc.validation_ttl(key)}\n') writer.write(preVal('a', VoidPlus.Validation_ttl, position=2)) for index, (val_header, value) in enumerate(predicate_map.items()): end = True if index == len(predicate_map) - 1 else False curr_feature = predicate_map.get(val_header, None) if curr_feature: # aACCEPTED | REJECTED | NOT-VALIDATED : UNSURE | MIXED if curr_feature == VoidPlus.has_validation_status_ttl: writer.write(preVal(VoidPlus.has_validation_status_ttl, validate.get_resource[validation[val_header]], end=end, position=2)) elif validation[val_header]: writer.write(preVal(curr_feature, Literal(validation[val_header]).n3(MANAGER), end=end, position=2)) 
writer.write("}") # print(writer.getvalue()) return writer.getvalue()
def rdfStarLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0): errors = "" vars_size = 0 buffer = Buffer() vars_dic = defaultdict(int) for count, row in enumerate(result_batch): if True: # THE FIRST LINE IS ASSUMED TO BE THE HEADER if count > 0 and len(row) > 1: # GET THE SOURCE AND TARGET URIS src_data, trg_data = row[0], row[1] # GENERATION OF THE LINK if src_data and trg_data: # The RDFStar subject buffer.write(F"{space}### LINK Nbr: {count + offset}\n" F"{space}<<<{src_data}> {link_predicate} <{trg_data}>>>\n" if len(vars_dic) > 0 else F"{space}<{src_data}> {link_predicate} <{trg_data}> .\n") # ANNOTATION OF THE LINK # ll_val:has-link-validation "not_validated" . for counter, (predicate, index) in enumerate(vars_dic.items()): end = ".\n" if counter == vars_size - 1 else ";" # APPENDING THE CLUSTER SIZE # if clusters and predicate == VoidPlus.cluster_ID_ttl and int(row[index]) in clusters: # buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}" # F"{Literal(clusters[int(row[index])]['size']).n3(MANAGER)} ;\n") # APPENDING THE VALIDATION FLAG # if predicate == VoidPlus.has_validation_flag_ttl: # triple_value = validate.get_resource[row[index]] # APPENDING THE VALIDATION FLAG RESOURCE if predicate == VoidPlus.has_validation_ttl: small = src_data if src_data < trg_data else trg_data big = trg_data if small == src_data else src_data key = Grl.deterministicHash(F"{small}{big}{link_predicate}") triple_value = Rsc.validation_ttl(key) # buffer.write(F"{space * 2}{VoidPlus.has_validation_ttl:{Vars.PRED_SIZE}}{triple_value} {end}\n") # APPENDING THE CLUSTER ID AS A RESOURCE elif predicate == VoidPlus.cluster_ID_ttl: cluster_id = int(row[index]) triple_value = Rsc.cluster_ttl(cluster_id) # clusters[cluster_id]['item'].extend([src_data, trg_data]) # APPENDING ANYTHING ELSE else: triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \ if Grl.isDecimalLike(row[index]) \ else Literal(row[index]).n3(MANAGER) buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n") # buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}" # F"{validate.get_resource[row[index]] if not Grl.isDecimalLike(row[index]) else round(float(row[index]), 5)} {end}\n") yield buffer.getvalue() clearBuffer(buffer) else: # THE CSV HEADER # Star at position for column in range(2, len(row)): if row[column] in CSV_HEADERS: vars_dic[CSV_HEADERS[row[column]]] = column vars_size += 1
def clusterGraphGenerator(clusters, stats, auto_prefixes, linksetGraph, created, linkset_id): node_count = 0 validated = 0 clusterset_graph = F"{Rsc.clusterset_ttl(Grl.deterministicHash(clusters))}-{linkset_id}" if clusters: # ADDING THE CLUSTER NAMESPACE # auto_prefixes[Rsc.clusterset] = "clusterset" writer = Buffer() predicate_map = { # SET OF NODES # "nodes": VoidPlus.size_ttl, "extended": VoidPlus.extended_ttl, "id": VoidPlus.intID_ttl, "hash_id": VoidPlus.hashID_ttl, # VALIDATIONS # "links": VoidPlus.links_ttl, "reconciled": VoidPlus.reconciled_ttl, "size": VoidPlus.size_ttl, "accepted": VoidPlus.accepted_ttl, "rejected": VoidPlus.rejected_ttl, "not_sure": VoidPlus.uncertain_ttl, "mixed": VoidPlus.contradictions_ttl, "not_validated": VoidPlus.unchecked_ttl, 'network_id': VoidPlus.network_ID_ttl } # APPENDING ALL NAMESPACES writer.write( linksetNamespaces( auto_prefixes, isClustered=clusters and len(clusters) > 0, isValidated=(Vars.notValidated in stats and stats[Vars.notValidated] < stats[Vars.triples]) is True )) # THE CLUSTER METADATA writer.write(F'{header("RESOURCE PARTITIONING METADATA")}\n\n') writer.write(F"{clusterset_graph}\n") writer.write(preVal('a', VoidPlus.Clusterset_ttl)) writer.write(preVal(VoidPlus.clusters_ttl, Literal(len(clusters)).n3(MANAGER))) writer.write(preVal(Sns.VoID.entities_ttl, "###NodeCounts")) writer.write(preVal(VoidPlus.validations_ttl, "###VALIDATED")) writer.write(preVal(VoidPlus.largestNodeCount_ttl, Rsc.literal_resource(stats['largest_size']))) writer.write(preVal(VoidPlus.largestLinkCount_ttl, Rsc.literal_resource(stats['largest_count']))) writer.write(preVal(VoidPlus.hasTarget_ttl, linksetGraph)) writer.write(preVal(VoidPlus.method_ttl, Algorithm.simple_clustering_ttl)) # EXPORT TIMESTAMP writer.write(preVal(VoidPlus.exportDate_ttl, Grl.getXSDTimestamp())) # CREATED TIMESTAMP writer.write(preVal(Sns.DCterms.created_ttl, Literal(created, datatype=XSD.dateTi).n3(MANAGER), end=True)) # DESCRIPTION OF THE CLUSTERING ALGORITHM writer.write(F'\n\n{Algorithm.simple_clustering_ttl}\n') writer.write(preVal('a', VoidPlus.ClusteringAlgorithm_ttl)) writer.write(preVal(Sns.DCterms.description_ttl, Literal(Algorithm.simple_clustering_short_description).n3(MANAGER))) writer.write(preVal(Sns.RDFS.seeAlso_ttl, Rsc.ga_resource_ttl("https://doi.org/10.3233/SW-200410"), end=True)) # THE PARTITION OF CO-REFERENT MATCHED RESOURCES writer.write(F'{header("ANNOTATED CO-REFERENT RESOURCES")}\n\n') writer.write(F"{clusterset_graph}\n{{\n") for cid, cluster_data in clusters.items(): # print(cluster_data.keys()) # exit() temp = Buffer() # A CLUSTER RESOURCE writer.write(F"\n\t{Rsc.cluster_ttl(cid)}\n") writer.write(preVal('a', VoidPlus.Cluster_ttl, position=2)) for feature, value in cluster_data.items(): # CLUSTERED RESOURCES if feature == 'nodes': if value: nodes = set(value) # temp.write(preVal(predicate_map[feature], Literal(len(nodes)).n3(MANAGER), position=2)) node_count += len(nodes) temp.write( preVal( VoidPlus.hasItem_ttl, F" ,\n{space*2}{' ' * Vars.PRED_SIZE}".join(Rsc.ga_resource_ttl(elt) for elt in nodes), position=2 ) ) # VALIDATION FLAGS elif feature == "links": if value and value['not_validated'] == 0: validated += 1 for flag, integer in value.items(): temp.write( preVal( predicate_map[flag], Literal(integer).n3(MANAGER), position=2 ) ) elif feature in ["values"]: pass # ABOUT THE CLUSTER'S SIZE, Extension, Reconciliation, intID else: temp.write(preVal(predicate_map[feature], Literal(value).n3(MANAGER), position=2)) writer.write(F"{temp.getvalue()[:-2]}.\n") 
# print(triples.getvalue()) result = writer.getvalue().replace('###NodeCounts', Literal(node_count).n3(MANAGER)) return F"{result.replace('###VALIDATED', Literal(validated).n3(MANAGER))}}}"
def standardLinkGenerator2(link_predicate: str, result_batch, namespace, clusters=None, offset=0): """ :param offset : an integer to increment the counting of tghe links :param link_predicate : a turtle representation of a URI (e.i: owl:sameAs). :param namespace : a dictionary for namespace :param result_batch : an iterable object with link results. :param clusters : a dictionary proving the size of the clusters links. :return : Yields a string as set of triples. """ errors = "" vars_size = 0 buffer = Buffer() vars_dic = defaultdict(int) for count, row in enumerate(result_batch): try: # THE FIRST LINE IS ASSUMED TO BE THE HEADER if count > 0 and len(row) > 1: # GET THE SOURCE AND TARGET URIS src_data, trg_data, predicate = uri2ttl(row[0], namespace)["short"], \ uri2ttl(row[1], namespace)["short"], \ uri2ttl(link_predicate, namespace)["short"] print(src_data) # GENERATION OF THE LINK if src_data and trg_data: # The RDFStar subject buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n" F"{space}{src_data} {Rsc.ga_resource_ttl(predicate)} {trg_data} .\n") # STANDARD REIFICATION link = F"{space}{src_data} {Rsc.ga_resource_ttl(predicate)} {trg_data} .\n" code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}") buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}" F"\n{space}{code}\n" F"{space}{preVal('a', 'rdf:Statement')}" F"{space}{preVal('rdf:predicate', predicate)}" F"{space}{preVal('rdf:subject', F'{src_data}')}" F"{space}{preVal('rdf:object', F'{trg_data}')}") # ANNOTATION OF THE LINK USING THE REIFIED CODE for counter, (predicate, index) in enumerate(vars_dic.items()): end = ".\n" if counter == vars_size - 1 else ";" # APPENDING THE CLUSTER SIZE if clusters and predicate == VoidPlus.cluster_ID_ttl and row[index] in clusters: buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}" F"{Literal(clusters[row[index]]).n3(MANAGER)} ;\n") # APPENDING THE VALIDATION FLAG if predicate == VoidPlus.has_validation_status_ttl: triple_value = validate.get_resource[row[index]] # APPENDING DING ANYTHING ELSE else: triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \ if Grl.isDecimalLike(row[index]) \ else Literal(row[index]).n3(MANAGER) buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n") yield buffer.getvalue() clearBuffer(buffer) else: # THE CSV HEADER # Star at position # MAPPING THE CSV HEADERS for column in range(2, len(row)): header = row if row[column] in CSV_HEADERS: vars_dic[CSV_HEADERS[row[column]]] = column vars_size += 1 except Exception as err: errors += F">>>> [ERROR FROM csv_2_linkset] {row}, {err}" print(errors)
def standardLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0): """ :param mappings : dictionary of namespaces as keys and prefixes ad values. :param offset : an integer to increment the counting of tghe links :param link_predicate : a turtle representation of a URI (e.i: owl:sameAs). :param result_batch : an iterable object with link results. :param clusters : a dictionary proving the size of the clusters links. :return : Yields a string as set of triples. """ buffer = Buffer() errors = "" def ns_modification(uri): for ns in mappings: if uri.startswith(ns): uri = uri.replace(ns, F"{mappings[ns]}:") break if uri.__contains__("://"): uri = F"<{uri}>" return uri for count, link in enumerate(result_batch): if True: # GET THE SOURCE AND TARGET URIS # src_data, trg_data = link['source'], link['target'] src_data, trg_data = ns_modification(link['source']), ns_modification(link['target']) # GENERATION OF THE LINK if src_data and trg_data: # The RDFStar subject buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n" F"{space}{src_data} {Rsc.ga_resource_ttl(link_predicate)} {trg_data} .\n") # STANDARD REIFICATION reification = F"{space}{src_data} {Rsc.ga_resource_ttl(link_predicate)} {trg_data} .\n" code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(reification)}") buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}" F"\n{space}{code}\n" F"{space}{preVal('a', 'rdf:Statement')}" F"{space}{preVal('rdf:predicate', link_predicate)}" F"{space}{preVal('rdf:subject', F'{src_data}')}" F"{space}{preVal('rdf:object', F'{trg_data}')}") # ANNOTATION OF THE LINK USING THE REIFIED CODE for counter, (feature, value) in enumerate(link.items()): end = ".\n" if counter == len(link) - 1 else ";" cur_predicate = JSON_HEADERS.get(feature, None) if cur_predicate: # APPENDING THE VALIDATION FLAG RESOURCE if cur_predicate == VoidPlus.has_validation_ttl: small = link['source'] if link['source'] < link['target'] else link['target'] big = link['target'] if small == link['source'] else link['source'] # print(F"{small} {big} {link_predicate}") key = Grl.deterministicHash(F"{small}{big}{link_predicate}") triple_value = Rsc.validation_ttl(key) if key is not None else key # APPENDING THE CLUSTER ID AS A RESOURCE elif cur_predicate == VoidPlus.cluster_ID_ttl: triple_value = Rsc.cluster_ttl(value) if value is not None else value # triple_value = None elif cur_predicate == VoidPlus.network_ID_ttl: print("++++++++++++++++++>>>>>>>>>>") triple_value = Literal(value).n3(MANAGER) if value is not None else value # APPENDING ANYTHING ELSE else: if cur_predicate == VoidPlus.cluster_Int_ID_ttl: triple_value = None elif value is not None: triple_value = Literal(round(float(value), 5)).n3(MANAGER) \ if Grl.isDecimalLike(value) \ else Literal(value).n3(MANAGER) else: triple_value = None if triple_value is not None: buffer.write(F"{space * 2}{cur_predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n") yield buffer.getvalue() clearBuffer(buffer)
def rdfStarLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):

    errors = ""
    buffer = Buffer()

    def ns_modification(uri):
        # REWRITE THE URI WITH ITS PREFIX IF A NAMESPACE MAPPING EXISTS,
        # OTHERWISE WRAP THE FULL URI IN ANGLE BRACKETS
        for ns in mappings:
            if uri.startswith(ns):
                uri = uri.replace(ns, F"{mappings[ns]}:")
                break
        if "://" in uri:
            uri = F"<{uri}>"
        return uri

    for count, link in enumerate(result_batch):

        try:
            # GET THE SOURCE AND TARGET URIS
            src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

            # GENERATION OF THE LINK
            if src_data and trg_data:

                # The RDFStar subject
                buffer.write(F"{space}### LINK Nbr: {count + offset}\n"
                             F"{space}<<{src_data} {link_predicate} {trg_data}>>\n")

                # ANNOTATION OF THE LINK
                # ll_val:has-link-validation "not_validated" .
                for counter, (feature, value) in enumerate(link.items()):

                    end = ".\n" if counter == len(link) - 1 else ";"
                    current_property = JSON_HEADERS.get(feature, None)

                    if current_property:

                        # APPENDING THE VALIDATION FLAG RESOURCE
                        if current_property == VoidPlus.has_validation_ttl:
                            small = link['source'] if link['source'] < link['target'] else link['target']
                            big = link['target'] if small == link['source'] else link['source']
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key) if key is not None else key

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif current_property == VoidPlus.cluster_ID_ttl:
                            triple_value = Rsc.cluster_ttl(value) if value is not None else value

                        # APPENDING ANYTHING ELSE, EXCEPT THE INTERNAL CLUSTER ID
                        else:
                            if current_property == VoidPlus.cluster_Int_ID_ttl:
                                triple_value = None
                            elif value is not None:
                                triple_value = Literal(round(float(value), 5)).n3(MANAGER) \
                                    if Grl.isDecimalLike(value) \
                                    else Literal(value).n3(MANAGER)
                            else:
                                triple_value = value

                        if triple_value is not None:
                            buffer.write(F"{space * 2}{current_property:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                yield buffer.getvalue()
                clearBuffer(buffer)

        except Exception as err:
            errors += F">>>> [ERROR FROM AnnotatedLinkset_Generic/rdfStarLinkGenerator] {link}, {err}"
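# Shape of each Turtle* fragment yielded above (a sketch only: the actual
# prefixes and annotation predicates come from the namespace mappings,
# JSON_HEADERS and VoidPlus; owl:sameAs is just an example link predicate):
#
#     ### LINK Nbr: 1
#     <<ex:subject1 owl:sameAs ex:subject2>>
#         <annotation-predicate>    <annotation-value> ;
#         <annotation-predicate>    <annotation-value> .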
def standardLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0): """ :param offset : an integer to increment the counting of tghe links :param link_predicate : a turtle representation of a URI (e.i: owl:sameAs). :param result_batch : an iterable object with link results. :param clusters : a dictionary proving the size of the clusters links. :return : Yields a string as set of triples. """ errors = "" vars_size = 0 buffer = Buffer() vars_dic = defaultdict(int) # print(clusters) for count, row in enumerate(result_batch): try: # THE FIRST LINE IS ASSUMED TO BE THE HEADER if count > 0 and len(row) > 1: # GET THE SOURCE AND TARGET URIS src_data, trg_data = row[0], row[1] # GENERATION OF THE LINK if src_data and trg_data: # The RDFStar subject buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n" F"{space}<{src_data}> {Rsc.ga_resource_ttl(link_predicate)} <{trg_data}> .\n") # STANDARD REIFICATION link = F"{space}{src_data} {Rsc.ga_resource_ttl(link_predicate)} {trg_data} .\n" code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}") buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}" F"\n{space}{code}\n" F"{space}{preVal('a', 'rdf:Statement')}" F"{space}{preVal('rdf:predicate', link_predicate)}" F"{space}{preVal('rdf:subject', F'<{src_data}>')}" F"{space}{preVal('rdf:object', F'<{trg_data}>')}") # ANNOTATION OF THE LINK USING THE REIFIED CODE for counter, (predicate, index) in enumerate(vars_dic.items()): end = ".\n" if counter == vars_size - 1 else ";" # APPENDING THE CLUSTER SIZE # if clusters and predicate == VoidPlus.cluster_ID_ttl and int(row[index]) in clusters: # buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}" # F"{Literal(clusters[int(row[index])]['size']).n3(MANAGER)} ;\n") # APPENDING THE VALIDATION FLAG # if predicate == VoidPlus.has_validation_flag_ttl: # triple_value = validate.get_resource[row[index]] # APPENDING THE VALIDATION FLAG RESOURCE if predicate == VoidPlus.has_validation_ttl: small = src_data if src_data < trg_data else trg_data big = trg_data if small == src_data else src_data key = Grl.deterministicHash(F"{small}{big}{link_predicate}") triple_value = Rsc.validation_ttl(key) # buffer.write(F"{space * 2}{VoidPlus.has_validation_ttl:{Vars.PRED_SIZE}}{triple_value} {end}\n") # APPENDING THE CLUSTER ID AS A RESOURCE elif predicate == VoidPlus.cluster_ID_ttl: cluster_id = int(row[index]) triple_value = Rsc.cluster_ttl(cluster_id) # clusters[cluster_id]['item'].extend([src_data, trg_data]) # APPENDING ANYTHING ELSE else: triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \ if Grl.isDecimalLike(row[index]) \ else Literal(row[index]).n3(MANAGER) buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n") yield buffer.getvalue() clearBuffer(buffer) else: # THE CSV HEADER # Star at position # MAPPING THE CSV HEADERS row_header = row # print(header, len(header)) for column in range(2, len(row_header)): if row[column] in CSV_HEADERS: vars_dic[CSV_HEADERS[row_header[column]]] = column # print('--->', CSV_HEADERS[header[column]], header[column], column) vars_size += 1 except Exception as err: errors += F">>>> [ERROR FROM AnnotatedLinkset_Generic/standardLinkGenerator] \n\t{row} \n\t{err}" print(errors)