def load_default_namespaces(directory):

    print headings("LOADING DEFAULT NAMESPACES TO STARDOG [{}]".format(stardog_db))

    if path.isdir(directory) is False:
        return "\n>>> [{}] IS NOT A DIRECTORY ".format(directory)

    f_path = path.join(directory, "namespace.bat" if Ut.is_windows() else "namespace.sh")

    # PLATFORM DEPENDENT CMD
    if Ut.is_windows():
        cmd = namespaces.format("call ", Svr.settings[St.stardog_uri])
    else:
        cmd = namespaces.format(stardog_bin, Svr.settings[St.stardog_uri])

    # EXECUTE THE CMD
    result = Ut.run_cdm(cmd, f_path, delete_after=True, output=False)

    # DISPLAY THE FINAL RETURN
    print "Finished with: {}".format(result)
def query_graph_metadata(graph):

    print headings("QUERYING STARDOG FOR THE GENERIC METADATA OF A GRAPH")

    print "{:12} : {}".format("INPUT GRAPH ", graph)
    graph = main_alignment(graph)
    print "{:12} : {}".format("MAIN GRAPH", graph)

    qry = std_queries["metadata"].format(graph)
    result = query(qry)
    # print result

    return result
def add_namespace(namespace, uri):

    print headings("ADDING A NAMESPACE")
    print "NAMESPACE LABEL: {}".format(namespace)
    print "NAMESPACE URI  : {}".format(uri)

    # PLATFORM DEPENDENT CMD
    try:
        cmd = "stardog namespace add {} --prefix {} --uri {}".format(
            Svr.settings[St.stardog_uri], namespace, uri)
        return subprocess.check_output(cmd, shell=True)

    except ValueError:
        traceback.print_exc()
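# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module): the
# prefix and URI below are hypothetical values. Assuming
# Svr.settings[St.stardog_uri] points to an existing database, a namespace
# could be registered as follows:
#
#   add_namespace("ex", "http://example.org/ns#")
# ---------------------------------------------------------------------------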
def stardog_query_list():

    """""" """""" """""" """""" """""" """""" """""" """""" """""" """
    # QUERYING STARDOG FOR THE CURRENT LIST OF QUERIES
    """ """""" """""" """""" """""" """""" """""" """""" """""" """"""

    print headings("QUERYING STARDOG FOR THE CURRENT LIST OF QUERIES")

    try:
        cmd = stardog_cmds["query_list"].format(stardog_bin, stardog_address)
        remove = "{}".format(stardog_bin)
        print "{:12} : {}".format("STARDOG COMMAND", cmd.replace("\"", "").replace(remove, ""))
        return subprocess.check_output(cmd, shell=True)

    except Exception as err:
        return err
def stardog_query_kill(query_id):

    """""" """""" """""" """""" """""" """""" """""" """""" """""" """
    # TERMINATING A SPECIFIC QUERY BASED ON ID
    """ """""" """""" """""" """""" """""" """""" """""" """""" """"""

    print headings("TERMINATING A SPECIFIC QUERY BASED ON ID")

    try:
        cmd = stardog_cmds["query_kill"].format(stardog_bin, stardog_address, query_id)
        remove = "{}".format(stardog_bin)
        print "{:12} : {}".format("STARDOG COMMAND", cmd.replace("\"", "").replace(remove, ""))
        return subprocess.check_output(cmd, shell=True)

    except Exception as err:
        return err
def query_generic(graph, limit=100):

    """""" """""" """""" """""" """""
    # QUERYING STARDOG
    """ """""" """""" """""" """""" ""

    print headings("GENERIC QUERY FOR STARDOG")

    try:
        cmd = std_queries["query_generic"].format(stardog_bin, stardog_uri, graph, limit)
        remove = "\"{}stardog\" query {} \"".format(stardog_bin, stardog_uri)
        print "{:12} : {}".format("QUERY", cmd[0:-1].replace(remove, ""))
        return subprocess.check_output(cmd, shell=True)

    except Exception as err:
        traceback.print_exc()
        return err
def stardog_data_add_folder(folder_path, named_graph=None, database=None,
                            add=True, fies_format="trig", activated=False):

    if activated is False:
        message = "THE FUNCTION [stardog_data_add_folder] IS NOT ACTIVATED"
        print message
        return message

    if fies_format.strip() == "ttl" and named_graph is None:
        return "The named graph is required for loading your data."

    """""" """""" """""" """""" """""" """""" """""" """
    # ADDING DATA TO STARDOG
    """ """""" """""" """""" """""" """""" """""" """"""

    print headings("ADDING DATA TO STARDOG FROM A FOLDER")

    if database is None:
        database = stardog_uri

    if named_graph is not None:
        graph = "-g {}".format(named_graph.strip())
    else:
        graph = ""

    if folder_path.strip().endswith(path.sep) is False:
        folder_path = "{}{}".format(folder_path, path.sep)

    add_remove = "add" if add is True else "remove"

    try:
        cmd = stardog_cmds["data_add_folder"].format(
            stardog_bin, add_remove, database, graph, folder_path, fies_format)
        cmd = cmd.replace("\\", "/")
        remove = "{}".format(stardog_bin)
        print "{:12} : {}".format("STARDOG COMMAND", cmd.replace("\"", "").replace(remove, ""))
        result = subprocess.check_output(cmd, shell=True)
        print result
        return result

    except Exception as err:
        traceback.print_exc()
        return err
def query(query_search):

    """""" """""" """""" """""" """""
    # QUERYING STARDOG
    """ """""" """""" """""" """""" ""

    print headings("QUERYING STARDOG")

    try:
        cmd = std_queries["query"].format(stardog_bin, stardog_uri, query_search)
        remove = "\"{}stardog\" query {} \"".format(stardog_bin, stardog_uri)
        print "{:12} : {}".format("QUERY", cmd[0:-1].replace(remove, ""))
        cmd = cmd.replace("\n", "")
        return subprocess.check_output(cmd, shell=True)

    except Exception as err:
        traceback.print_exc()
        return err
def stardog_query_status(query_id):

    """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """
    # ASSESSING THE STATUS OF A SPECIFIC CURRENTLY RUNNING QUERY
    """ """""" """""" """""" """""" """""" """""" """""" """""" """""" """""" """"""

    print headings("ASSESSING THE STATUS OF A SPECIFIC CURRENTLY RUNNING QUERY")

    try:
        cmd = stardog_cmds["query_status"].format(stardog_bin, stardog_address, query_id)
        remove = "{}".format(stardog_bin)
        print "{:12} : {}".format("STARDOG COMMAND", cmd.replace("\"", "").replace(remove, ""))
        return subprocess.check_output(cmd, shell=True)

    except Exception as err:
        return err
def stardog_export_graph(file_path, graph, database=None):

    """""" """""" """""" """""" """""" """""" """""" """
    # EXPORTING AN ENTIRE STARDOG GRAPH
    """ """""" """""" """""" """""" """""" """""" """"""

    print headings("EXPORTING AN ENTIRE STARDOG GRAPH")

    if database is None:
        database = stardog_uri

    try:
        cmd = stardog_cmds["export_graph"].format(stardog_bin, graph, database, file_path)
        remove = "{}".format(stardog_bin)
        print "{:12} : {}".format("STARDOG COMMAND", cmd.replace("\"", "").replace(remove, ""))
        return subprocess.check_output(cmd, shell=True)

    except Exception as err:
        return err
def stardog_data_add_file(file_path, graph=None, database=None, add=True, activated=False):

    if activated is False:
        message = "THE FUNCTION [stardog_data_add_file] IS NOT ACTIVATED"
        print message
        return message

    """""" """""" """""" """""" """""" """""" """""" """
    # ADDING DATA TO STARDOG FROM A FILE
    """ """""" """""" """""" """""" """""" """""" """"""

    print headings("ADDING DATA TO STARDOG FROM A FILE")

    if database is None:
        database = stardog_uri

    if graph is not None:
        graph = "-g {}".format(graph.strip())
    else:
        graph = ""

    add_remove = "add" if add is True else "remove"

    try:
        cmd = stardog_cmds["data_add_file"].format(stardog_bin, add_remove, database, graph, file_path)
        cmd = cmd.replace("\\", "/")
        remove = "{}".format(stardog_bin)
        print "{:12} : {}".format("STARDOG COMMAND", cmd.replace("\"", "").replace(remove, ""))
        result = subprocess.check_output(cmd, shell=True)
        print result
        return result

    except Exception as err:
        traceback.print_exc()
        return err
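# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module): the
# paths and graph names below are hypothetical. Both helpers are guarded by
# the `activated` flag, so calling them without activated=True only prints a
# warning and returns.
#
#   stardog_data_add_file("/data/dump.trig",
#                         graph="http://example.org/graph", activated=True)
#   stardog_data_add_folder("/data/trig_files",
#                           named_graph="http://example.org/graph",
#                           fies_format="trig", activated=True)
# ---------------------------------------------------------------------------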
def query_graph_search(search_exp):

    print headings("QUERYING STARDOG FOR A SPECIFIC GRAPH")

    qry = std_queries["graphs_search"].format(search_exp)
    return query(qry)
def query_graph_properties(graph):

    print headings("QUERYING STARDOG'S GRAPH PROPERTIES")

    qry = std_queries["graph_properties"].format(graph)
    return query(qry)
def query_graphs():

    print headings("QUERYING STARDOG'S NAMED GRAPHS")

    qry = std_queries["graph_all"]
    return query(qry)
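# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module): a
# minimal round trip with the query helpers above. The search expression and
# graph URI are hypothetical.
#
#   print query_graphs()                                    # list all named graphs
#   print query_graph_search("grid")                        # graphs matching an expression
#   print query_graph_properties("http://example.org/graph")
# ---------------------------------------------------------------------------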
def __init__(self, database, is_trig, file_to_convert, separator, entity_type,
             rdftype=None, subject_id=None, embedded_uri=None, field_metadata=None, activated=False):

    entity_type = entity_type.strip().replace(" ", "_")
    database = database.strip().replace(" ", "_")

    if activated is False:
        print headings("The function [CSV init] has not been activated.")
        return

    specs = "\t{:18}: {}\n\t{:18}: {:18}\n\t{:18}: {}\n\t{:18}: [{}]\n\t{:18}: {}\n\t{:18}: {}".format(
        "DATABASE NAME", database, "ENTITY-TYPE", entity_type, "SUBJECT ID", subject_id,
        "COLUMN SEPARATOR", separator, "TRIG FORMAT", str(is_trig), "FILE", file_to_convert)

    print headings("CONVERTING TO RDF WITH THE FOLLOWING SPECS\n{}".format(specs))

    # embedded_uri is an array of dictionaries (see the illustrative sketch after this method).
    # For each dictionary, we have:
    #   ID:Integer        <- the index of the column that needs to become a URI
    #   reverse:Boolean   <- determines whether the URI needs to connect to the subject
    #   namespace:string  <- the namespace to assign to the new URI, e.g. http://risis.eu/
    #   predicate:string  <- either just the property (e.g. CityOf) or the full URI (http://risis.eu/resource/CityOf)
    print "RDF TYPE LIST : {}".format(rdftype)
    print "SUBJECT ID    : {}".format(subject_id)

    """
    param database:        Name of the dataset.
    param is_trig:         A boolean value indicating the format of the RDF file that will be generated.
    param file_to_convert: The path of the CSV file that is to be converted.
    param separator:       The character used for value separation.
    param subject_id:      The index of the column to be used as the subject in the RDF file.
    param entity_type:     The name of the entity type, starting with a capital character.
    """

    self.embedded_uri = embedded_uri
    self.fieldMetadata = field_metadata

    if subject_id is None:
        self.no_id(database, is_trig, file_to_convert, separator, entity_type, rdftype, activated=activated)
        return

    bom = ''
    _file = ""
    self.errorCount = 0
    self.rdftype = rdftype
    self.subjectID = subject_id               # -> int The index of the attribute to use as identification
    self.inputPath = file_to_convert          # -> string The input file path
    self.pvFormat = u""                       # -> string RDF triple format used for formatting Predicate_Value
    self.risisOntNeutral = "riClass:Neutral"  # Prefix for Neutrality
    self.lastColumn = 0                       # -> int The last attribute index
    self.longestHeader = 0                    # -> int The number of characters in the longest attribute

    # self.data_prefix = entity_type.lower().replace(' ', '_')
    self.data_prefix = "resource" if entity_type is None \
        else entity_type.strip().lower().replace(' ', '_')

    '''Replace unwanted characters -> #;:.-(–)—[']`=’/”{“}^@*+!~\,%'''
    self.pattern = '[?&#;:%!~+`=’*.(\-)–\\—@\[' ',\\]`{^}“/”]'

    try:
        # Open the file to convert
        # _file = codecs.open(self.inputPath, 'rb', encoding="utf-8")
        _file = open(self.inputPath, 'rb')
    except Exception as exception:
        print "\n", exception
        exit(1)

    """ About BYTE ORDER MARK (BOM) """
    self.first_line = _file.readline().strip()
    if self.first_line.startswith(to_bytes(codecs.BOM_UTF8)):
        for i in range(len(to_bytes(codecs.BOM_UTF8))):
            bom += self.first_line[i]
        self.first_line = self.first_line.replace(bom, '')
        print u"[" + os.path.basename(self.inputPath) + u"]", u"contains BOM."
    # get the first line
    # self.first_line = self.first_line.strip(u'\r\n')
    print "\n\tTHIS IS THE HEADER STRING  : ", self.first_line

    # Get the attribute headers
    # -> Array about the list of attributes in the csv file
    self.csvHeader = self.extractor(self.first_line, separator)
    self.csvHeaderLabel = self.extractor(self.first_line, separator)
    print "\tTHIS IS THE HEADER LIST    : ", self.csvHeader
    print "\tTHE HEADER LIST IS OF SIZE : ", len(self.csvHeader)

    """ 2. Get the last column ID.
    This allows the loop to stop before the end whenever the
    identification column happens to be the last column """
    self.lastColumn = len(self.csvHeader) - 1
    # This is no longer the case because we now keep the column used as reference
    # if self.subjectID == self.lastColumn:
    #     self.lastColumn -= 1

    """ 3. Get the attribute headers and make them URI ready """
    for i in range(0, len(self.csvHeader)):

        self.csvHeader[i] = self.csvHeader[i].replace(' ', '_')
        self.csvHeader[i] = re.sub(self.pattern, u"", self.csvHeader[i].replace('&', "_and_"))

        '''For every attribute composed of more than one word separated by spaces,
        start the first word in lower case, followed by the underscore character'''
        # print self.csvHeader
        new_header = ""
        header_split = self.csvHeader[i].split()
        if header_split is not None and len(header_split) > 0:
            new_header = header_split[0].lower()
            for j in range(1, len(header_split)):
                new_header += u"_" + header_split[j]
            self.csvHeader[i] = new_header
        # print header_split

        '''Get the size (number of characters) of the longest attribute'''
        if self.longestHeader < len(self.csvHeader[i]):
            self.longestHeader = len(self.csvHeader[i])

    """ 4. Set the RDF triple formatter """
    sub_position = 6  # vocab: takes 6 slots
    pre_position = sub_position + self.longestHeader
    self.pvFormat = u"{0:>" + u"{0}".format(str(sub_position)) + \
                    u"} {1:" + u"{0}".format(str(pre_position)) + u"} {2}"

    # GENERATING THE RDF SCHEMA
    schema = self.get_schema(entity_type=entity_type, field_metadata=self.fieldMetadata)
    # print schema

    RDF.__init__(self, input_path=self.inputPath, database=database, entity_type=entity_type,
                 is_trig=is_trig, namespace=self.get_namespace(database), schema=schema)

    n = 0

    """ Opening the named-graph """
    self.open_trig(dataset_prefix)

    """ Writing the rdf instances of the dataset """
    while True:

        n += 1
        line = to_unicode(_file.readline())

        if not line:

            # WRITE THE BAT FILE
            print "\n\nFILE LOCATION: {}".format(self.dirName)
            self.bat_file = bat(self.dirName, self.database)

            """ Closing the named-graph by closing the turtle writer.
            CAN POSSIBLY THROW AN EXCEPTION FROM RDFLIB AFTER CHECKING THE FILE """
            if self.isClosed is not True:
                self.close_writer()

            print '\nNo more line... Process ended at line > ' + str(n)
            print 'Done with converting [' + file_to_convert + '] to RDF!!!'
            _file.close()
            break

        # if n <= 5:
        #     print line
        #     pass

        # if n <= 72:
        #     """ Proceed with the conversion """
        #     self.write_triples(to_unicode(line), separator, embedded_uri, self.fieldMetadata)

        buffered = ""
        while True:

            print >> sys.stderr, '\r', "\tCURRENT LINE: {}".format(n),

            record = line
            if not record:
                break

            if buffered != "":
                div = CSV.extractor(record, separator, content_delimiter='"')
                if len(div) != len(self.csvHeader):
                    record = u"{}{}".format(buffered, record)

            div = CSV.extractor(record, separator, content_delimiter='"')

            if len(div) < len(self.csvHeader):
                buffered = u"{}".format(record)
                print u">>> Buffered: {}".format(buffered)

            elif len(div) == len(self.csvHeader):
                # print "\nLINE: {}".format(record.rstrip())
                # print "SIZE: {}".format(len(div))
                buffered = ""
                # for item in div:
                #     print "{}".format(item)

                """ Proceed with the conversion """
                self.write_triples(div, embedded_uri, self.fieldMetadata)
                break

            elif len(div) > len(self.csvHeader):
                print "\nERROR!!!!"

                # REPORT ERROR IN THE CHARACTER SEPARATED VALUE FORMAT
                if len(div) != len(self.csvHeader):
                    self.errorCount += 1
                    print "{:5} Record encoding error. Header: {} columns while Record: {} columns".format(
                        self.errorCount, len(self.csvHeader), len(div))
                    print "\t\t{:8}".format(div)
                    # print line

                    # PRINTING ITEMS
                    for i in range(0, len(div)):
                        print b"\t\t{} - {}".format(i + 1, to_bytes(div[i]))
                break

            line = to_unicode(_file.readline())

    print "\n"
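# ---------------------------------------------------------------------------
# Illustrative sketch of the `embedded_uri` argument documented in __init__
# above: a list of dictionaries, one per column that should become a URI.
# The values below are hypothetical; the key names follow the description in
# the comments of __init__.
#
#   embedded_uri = [
#       {"ID": 3,                                   # column index to turn into a URI
#        "reverse": False,                          # whether the URI connects back to the subject
#        "namespace": "http://risis.eu/resource/",  # namespace assigned to the generated URI
#        "predicate": "cityOf"}                     # property name or full property URI
#   ]
# ---------------------------------------------------------------------------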
def union(specs, activated=False):

    if activated is False:
        # logger.warning("THE FUNCTION IS NOT ACTIVATED")
        print ("THE FUNCTION IS NOT ACTIVATED")
        return {St.message: "THE FUNCTION IS NOT ACTIVATED.", St.error_code: 1, St.result: None}

    # print "\nEXECUTING UNION SPECS" \
    #       "\n======================================================" \
    #       "========================================================"
    print headings("EXECUTING UNION SPECS...")

    """ THE generate_lens_name FUNCTION RETURNS THE NAME OF THE UNION AND A QUERY THAT
    ALLOWS ONE TO ASK WHETHER THE LENS TO BE CREATED ALREADY EXISTS, BY CHECKING WHETHER
    THERE IS A LENS WITH THE SAME COMPOSITION IN TERMS OF THE GRAPHS USED FOR THE UNION """

    # SET THE NAME OF THE UNION-LENS
    print "1. DATASETS:", len(specs[St.datasets])
    for ds in specs[St.datasets]:
        print "\t- {}".format(ds)

    info = Lu.generate_lens_name(specs[St.datasets])
    specs[St.lens] = "{}{}".format(Ns.lens, info["name"])
    print "\n2. LENS: ", info["name"]

    # CHECK WHETHER THE LENS EXISTS
    check = run_checks(specs, info["query"])
    if check[St.result] != "GOOD TO GO":
        if check[St.message].__contains__("ALREADY EXISTS"):
            Urq.register_lens(specs, is_created=False)
        return check
    # print "AFTER CHECK"

    # PREPARATION FOR THE CREATION OF THE LENS
    specs[St.lens_target_triples] = ""
    specs[St.expectedTriples] = 0
    specs[St.insert_query] = ""
    lens = specs[St.lens]
    source = "{}{}".format(Ns.tmpgraph, "load00")
    message_2 = Ec.ERROR_CODE_8.replace("#", specs[St.lens])
    count = -1
    insert_ans = False

    try:
        # GO THROUGH THE LINKSETS/LENSES IN THE LENS
        #   1 - SUM UP THE EXPECTED NUMBER OF TRIPLES
        #   2 - GENERATE THE TRIPLES REPRESENTATION OF THE GRAPHS COMPOSING THIS LENS
        #   3 - GENERATE THE INSERT QUERY FOR MOVING BOTH LINKSET AND SINGLETON GRAPHS TO THE UNION GRAPH
        total_size = 0

        # LOAD ALL GRAPHS IN LOAD00
        specs[St.insert_query] += "DROP SILENT GRAPH <{}{}> ;\n".format(Ns.tmpgraph, "load00")

        # ITERATE THROUGH THE PROVIDED GRAPHS
        for linkset in specs[St.datasets]:

            # print "TARGET: ", linkset
            count += 1

            # GET THE TOTAL NUMBER OF CORRESPONDENCE TRIPLES INSERTED
            curr_triples = Qry.get_triples(linkset)

            # PROBABLY THE LINKSET HAS NO SUCH PROPERTY " void:triples ?triples ."
            if curr_triples is None:
                curr_triples = Qry.get_triples_count(linkset)

            total_size += int(curr_triples)
            print "{} Contains {} triples".format(linkset, curr_triples)

            if curr_triples is not None:
                specs[St.expectedTriples] += int(curr_triples)
            else:
                # THERE IS A PROBLEM WITH THE GRAPH FOR SEVERAL POSSIBLE REASONS
                return {St.message: message_2.replace("\n", "<br/>"), St.error_code: 1, St.result: None}

            # GENERATE TRIPLES OUT OF THE TARGETS
            specs[St.lens_target_triples] += "\n\t void:target <{}> ;".format(linkset)

            # GET THE INSERT QUERY
            # BOTH THE LINKSET AND THE SINGLETONS ARE MOVED TO A SINGLE GRAPH
            partial_query = Qry.q_copy_graph(source, source, linkset)
            if count == 0:
                specs[St.insert_query] += partial_query
            else:
                specs[St.insert_query] += " ;\n{}".format(partial_query)

        # INTERSECTION MANIPULATION OVER THE UNION (SOURCE)
        insert_query = union_insert_q(lens, source, specs[St.lens_name])
        # print "manipulation:", manipulation
        specs[St.insert_query] += " ;\n{}".format(insert_query)

        # GENERATE THE LENS UNION
        if activated is True:

            # print specs[St.insert_query]
            insert_ans = Qry.boolean_endpoint_response(specs[St.insert_query])

            specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
            if specs[St.triples] == "0":
                message = Ec.ERROR_CODE_9
                print message
                # return None
                return {St.message: message.replace("\n", "<br/>"), St.error_code: 1, St.result: None}

        # CHECK WHETHER THE RESULT CONTAINS DUPLICATES
        contains_duplicated = Qry.contains_duplicates(lens)
        print "Contains Opposite Direction Duplicated:", contains_duplicated

        # IF IT DOES, REMOVE THE DUPLICATES
        if contains_duplicated is True:
            # logger.warning("THE LENS CONTAINS DUPLICATES.")
            print "THE LENS CONTAINS DUPLICATES."
            Qry.remove_duplicates(lens)
            # logger.warning("THE DUPLICATES ARE NOW REMOVED.")
            print "THE DUPLICATES ARE NOW REMOVED."

        print "Number of triples loaded : {}".format(total_size)

        specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
        print "\t>>> INSERTED: {}\n\t>>> INSERTED TRIPLES: {}".format(insert_ans, specs[St.triples])
        print "Inserted : {}".format(specs[St.triples])
        print "Removed  : {}".format(total_size - int(specs[St.triples]))

        # LOAD THE METADATA
        # NOT GOOD AS THE LENS ALSO HAS A SINGLETON GRAPH
        # inserted_correspondences = int(Qry.get_union_triples(lens))
        inserted_correspondences = int(specs[St.triples])
        # print "inserted_correspondences:", inserted_correspondences

        specs[St.removedDuplicates] = specs[St.expectedTriples] - inserted_correspondences

        metadata = Gn.union_meta(specs)
        # print "METADATA:", metadata
        meta_ans = Qry.boolean_endpoint_response(metadata)
        print "\t>>> IS THE METADATA GENERATED AND INSERTED? {}".format(meta_ans)

        construct_response = Qry.get_constructed_graph(specs[St.lens])
        if construct_response is not None:
            print "\t>>> WRITING TO FILE"
            construct_response = construct_response.replace('{', "<{}>\n{{".format(specs[St.lens]), 1)
            write_to_file(
                graph_name=specs[St.lens_name],
                metadata=None,
                correspondences=construct_response,
                directory=DIRECTORY)

        print "\tLens created as : ", specs[St.lens]

        # REGISTER THE LINKSET
        Urq.register_lens(specs, is_created=True)

        # return specs[St.lens]
        message = "THE LENS WAS CREATED as {}. " \
                  "With initially {} triples loaded, {} duplicated triples were found and removed.".\
            format(specs[St.lens], total_size, total_size - int(specs[St.triples]))

        print "\t*** JOB DONE! ***"
        return {St.message: message, St.error_code: 0, St.result: specs[St.lens]}

    except Exception as err:
        # logger.warning(err)
        if insert_ans == "true":
            "DROP THE INSERTED UNION"
            drop_linkset(lens, activated=True)
        print "ERROR IN UNION LENS CREATION:", err
        return {St.message: ERROR_CODE_11, St.error_code: 11, St.result: None}
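# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module): a
# minimal `specs` dictionary for union(). The graph URIs are hypothetical,
# and depending on the caller further keys (e.g. St.lens_name, read above
# before the lens is written to file) may need to be provided.
#
#   specs = {
#       St.datasets: [
#           "http://risis.eu/linkset/example_linkset_1",
#           "http://risis.eu/linkset/example_linkset_2",
#       ],
#       St.lens_name: "example_union_lens",
#   }
#   union(specs, activated=True)
# ---------------------------------------------------------------------------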