def test_find_paths(self):
    """Verify that find_paths() resolves entity, provenance, prefixed and
    index IRIs into the expected on-disk (directory, file) pairs.

    Rewritten as a data-driven loop: the original repeated the same
    call/print/assert stanza seven times and left debug print() calls in.
    """
    base_num = "15"
    base_entity = base_iri + "br/" + base_num
    base_prov = base_entity + "/prov/se/1"
    base_entity_prefix = base_iri + "br/012340" + base_num
    base_prov_prefix = base_iri + "br/012340" + base_num + "/prov/se/1"
    ent_dir = self.__get_dir(base_num)
    ent_file = self.__get_file(base_num)

    # Each case: (input IRI, expected (directory, file) tuple).
    cases = [
        # Plain entity stored under the default supplier-prefix directory.
        (base_entity,
         (base_dir + "br" + sep + default_dir + sep + ent_dir,
          base_dir + "br" + sep + default_dir + sep + ent_file + ".json")),
        # Provenance of that entity lives under the entity file's "prov" dir.
        (base_prov,
         (base_dir + "br" + sep + default_dir + sep + ent_file + sep + "prov",
          base_dir + "br" + sep + default_dir + sep + ent_file +
          sep + "prov" + sep + "se.json")),
        # Entity carrying an explicit supplier prefix ("012340").
        (base_entity_prefix,
         (base_dir + "br" + sep + "012340" + sep + ent_dir,
          base_dir + "br" + sep + "012340" + sep + ent_file + ".json")),
        # Provenance of the prefixed entity.
        (base_prov_prefix,
         (base_dir + "br" + sep + "012340" + sep + ent_file + sep + "prov",
          base_dir + "br" + sep + "012340" + sep + ent_file +
          sep + "prov" + sep + "se.json")),
        # Global provenance agent (no entity subject).
        (base_iri + "prov/pa/1",
         (base_dir + "prov" + sep + "pa",
          base_dir + "prov" + sep + "pa" + sep + "1.json")),
        # The base IRI itself maps to the top-level index.
        (base_iri,
         (base_dir[:-1], base_dir + "index.json")),
        # A class-of-entities IRI ("br/") maps to that class's index file.
        (base_iri + "br/",
         (base_dir + "br", base_dir + "br" + sep + "index.json")),
    ]
    for iri, expected in cases:
        with self.subTest(iri=iri):
            res = find_paths(iri, base_dir, base_iri, default_dir,
                             dir_split_number, items_per_file)
            self.assertEqual(res, expected)
def dir_and_file_paths(self, cur_g, base_dir, base_iri):
    """Return the (directory, file) paths where the graph *cur_g* is stored.

    Any subject of the graph identifies the resource — all of them are
    assumed to share the same storage file, so an arbitrary one is used.
    N-Triples / N-Quads output (self.nt / self.nq) is not JSON-LD, which
    find_paths reflects through its is_json flag.
    """
    a_subject = set(cur_g.subjects(None, None)).pop()
    return find_paths(
        str(a_subject), base_dir, base_iri,
        self.default_dir, self.dir_split, self.n_file_item,
        is_json=not (self.nt or self.nq))
def _find_prov_info_path(self, prov_subject, short_name):
    """Locate the counter (.txt) file for provenance entities of a kind.

    :param prov_subject: the entity the provenance refers to, or None for
        the global provenance graph.
    :param short_name: short name of the provenance entity type (e.g. "se").
    :return: a (prov_info_path, g_prov) pair — the counter file path and
        the base IRI of the provenance graph.
    """
    if prov_subject is None:
        # Global provenance: anchor the counter file two directory levels
        # above the info dir.
        g_prov = self.base_iri + "prov/"
        info_root = self.info_dir.rsplit(os.sep, 2)[0] + os.sep
        prov_info_path = \
            g_prov.replace(self.base_iri, info_root) + short_name + ".txt"
    else:
        # Entity-specific provenance: the counter sits in a "prov" folder
        # under the entity's own path (its file path minus the ".json"
        # extension — assumes the path ends in ".json").
        g_prov = str(prov_subject) + "/prov/"
        entity_file = find_paths(
            str(prov_subject), self.info_dir, self.base_iri,
            self.default_dir, self.dir_split, self.n_file_item)[1]
        prov_info_path = os.sep.join(
            (entity_file[:-5], "prov", short_name + ".txt"))
    return prov_info_path, g_prov
def _add_prov(self, short_name, prov_type, res, resp_agent, prov_subject=None):
    """Create a provenance entity of *prov_type* by delegating to self._add.

    :param short_name: short name of the provenance entity type (e.g. "se").
    :param prov_type: the RDF type of the provenance entity.
    :param res: the resource to create, if already known.
    :param resp_agent: the responsible agent for the entity.
    :param prov_subject: the entity this provenance describes; None means
        the entity belongs to the global provenance graph.
    :return: whatever self._add returns for the new provenance entity.
    """
    if prov_subject is None:
        g_prov = self.base_iri + "prov/"
        prov_info_path = \
            g_prov.replace(self.base_iri, self.info_dir) + short_name + ".txt"
        related = []
    else:
        g_prov = str(prov_subject) + "/prov/"
        # File path of the subject entity, stripped of its ".json" suffix
        # (assumes the resolved path ends in ".json").
        subject_file = find_paths(
            str(prov_subject), self.info_dir, self.base_iri,
            self.default_dir, self.dir_split, self.n_file_item)[1][:-5]
        prov_info_path = os.sep.join(
            (subject_file, "prov", short_name + ".txt"))
        related = [prov_subject]
    return self._add(g_prov, prov_type, res, resp_agent, None, None,
                     prov_info_path, short_name, related)
def add_prov_triples_in_filesystem(self, res_iri, prov_entity_type=None):
    """Load from the filesystem the provenance files related to *res_iri*.

    :param res_iri: IRI of the entity whose provenance must be loaded.
    :param prov_entity_type: if given, only provenance files whose name
        starts with this string (e.g. "se") are loaded.
    """
    # Both a base directory and a base IRI are required to map the IRI
    # onto the filesystem; otherwise the method silently does nothing.
    if self.base_dir is not None and self.base_iri is not None:
        cur_file_path = find_paths(res_iri, self.base_dir, self.base_iri,
                                   self.default_dir, self.dir_split,
                                   self.n_file_item)[1]
        if cur_file_path.endswith("index.json"):
            # Index files: provenance lives in a sibling "prov" directory.
            cur_path = cur_file_path.replace("index.json", "") + "prov"
        else:
            # Entity files: strip the ".json" extension and descend into
            # the entity's own "prov" directory.
            cur_path = cur_file_path[:-5] + os.sep + "prov"
        # Collect every matching provenance serialization under cur_path.
        file_list = []
        if os.path.isdir(cur_path):
            for cur_dir, cur_subdir, cur_files in os.walk(cur_path):
                for cur_file in cur_files:
                    if (cur_file.endswith(".json") or cur_file.endswith(".ttl")) and \
                       (prov_entity_type is None or cur_file.startswith(prov_entity_type)):
                        file_list += [cur_dir + os.sep + cur_file]
        # Load each file at most once, tracking them in self.loaded.
        for file_path in file_list:
            if file_path not in self.loaded:
                self.loaded.add(file_path)
                # NOTE(review): the loaded graph is bound to cur_g but not
                # used in the visible code — presumably self.storer.load()
                # registers the triples as a side effect, or a statement
                # was lost in formatting; confirm against the original file.
                cur_g = self.storer.load(file_path, tmp_dir=self.tmp_dir)
br_counter = 0 write_to_disk(update_br, remove_id, full_info_dir, br_iri) # Initialize all the variables again br_iri = [] br_files = {} id_files = {} update_br = GraphSet(base_iri, context_path) remove_id = GraphSet(base_iri, context_path) if str( br ) not in br_done: # Check if it has not been considered, and if so process it br_counter += 1 print("\nAnalyse %s" % br) cur_dir, cur_file = find_paths(str(br), base_dir, base_iri, default_dir, dir_split_number, items_per_file) if cur_file not in br_files: g = s.load(cur_file, tmp_dir=temp_dir_for_rdf_loading) br_files[cur_file] = g cur_g = br_files[cur_file] has_identifier_statements = [] # For each identifier to remove in a certain br... for iden in to_remove[br]: print("Analyse %s" % iden) t = (br, GraphEntity.has_identifier, iden) if t in cur_g: # ... it specify the statement 'br has_identifier id' to be removed in br # (after this for block)...
def dir_and_file_paths(self, cur_g, base_dir, base_iri):
    """Return the (directory, file) pair on disk for the graph *cur_g*.

    All subjects of the graph are assumed to resolve to the same storage
    file, so an arbitrary subject is used for the lookup.
    """
    a_subject = str(set(cur_g.subjects(None, None)).pop())
    return find_paths(a_subject, base_dir, base_iri,
                      self.default_dir, self.dir_split, self.n_file_item)