Exemple #1
0
    def test_find_paths(self):
        """Exercise ``find_paths`` on entity, provenance, prefixed and root IRIs.

        Each scenario calls ``find_paths`` with the module-level configuration
        (``base_dir``, ``base_iri``, ``default_dir``, ``dir_split_number`` and
        ``items_per_file``), prints the IRI next to the returned value, and then
        compares that value with the expected two-element tuple of paths.
        """
        number = "15"
        prefix = "012340"
        entity_iri = base_iri + "br/" + number
        entity_prov_iri = entity_iri + "/prov/se/1"
        prefixed_iri = base_iri + "br/" + prefix + number
        prefixed_prov_iri = prefixed_iri + "/prov/se/1"

        # Arguments shared by every find_paths call below.
        settings = (base_dir, base_iri, default_dir, dir_split_number,
                    items_per_file)
        # Roots of the expected paths for non-prefixed and prefixed resources.
        default_root = sep.join((base_dir + "br", default_dir))
        prefixed_root = sep.join((base_dir + "br", prefix))

        # 1. Entity IRI whose number carries no prefix.
        found = find_paths(entity_iri, *settings)
        print(entity_iri, found)
        self.assertEqual(
            found,
            (sep.join((default_root, self.__get_dir(number))),
             sep.join((default_root, self.__get_file(number))) + ".json"))

        # 2. Provenance IRI of the entity above.
        found = find_paths(entity_prov_iri, *settings)
        print(entity_prov_iri, found)
        self.assertEqual(
            found,
            (sep.join((default_root, self.__get_file(number), "prov")),
             sep.join((default_root, self.__get_file(number), "prov",
                       "se.json"))))

        # 3. Entity IRI whose number starts with the "012340" prefix.
        found = find_paths(prefixed_iri, *settings)
        print(prefixed_iri, found)
        self.assertEqual(
            found,
            (sep.join((prefixed_root, self.__get_dir(number))),
             sep.join((prefixed_root, self.__get_file(number))) + ".json"))

        # 4. Provenance IRI of the prefixed entity.
        found = find_paths(prefixed_prov_iri, *settings)
        print(prefixed_prov_iri, found)
        self.assertEqual(
            found,
            (sep.join((prefixed_root, self.__get_file(number), "prov")),
             sep.join((prefixed_root, self.__get_file(number), "prov",
                       "se.json"))))

        # 5. IRI located directly under "prov/pa/".
        pa_iri = base_iri + "prov/pa/1"
        found = find_paths(pa_iri, *settings)
        print(pa_iri, found)
        pa_dir = sep.join((base_dir + "prov", "pa"))
        self.assertEqual(found, (pa_dir, pa_dir + sep + "1.json"))

        # 6. The base IRI itself: base_dir minus its last character, plus
        #    an index.json file directly inside base_dir.
        found = find_paths(base_iri, *settings)
        print(base_iri, found)
        self.assertEqual(found, (base_dir[:-1], base_dir + "index.json"))

        # 7. The "br/" collection IRI: its directory and the index.json in it.
        br_iri = base_iri + "br/"
        found = find_paths(br_iri, *settings)
        print(br_iri, found)
        br_dir = base_dir + "br"
        self.assertEqual(found, (br_dir, br_dir + sep + "index.json"))
Exemple #2
0
 def dir_and_file_paths(self, cur_g, base_dir, base_iri):
     """Return the result of ``find_paths`` for one subject of ``cur_g``.

     A single subject is taken from the set of subjects in ``cur_g``
     (``set.pop`` picks an arbitrary element and raises ``KeyError`` when the
     graph has no subject). ``is_json`` is passed as ``False`` when either
     ``self.nt`` or ``self.nq`` is truthy, and as ``True`` otherwise.
     """
     subject = set(cur_g.subjects(None, None)).pop()
     wants_json = not (self.nt or self.nq)
     return find_paths(
         str(subject), base_dir, base_iri,
         self.default_dir, self.dir_split, self.n_file_item,
         is_json=wants_json)
Exemple #3
0
    def _find_prov_info_path(self, prov_subject, short_name):
        """Build the provenance info file path and the provenance graph IRI.

        With no subject, the graph IRI is ``base_iri + "prov/"`` and the info
        file is placed under ``info_dir`` stripped of its last two
        ``os.sep``-separated components. With a subject, the graph IRI is the
        subject IRI followed by ``"/prov/"`` and the info file is placed in a
        ``prov`` directory derived from the subject's file path.

        Returns the tuple ``(prov_info_path, g_prov)``.
        """
        if prov_subject is not None:
            subject_iri = str(prov_subject)
            json_path = find_paths(subject_iri, self.info_dir, self.base_iri,
                                   self.default_dir, self.dir_split,
                                   self.n_file_item)[1]
            # Drop the last five characters of the file path (presumably the
            # ".json" extension) to obtain the resource's own directory.
            info_path = os.sep.join(
                (json_path[:-5], "prov", short_name + ".txt"))
            return info_path, subject_iri + "/prov/"

        graph_iri = self.base_iri + "prov/"
        info_root = self.info_dir.rsplit(os.sep, 2)[0] + os.sep
        info_path = graph_iri.replace(self.base_iri, info_root) + short_name + ".txt"
        return info_path, graph_iri
Exemple #4
0
    def _add_prov(self, short_name, prov_type, res, resp_agent,
                  prov_subject=None):
        """Delegate to ``self._add`` with a provenance graph IRI and info path.

        With no ``prov_subject``, the graph IRI is ``base_iri + "prov/"`` and
        the info file path is that IRI with ``base_iri`` replaced by
        ``info_dir``. With a subject, the graph IRI is the subject IRI followed
        by ``"/prov/"`` and the info file goes into a ``prov`` directory
        derived from the subject's file path. The subject, when given, is also
        forwarded to ``self._add`` as a one-element list (an empty list
        otherwise).

        Returns whatever ``self._add`` returns.
        """
        if prov_subject is not None:
            g_prov = str(prov_subject) + "/prov/"
            json_path = find_paths(str(prov_subject), self.info_dir,
                                   self.base_iri, self.default_dir,
                                   self.dir_split, self.n_file_item)[1]
            # Drop the last five characters of the file path (presumably the
            # ".json" extension) before appending the "prov" directory.
            prov_info_path = os.sep.join(
                (json_path[:-5], "prov", short_name + ".txt"))
            related = [prov_subject]
        else:
            g_prov = self.base_iri + "prov/"
            prov_info_path = g_prov.replace(
                self.base_iri, self.info_dir) + short_name + ".txt"
            related = []

        return self._add(g_prov, prov_type, res, resp_agent, None, None,
                         prov_info_path, short_name, related)
Exemple #5
0
    def add_prov_triples_in_filesystem(self, res_iri, prov_entity_type=None):
        """Load the provenance files stored on disk for ``res_iri``.

        Nothing is done unless both ``self.base_dir`` and ``self.base_iri`` are
        set. Otherwise the resource's file path is obtained from
        ``find_paths``, the matching ``prov`` directory is derived from it, and
        every ``.json``/``.ttl`` file below that directory is loaded through
        ``self.storer.load`` unless its path is already in ``self.loaded``.

        :param res_iri: IRI of the resource whose provenance should be loaded.
        :param prov_entity_type: optional file-name prefix; when given, only
            files whose name starts with it are considered.
        """
        if self.base_dir is not None and self.base_iri is not None:
            # Second element of the find_paths result is the file path.
            cur_file_path = find_paths(res_iri, self.base_dir, self.base_iri,
                                       self.default_dir, self.dir_split,
                                       self.n_file_item)[1]
            if cur_file_path.endswith("index.json"):
                # Index file: the "prov" directory sits next to it.
                cur_path = cur_file_path.replace("index.json", "") + "prov"
            else:
                # Strip the last five characters (presumably ".json") and use
                # the "prov" directory below the resulting path.
                cur_path = cur_file_path[:-5] + os.sep + "prov"

            # Collect every candidate provenance file below cur_path.
            file_list = []
            if os.path.isdir(cur_path):
                for cur_dir, cur_subdir, cur_files in os.walk(cur_path):
                    for cur_file in cur_files:
                        if (cur_file.endswith(".json") or cur_file.endswith(".ttl")) and \
                                (prov_entity_type is None or cur_file.startswith(prov_entity_type)):
                            file_list += [cur_dir + os.sep + cur_file]

            # Load each file at most once, tracking loaded paths in self.loaded.
            for file_path in file_list:
                if file_path not in self.loaded:
                    self.loaded.add(file_path)
                    # NOTE(review): cur_g is not used in the lines visible
                    # here; confirm whether the loop body continues.
                    cur_g = self.storer.load(file_path, tmp_dir=self.tmp_dir)
Exemple #6
0
                    br_counter = 0
                    write_to_disk(update_br, remove_id, full_info_dir, br_iri)
                    # Initialize all the variables again
                    br_iri = []
                    br_files = {}
                    id_files = {}
                    update_br = GraphSet(base_iri, context_path)
                    remove_id = GraphSet(base_iri, context_path)

                if str(
                        br
                ) not in br_done:  # Check if it has not been considered, and if so process it
                    br_counter += 1
                    print("\nAnalyse %s" % br)
                    cur_dir, cur_file = find_paths(str(br), base_dir, base_iri,
                                                   default_dir,
                                                   dir_split_number,
                                                   items_per_file)
                    if cur_file not in br_files:
                        g = s.load(cur_file, tmp_dir=temp_dir_for_rdf_loading)
                        br_files[cur_file] = g

                    cur_g = br_files[cur_file]
                    has_identifier_statements = []

                    # For each identifier to remove in a certain br...
                    for iden in to_remove[br]:
                        print("Analyse %s" % iden)
                        t = (br, GraphEntity.has_identifier, iden)
                        if t in cur_g:
                            # ... it specifies the statement 'br has_identifier id' to be removed from br
                            # (after this for block)...
Exemple #7
0
 def dir_and_file_paths(self, cur_g, base_dir, base_iri):
     """Return the result of ``find_paths`` for one subject of ``cur_g``.

     A single subject is taken from the set of subjects in ``cur_g``
     (``set.pop`` picks an arbitrary element and raises ``KeyError`` when the
     graph has no subject); its string form is resolved with this object's
     ``default_dir``, ``dir_split`` and ``n_file_item`` settings.
     """
     subjects = set(cur_g.subjects(None, None))
     subject_iri = str(subjects.pop())
     return find_paths(subject_iri, base_dir, base_iri,
                       self.default_dir, self.dir_split, self.n_file_item)