Ejemplo n.º 1
0
    def _cmp_dir_by_timestamp(self, dir_name_1, dir_name_2):
        """Report whether ``dir_name_1`` is strictly older than ``dir_name_2``.

        Both names first have the tree's root name removed. ``dir_name_2`` is
        then cut down to the same number of path components as ``dir_name_1``
        so the two are compared at the same depth.

        Args:
            dir_name_1: Directory name to test.
            dir_name_2: Directory name to compare against.

        Returns:
            ``False`` when ``dir_name_2`` is empty once the root name is
            removed, or when ``dir_name_1`` has more path components than
            ``dir_name_2``. Otherwise, whether the timestamp parsed from
            ``dir_name_1`` is less than the one parsed from the truncated
            ``dir_name_2``.
        """
        root_name = self._file_tree.get_root_name()
        relative_1 = dir_name_1.replace(root_name, '')
        relative_2 = dir_name_2.replace(root_name, '')

        # Nothing left to compare against once the root is stripped.
        if not relative_2:
            return False

        relative_1 = FileUtil.normalize_dir_name(dir_name=relative_1)
        relative_2 = FileUtil.normalize_dir_name(dir_name=relative_2)

        # Drop the last split element (presumably the empty string that
        # follows a trailing '/' on a normalized directory name).
        parts_1 = relative_1.split('/')[:-1]
        parts_2 = relative_2.split('/')[:-1]

        depth = len(parts_1)
        if depth > len(parts_2):
            return False

        # Compare both directories at the depth of the first one.
        truncated_2 = FileUtil.normalize_dir_name('/'.join(parts_2[:depth]))
        timestamp_1 = FileUtil.parse_dir_to_timestamp(dir_name=relative_1)
        timestamp_2 = FileUtil.parse_dir_to_timestamp(dir_name=truncated_2)
        return timestamp_1 < timestamp_2
Ejemplo n.º 2
0
    def initialize_from_dir(self, dir_name, force=False):
        """Populate the file tree from the directory hierarchy under ``dir_name``.

        A new tree rooted at ``dir_name`` is built when no tree exists yet,
        when ``self.is_updated()`` is true, or when ``force`` is set.
        Otherwise the existing tree is updated incrementally: child
        directories that compare as older than
        ``self._get_latest_dir_internal()`` are skipped, and the tree is
        trimmed to ``self._max_capacity`` after each newly added node.

        Only child directories whose name, with the parent path and slashes
        removed, consists solely of digits are considered. The walk descends
        at most ``PARTITIONER_TYPE_TO_HEIGHT_MAP[PARTITIONER_TYPE]`` levels.

        Args:
            dir_name: Directory to use as the root of the tree. It is
                normalized and handed to ``FileUtil.create_dir_if_not_exist``
                before the walk starts.
            force: When true, rebuild the tree from scratch even if one
                already exists.
        """
        def _recursive_initialize_from_dir(node, max_recursion):
            # Visit the directories directly under `node` and recurse into
            # each of them until the recursion budget is used up.
            self.sys_log("Starting recursion of " + str(max_recursion) + '.')
            if max_recursion == 0:
                exhausted_message = ("Exhausted all recursions for dir [" +
                                     dir_name + '].')
                self.sys_log(exhausted_message)
                self._logger.info(exhausted_message)
                return

            node_name = node.get_node_name()
            # From scratch the children are walked in descending order,
            # incrementally in ascending order (see the ordering note below).
            for child_node_name in sorted(
                    FileUtil.list_dirs_in_dir(dir_name=node_name),
                    reverse=from_scratch):
                # When building from scratch, stop once the tree is full. A
                # non-positive max capacity disables this limit.
                if from_scratch and self._file_tree.get_num_nodes(
                ) >= self._max_capacity > 0:
                    self.sys_log("Reach the max number of node: " +
                                 str(self._max_capacity) + '.')
                    return

                # Keep only children whose own name component is numeric.
                newly_added_string = child_node_name.replace(node_name,
                                                             '').replace(
                                                                 '/', '')
                if not newly_added_string.isdigit():
                    continue

                # On an incremental update, skip directories older than the
                # latest directory reported by _get_latest_dir_internal().
                if not from_scratch and self._cmp_dir_by_timestamp(
                        dir_name_1=child_node_name,
                        dir_name_2=self._get_latest_dir_internal()):
                    continue

                child_node = self._file_tree.find_node(
                    node_name=child_node_name)
                if not child_node:
                    child_node = OrderedNodeBase(node_name=child_node_name)
                    # The nodes are ordered from large to small. If the tree
                    # is built from scratch, the directory is listed from
                    # large to small, so SortOrder.ORDER is used. For an
                    # incremental build the directory is listed from small to
                    # large, so SortOrder.REVERSE is used.
                    order = SortOrder.ORDER if from_scratch else SortOrder.REVERSE
                    self._file_tree.add_node(parent_node=node,
                                             child_node=child_node,
                                             order=order)
                    # Build the message once so both log sinks receive the
                    # same, correctly delimited text.
                    added_message = ("Adding new node [" + child_node_name +
                                     "] to parent node [" + node_name + '].')
                    self.sys_log(added_message)
                    self._logger.info(added_message)

                    if not from_scratch:
                        self._file_tree.trim_tree(
                            max_capacity=self._max_capacity)

                _recursive_initialize_from_dir(node=child_node,
                                               max_recursion=max_recursion - 1)

        # `from_scratch` is read by the nested helper through its closure, so
        # it must be settled before the first recursive call below.
        from_scratch = False
        dir_name = FileUtil.normalize_dir_name(dir_name=dir_name)
        FileUtil.create_dir_if_not_exist(dir_name=dir_name)
        if not self._file_tree or self.is_updated() or force:
            root_node = OrderedNodeBase(node_name=FileUtil.normalize_dir_name(
                dir_name=dir_name))
            self._file_tree = TreeBase(root=root_node,
                                       max_dict_size=self._max_capacity)
            from_scratch = True

        _recursive_initialize_from_dir(
            node=self._file_tree.get_root_node(),
            max_recursion=self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                self.PARTITIONER_TYPE])
Ejemplo n.º 3
0
 def test_normalize_path(self):
     # A directory name with a doubled trailing slash is expected to come
     # back with a single trailing slash.
     normalized_dir = FileUtil.normalize_dir_name(dir_name='test/foo//')
     self.assertEqual(normalized_dir, 'test/foo/')

     # A file name that is already clean is expected to come back unchanged.
     normalized_file = FileUtil.normalize_file_name(file_name='test/foo.txt')
     self.assertEqual(normalized_file, 'test/foo.txt')