def _cmp_dir_by_timestamp(self, dir_name_1, dir_name_2):
    """Tell whether ``dir_name_1`` parses to an earlier timestamp than ``dir_name_2``.

    Every occurrence of the file tree's root name is removed from both
    names. ``dir_name_2`` is then cut down to as many path components as
    ``dir_name_1`` has, so both are compared at the same depth.

    Args:
        dir_name_1: Directory name on the left-hand side of the comparison.
        dir_name_2: Directory name on the right-hand side of the comparison.

    Returns:
        ``False`` when ``dir_name_2`` is empty once the root name is removed,
        or when ``dir_name_1`` has more path components than ``dir_name_2``;
        otherwise whether the timestamp parsed from ``dir_name_1`` is
        strictly less than the one parsed from the truncated ``dir_name_2``.
    """
    relative_1 = dir_name_1.replace(self._file_tree.get_root_name(), '')
    relative_2 = dir_name_2.replace(self._file_tree.get_root_name(), '')
    if not relative_2:
        # Nothing is left of the second name, so there is nothing to compare.
        return False

    relative_1 = FileUtil.normalize_dir_name(dir_name=relative_1)
    relative_2 = FileUtil.normalize_dir_name(dir_name=relative_2)

    # The last piece of each split is discarded; presumably normalized names
    # end with '/', which would make that piece empty -- TODO confirm.
    parts_1 = relative_1.split('/')[:-1]
    parts_2 = relative_2.split('/')[:-1]
    depth = len(parts_1)
    if depth > len(parts_2):
        return False

    # Compare at the depth of the first name only.
    truncated_2 = FileUtil.normalize_dir_name('/'.join(parts_2[:depth]))
    timestamp_1 = FileUtil.parse_dir_to_timestamp(dir_name=relative_1)
    timestamp_2 = FileUtil.parse_dir_to_timestamp(dir_name=truncated_2)
    return timestamp_1 < timestamp_2
def initialize_from_dir(self, dir_name, force=False):
    """Populate ``self._file_tree`` from the sub-directories of ``dir_name``.

    ``dir_name`` is normalized and passed to
    ``FileUtil.create_dir_if_not_exist`` before scanning. Two modes exist:

    * From scratch -- chosen when ``self._file_tree`` is falsy, when
      ``self.is_updated()`` is truthy, or when ``force`` is truthy. A new
      ``TreeBase`` rooted at ``dir_name`` replaces ``self._file_tree``.
      Child directories are visited in descending sorted order, and the
      scan stops once the tree holds ``self._max_capacity`` nodes (only
      when that capacity is positive).
    * Incremental -- otherwise. Child directories are visited in ascending
      sorted order, children for which ``_cmp_dir_by_timestamp`` against
      ``self._get_latest_dir_internal()`` is truthy are skipped, and the
      tree is trimmed to ``self._max_capacity`` after each newly added node.

    In both modes only children whose name relative to the parent (slashes
    removed) consists solely of digits are considered. Recursion depth is
    bounded by
    ``self.PARTITIONER_TYPE_TO_HEIGHT_MAP[self.PARTITIONER_TYPE]``.

    Args:
        dir_name: Root directory to build the tree from.
        force: When truthy, rebuild the tree from scratch unconditionally.

    Returns:
        None. The tree is updated in place on ``self._file_tree``.
    """

    def _recursive_initialize_from_dir(node, max_recursion):
        # Add the qualifying child directories of `node`, then descend into
        # each of them with one less level of recursion available.
        self.sys_log("Starting recursion of " + str(max_recursion) + '.')
        if max_recursion == 0:
            exhausted_msg = "Exhausted all recursions for dir [" + dir_name + '].'
            self.sys_log(exhausted_msg)
            self._logger.info(exhausted_msg)
            return

        node_name = node.get_node_name()
        child_dir_names = sorted(
            FileUtil.list_dirs_in_dir(dir_name=node_name),
            reverse=from_scratch)
        for child_node_name in child_dir_names:
            if from_scratch and self._file_tree.get_num_nodes() >= self._max_capacity > 0:
                self.sys_log("Reach the max number of node: " +
                             str(self._max_capacity) + '.')
                return

            # Only purely numeric directory names are considered.
            newly_added_string = child_node_name.replace(node_name, '').replace('/', '')
            if not newly_added_string.isdigit():
                continue

            # Incremental build: skip children that compare as earlier than
            # the latest directory already known.
            if not from_scratch and self._cmp_dir_by_timestamp(
                    dir_name_1=child_node_name,
                    dir_name_2=self._get_latest_dir_internal()):
                continue

            child_node = self._file_tree.find_node(node_name=child_node_name)
            if not child_node:
                child_node = OrderedNodeBase(node_name=child_node_name)
                # The nodes are ordered from large to small. So if the tree
                # is built from scratch, since the directory is listed from
                # large to small, SortOrder.ORDER is used. If it is an
                # incremental build, since the directory is listed from small
                # to large, SortOrder.REVERSE is used.
                order = SortOrder.ORDER if from_scratch else SortOrder.REVERSE
                self._file_tree.add_node(parent_node=node,
                                         child_node=child_node,
                                         order=order)
                # Both loggers get the same message; the sys_log variant used
                # to glue the child and parent names together with no
                # separator between them.
                added_msg = ("Adding new node [" + child_node_name +
                             "] to parent node [" + node.get_node_name() + '].')
                self.sys_log(added_msg)
                self._logger.info(added_msg)

                # NOTE(review): block nesting was reconstructed from
                # whitespace-collapsed source -- confirm that trimming is
                # meant to run only right after a new node is added.
                if not from_scratch:
                    self._file_tree.trim_tree(max_capacity=self._max_capacity)

            _recursive_initialize_from_dir(node=child_node,
                                           max_recursion=max_recursion - 1)

    # `from_scratch` and `dir_name` are read by the nested function above.
    from_scratch = False
    dir_name = FileUtil.normalize_dir_name(dir_name=dir_name)
    FileUtil.create_dir_if_not_exist(dir_name=dir_name)
    if not self._file_tree or self.is_updated() or force:
        root_node = OrderedNodeBase(
            node_name=FileUtil.normalize_dir_name(dir_name=dir_name))
        self._file_tree = TreeBase(root=root_node,
                                   max_dict_size=self._max_capacity)
        from_scratch = True

    _recursive_initialize_from_dir(
        node=self._file_tree.get_root_node(),
        max_recursion=self.PARTITIONER_TYPE_TO_HEIGHT_MAP[self.PARTITIONER_TYPE])
def test_normalize_path(self):
    """Check the outputs of FileUtil's directory and file name normalizers."""
    # A directory name with a doubled trailing slash must come back with one.
    normalized_dir = FileUtil.normalize_dir_name(dir_name='test/foo//')
    self.assertEqual(normalized_dir, 'test/foo/')

    # This file name is expected to come back unchanged.
    normalized_file = FileUtil.normalize_file_name(file_name='test/foo.txt')
    self.assertEqual(normalized_file, 'test/foo.txt')