def _load_data(self):
    """
    Run one loading pass over the brief and detail caches.

    The pass is skipped when ``self.status`` is already LOADING. Otherwise the
    status is set to LOADING, both caches are updated through a single
    executor, and the status is finally set to DONE if either cache reports
    content, or INVALID if neither does.
    """
    with self._status_mutex:
        if self.status == DataManagerStatus.LOADING.value:
            logger.debug("Current status is %s , will ignore to load data.", self.status)
            return
        self.status = DataManagerStatus.LOADING.value

    manager = ComputingResourceManager(executors_cnt=1,
                                       max_processes_cnt=settings.MAX_PROCESSES_COUNT)
    with manager as resource_mgr, resource_mgr.get_executor() as executor:
        self._brief_cache.update_cache(executor)
        last_brief_refresh = time.time()
        for _ in self._detail_cache.update_cache(executor):
            elapsed = time.time() - last_brief_refresh
            logger.debug('Loading one round of detail cache taking %ss.', elapsed)
            # Use 3 seconds as threshold to avoid updating the brief cache too often.
            if elapsed <= 3:
                continue
            self._brief_cache.update_cache(executor)
            last_brief_refresh += elapsed
        executor.wait_all_tasks_finish()

    with self._status_mutex:
        has_any_content = self._brief_cache.has_content() or self._detail_cache.has_content()
        self.status = (DataManagerStatus.DONE.value if has_any_content
                       else DataManagerStatus.INVALID.value)

    logger.info("Load brief data end, and loader pool size is %r.",
                self._detail_cache.loader_pool_size())
def load(self, executor=None):
    """
    Load all valid log files.

    When a file is reloaded, loading continues from where it left off.
    If no executor is given, a temporary one is created and ``_load`` is
    called repeatedly until it reports completion.

    Args:
        executor (Optional[Executor]): The Executor instance used for loading.

    Returns:
        bool, True if the train job is finished loading.

    Raises:
        TypeError: If `executor` is neither an Executor instance nor None.
    """
    logger.debug("Start to load data in ms data loader.")

    if isinstance(executor, Executor):
        return self._load(executor)

    if executor is not None:
        raise TypeError("'executor' should be an Executor instance or None.")

    with ComputingResourceManager() as resource_mgr, resource_mgr.get_executor() as own_executor:
        # Keep calling _load until it signals that loading is finished.
        while True:
            if self._load(own_executor):
                break
        own_executor.wait_all_tasks_finish()
        return True
def _execute_load_data(self):
    """
    Run ``_execute_loader`` for every loader in the snapshot pool on a thread pool.

    Returns early (after logging) when ``_get_threads_count`` yields a falsy
    value. Otherwise one task per loader id is submitted and this method blocks
    until all of them have completed.
    """
    worker_count = self._get_threads_count()
    if not worker_count:
        logger.info("Can not find any valid train log path to load, loader pool is empty.")
        return

    logger.info("Start to execute load data. threads_count: %s.", worker_count)

    manager = ComputingResourceManager(executors_cnt=worker_count,
                                       max_processes_cnt=settings.MAX_PROCESSES_COUNT)
    with manager as resource_mgr, ThreadPoolExecutor(max_workers=worker_count) as pool:
        pending = [
            pool.submit(self._execute_loader, loader_id, resource_mgr)
            for loader_id in self._get_snapshot_loader_pool()
        ]
        # wait() only blocks until completion; the futures' results are not inspected here.
        wait(pending, return_when=ALL_COMPLETED)
def test_load_with_not_file_list(self):
    """Loading a summary dir with no generated files raises SummaryLogPathInvalid and logs a warning."""
    data_loader = DataLoader(self._summary_dir)

    with pytest.raises(SummaryLogPathInvalid):
        data_loader.load(ComputingResourceManager(1, 1))

    warning_text = str(MockLogger.log_msg['warning'])
    assert 'No valid files can be loaded' in warning_text
def test_load_with_invalid_file_list(self):
    """Loading a valid path that holds only invalid file names raises SummaryLogPathInvalid."""
    invalid_names = ['summary.abc01', 'summary.abc02']
    self._generate_files(self._summary_dir, invalid_names)
    data_loader = DataLoader(self._summary_dir)

    with pytest.raises(SummaryLogPathInvalid):
        data_loader.load(ComputingResourceManager(1, 1))

    warning_text = str(MockLogger.log_msg['warning'])
    assert 'No valid files can be loaded' in warning_text
def test_load_with_crc_fail(self):
    """Test that a CRC failure is logged as a warning and the file is ignored."""
    # The context manager removes the directory even if writing or loading
    # raises, so a failing run does not leave temp directories behind.
    with tempfile.TemporaryDirectory() as summary_dir:
        file2 = os.path.join(summary_dir, 'summary.02')
        write_file(file2, SCALAR_RECORD)
        ms_loader = MSDataLoader(summary_dir)
        ms_loader.load(ComputingResourceManager(1, 1))

    # As before, the log is checked after the directory has been removed.
    assert 'Check crc faild and ignore this file' in str(MockLogger.log_msg['warning'])
def test_load_success(self):
    """Test loading method with valid path and file_list."""
    # Use a real temporary directory instead of borrowing the name of a
    # throwaway NamedTemporaryFile; it is removed on exit even when the
    # load or the assertion fails.
    with tempfile.TemporaryDirectory() as dir_path:
        file_list = ['summary.001', 'summary.002']
        self._generate_files(dir_path, file_list)
        dataloader = DataLoader(dir_path)
        dataloader.load(ComputingResourceManager(1, 1))
        assert dataloader._loader is not None
def test_load_success_with_crc_pass(self):
    """Test that a summary file with valid CRC loads and exposes its scalar tensors."""
    # The context manager removes the directory even if writing or loading raises.
    with tempfile.TemporaryDirectory() as summary_dir:
        file1 = os.path.join(summary_dir, 'summary.01')
        write_file(file1, SCALAR_RECORD)
        ms_loader = MSDataLoader(summary_dir)
        # Preset an older "latest" filename before loading 'summary.01'.
        ms_loader._latest_summary_filename = 'summary.00'
        ms_loader.load(ComputingResourceManager(1, 1))

    # As before, the loaded events are queried after the directory is removed.
    tag = ms_loader.get_events_data().list_tags_by_plugin('scalar')
    tensors = ms_loader.get_events_data().tensors(tag[0])
    assert len(tensors) == 3
def test_load_single_pb_file(self):
    """Test that a single graph pb file loads and is listed under the graph plugin."""
    filename = 'ms_output.pb'
    # The context manager removes the directory even if file creation,
    # loading or the event queries raise.
    with tempfile.TemporaryDirectory() as summary_dir:
        create_graph_pb_file(output_dir=summary_dir, filename=filename)
        ms_loader = MSDataLoader(summary_dir)
        ms_loader.load(ComputingResourceManager(1, 1))
        events_data = ms_loader.get_events_data()
        plugins = events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value)

    assert len(plugins) == 1
    assert plugins[0] == filename