def test_full_pipeline_mask_project(self, tmpdir, data_test_dir):
    """Run calculation plan 2 over the ``*nucleus.seg`` files with two workers.

    The test passes when ``test2.xlsx`` is written into ``tmpdir`` and its
    first sheet, read with a two-level header, has shape ``(2, 4)``.
    """
    plan = self.create_calculation_plan2()
    report_path = os.path.join(tmpdir, "test2.xlsx")
    segmentation_files = glob(os.path.join(data_test_dir, "*nucleus.seg"))
    calc = Calculation(
        segmentation_files,
        base_prefix=data_test_dir,
        result_prefix=data_test_dir,
        measurement_file_path=report_path,
        sheet_name="Sheet1",
        calculation_plan=plan,
        voxel_size=(1, 1, 1),
    )

    manager = CalculationManager()
    manager.set_number_of_workers(2)
    manager.add_calculation(calc)

    # Poll until the manager reports that no work is left.
    while manager.has_work:
        time.sleep(0.1)
        manager.get_results()

    # Pause before finalizing the writer (longer on macOS) -- presumably this
    # lets pending results reach the writer; TODO confirm.
    time.sleep(2 if sys.platform == "darwin" else 0.4)
    manager.writer.finish()

    assert os.path.exists(report_path)
    summary = pd.read_excel(report_path, index_col=0, header=[0, 1], engine=ENGINE)
    assert summary.shape == (2, 4)
def test_full_pipeline_component_split(self, tmpdir, data_test_dir):
    """Run calculation plan 3 over the ``stack1_component*`` images.

    Checks the four sheets of the resulting ``test3.xlsx``: the first sheet
    has shape ``(8, 10)``; sheets 1-3 each have as many rows as the sum of
    the "Segmentation Components Number"/"count" column of the first sheet,
    with 6, 6 and 8 columns respectively.
    """
    plan = self.create_calculation_plan3()
    report_path = os.path.join(tmpdir, "test3.xlsx")
    image_files = glob(os.path.join(data_test_dir, "stack1_components", "stack1_component*[0-9].tif"))
    calc = Calculation(
        image_files,
        base_prefix=data_test_dir,
        result_prefix=data_test_dir,
        measurement_file_path=report_path,
        sheet_name="Sheet1",
        calculation_plan=plan,
        voxel_size=(1, 1, 1),
    )

    manager = CalculationManager()
    manager.set_number_of_workers(2)
    manager.add_calculation(calc)

    while manager.has_work:
        time.sleep(0.1)
        batch_result = manager.get_results()
        if batch_result.errors:
            # Surface worker errors in the test output instead of dropping them.
            print(batch_result.errors, file=sys.stderr)

    # Pause before finalizing the writer (longer on macOS) -- presumably this
    # lets pending results reach the writer; TODO confirm.
    time.sleep(2 if sys.platform == "darwin" else 0.4)
    manager.writer.finish()

    def read_sheet(**extra):
        # Every sheet is read with the same index/header/engine settings.
        return pd.read_excel(report_path, index_col=0, header=[0, 1], engine=ENGINE, **extra)

    assert os.path.exists(report_path)
    df = read_sheet()
    assert df.shape == (8, 10)

    first_detail = read_sheet(sheet_name=1)
    assert first_detail.shape[0] > 8
    component_total = df["Segmentation Components Number"]["count"].sum()
    assert first_detail.shape == (component_total, 6)

    for sheet_index, column_count in ((2, 6), (3, 8)):
        assert read_sheet(sheet_name=sheet_index).shape == (component_total, column_count)
def get_data(self):
    """Build a ``Calculation`` from the current state of this object.

    Text values are read from the ``base_prefix``, ``result_prefix``,
    ``measurement_file_path_view`` and ``sheet_name`` attributes via
    ``.text()`` and converted with ``str``; the voxel size comes from
    ``self.voxel_size.get_values()``.

    :return: newly constructed ``Calculation``
    """
    return Calculation(
        file_list=self.file_list,
        base_prefix=str(self.base_prefix.text()),
        result_prefix=str(self.result_prefix.text()),
        measurement_file_path=str(self.measurement_file_path_view.text()),
        sheet_name=str(self.sheet_name.text()),
        calculation_plan=self.calculation_plan,
        voxel_size=self.voxel_size.get_values(),
    )
def test_full_pipeline_error(self, tmp_path_factory, data_test_dir, monkeypatch):
    """Run the pipeline on a data set where one file is expected to fail.

    The ``stack1_component*.tif`` files are copied to a temporary directory
    and ``stack1_component1.tif`` is duplicated as
    ``stack1_component10.tif``. The test expects eight result rows in the
    first sheet and exactly one row in the "Errors" sheet whose description
    starts with ``[Errno 2]`` (presumably because the duplicated image has no
    companion file -- TODO confirm).
    """
    plan = self.create_calculation_plan()
    data_dir = tmp_path_factory.mktemp("data")
    file_pattern_copy = os.path.join(data_test_dir, "stack1_components", "stack1_component*.tif")
    file_paths = sorted(glob(file_pattern_copy))
    for el in file_paths:
        shutil.copy(el, data_dir)
    shutil.copy(data_dir / "stack1_component1.tif", data_dir / "stack1_component10.tif")

    # Only files whose name ends with a digit before the extension are processed.
    file_pattern = os.path.join(data_dir, "stack1_component*[0-9].tif")
    file_paths = sorted(glob(file_pattern))
    result_dir = tmp_path_factory.mktemp("result")
    report_path = os.path.join(result_dir, "test.xlsx")

    assert os.path.basename(file_paths[0]) == "stack1_component1.tif"
    calc = Calculation(
        file_paths,
        base_prefix=str(data_dir),
        result_prefix=str(data_dir),
        measurement_file_path=report_path,
        sheet_name="Sheet1",
        calculation_plan=plan,
        voxel_size=(1, 1, 1),
    )

    manager = CalculationManager()
    manager.set_number_of_workers(3)
    manager.add_calculation(calc)

    while manager.has_work:
        time.sleep(0.1)
        manager.get_results()
    manager.writer.finish()

    # Pause after finalizing the writer (longer on macOS) -- presumably this
    # lets the file reach disk before it is read back; TODO confirm.
    if sys.platform == "darwin":
        time.sleep(2)
    else:
        time.sleep(0.4)

    assert os.path.exists(report_path)
    df = pd.read_excel(report_path, index_col=0, header=[0, 1], engine=ENGINE)
    assert df.shape == (8, 4)
    for i in range(8):
        assert os.path.basename(df.name.units[i]) == f"stack1_component{i + 1}.tif"

    df2 = pd.read_excel(report_path, sheet_name="Errors", index_col=0, engine=ENGINE)
    assert df2.shape == (1, 2)
    # This check was previously a bare expression whose result was discarded,
    # so the error description was never actually verified.
    assert str(df2.loc[0]["error description"]).startswith("[Errno 2]")
def add_calculation(self, calculation: Calculation):
    """
    Register a calculation and queue its files for processing.

    The calculation is stored under its ``uuid``, its counter is reset to
    zero, the size bookkeeping is updated with the number of files, the
    enumerated file list is handed to ``batch_manager`` together with
    ``do_calculation``, and the calculation is registered with the writer.

    :param calculation: Calculation
    """
    key = calculation.uuid
    self.calculation_dict[key] = calculation
    self.counter_dict[key] = 0

    file_count = len(calculation.file_list)
    self.calculation_sizes.append(file_count)
    self.calculation_size += file_count

    # Each work item is an (index, file) pair.
    indexed_files = list(enumerate(calculation.file_list))
    self.batch_manager.add_work(indexed_files, calculation.get_base_calculation(), do_calculation)
    self.writer.add_data_part(calculation)
def test_do_calculation_calculation_process(self, tmpdir, data_test_dir):
    """Call ``CalculationProcess.do_calculation`` directly on a single file.

    Uses calculation plan 3 on ``stack1_component1.tif`` and checks that the
    result is a list whose first element is a ``ResponseData``.
    """
    plan = self.create_calculation_plan3()
    image_path = os.path.join(data_test_dir, "stack1_components", "stack1_component1.tif")
    calc = Calculation(
        [image_path],
        base_prefix=data_test_dir,
        result_prefix=data_test_dir,
        measurement_file_path=os.path.join(tmpdir, "test3.xlsx"),
        sheet_name="Sheet1",
        calculation_plan=plan,
        voxel_size=(1, 1, 1),
    )

    worker = CalculationProcess()
    task = FileCalculation(image_path, calc)
    responses = worker.do_calculation(task)

    assert isinstance(responses, list)
    assert isinstance(responses[0], ResponseData)
def test_full_pipeline(self, tmpdir, data_test_dir, monkeypatch):
    """Run the manager pipeline with ``MockCalculationProcess`` patched in.

    Polls the manager for a bounded number of iterations; if work is still
    pending afterwards the jobs are killed and the test fails. On success
    ``test.xlsx`` must hold eight rows whose file names are
    ``stack1_component1.tif`` .. ``stack1_component8.tif`` in order.
    """
    monkeypatch.setattr(batch_backend, "CalculationProcess", MockCalculationProcess)
    plan = self.create_calculation_plan()
    report_path = os.path.join(tmpdir, "test.xlsx")
    image_files = sorted(glob(os.path.join(data_test_dir, "stack1_components", "stack1_component*[0-9].tif")))
    assert os.path.basename(image_files[0]) == "stack1_component1.tif"

    calc = Calculation(
        image_files,
        base_prefix=data_test_dir,
        result_prefix=data_test_dir,
        measurement_file_path=report_path,
        sheet_name="Sheet1",
        calculation_plan=plan,
        voxel_size=(1, 1, 1),
    )
    manager = CalculationManager()
    manager.set_number_of_workers(3)
    manager.add_calculation(calc)

    # Upper bound on waiting: 120 seconds' worth of 0.1-second polls.
    poll_interval = 0.1
    max_polls = int(120 / poll_interval)
    for _ in range(max_polls):
        manager.get_results()
        if not manager.has_work:
            break
        time.sleep(poll_interval)
    else:
        # The loop ran out without the manager becoming idle.
        manager.kill_jobs()
        pytest.fail("jobs hanged")

    manager.writer.finish()
    # Pause after finalizing the writer (longer on macOS) -- presumably this
    # lets the file reach disk before it is read back; TODO confirm.
    time.sleep(2 if sys.platform == "darwin" else 0.4)

    assert os.path.exists(report_path)
    summary = pd.read_excel(report_path, index_col=0, header=[0, 1], engine=ENGINE)
    assert summary.shape == (8, 4)
    for row in range(8):
        expected_name = f"stack1_component{row + 1}.tif"
        assert os.path.basename(summary.name.units[row]) == expected_name
def test_do_calculation_save(self, tmpdir, data_test_dir, file_name, root_type, save_method: SaveBase):
    """Run a simple plan that ends with a ``Save`` step for ``save_method``.

    ``file_name``, ``root_type`` and ``save_method`` are supplied by the
    caller (presumably pytest parametrization -- not visible here). The
    result of ``do_calculation`` must be a list whose first element is a
    ``ResponseData``.
    """
    save_step = Save(
        "_test",
        "",
        save_method.get_name(),
        save_method.get_short_name(),
        save_method.get_default_values(),
    )
    plan = self.create_simple_plan(root_type, save_step)
    source_path = os.path.join(data_test_dir, file_name)
    calc = Calculation(
        [source_path],
        base_prefix=os.path.dirname(source_path),
        result_prefix=tmpdir,
        measurement_file_path=os.path.join(tmpdir, "test3.xlsx"),
        sheet_name="Sheet1",
        calculation_plan=plan,
        voxel_size=(1, 1, 1),
    )

    worker = CalculationProcess()
    task = FileCalculation(source_path, calc)
    responses = worker.do_calculation(task)

    assert isinstance(responses, list)
    assert isinstance(responses[0], ResponseData)
def test_full_pipeline_base(self, tmpdir, data_test_dir, monkeypatch):
    """Process every ``stack1_component*`` image through ``do_calculation``.

    Each call must return a list whose first element is a ``ResponseData``.

    NOTE(review): the monkeypatch replaces ``batch_backend.CalculationProcess``
    while the object built below comes from the ``CalculationProcess`` name
    of this module -- confirm which class is meant to be exercised.
    """
    monkeypatch.setattr(batch_backend, "CalculationProcess", MockCalculationProcess)
    plan = self.create_calculation_plan()
    image_files = sorted(glob(os.path.join(data_test_dir, "stack1_components", "stack1_component*[0-9].tif")))
    assert os.path.basename(image_files[0]) == "stack1_component1.tif"

    calc = Calculation(
        image_files,
        base_prefix=data_test_dir,
        result_prefix=data_test_dir,
        measurement_file_path=os.path.join(tmpdir, "test.xlsx"),
        sheet_name="Sheet1",
        calculation_plan=plan,
        voxel_size=(1, 1, 1),
    )

    worker = CalculationProcess()
    for image_path in image_files:
        responses = worker.do_calculation(FileCalculation(image_path, calc))
        assert isinstance(responses, list)
        assert isinstance(responses[0], ResponseData)