def _all_gather(engine):
    """All gather the `metric3` and `metric4` buffers in `engine.state.metric_details` across ranks."""
    scores = engine.state.metric_details["metric3"]
    engine.state.metric_details["metric3"] = evenly_divisible_all_gather(data=scores, concat=True)
    scores = engine.state.metric_details["metric4"]
    engine.state.metric_details["metric4"] = evenly_divisible_all_gather(data=scores, concat=True)
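# For context, a gather handler like `_all_gather` is typically registered on an
# Ignite engine event so it runs once per epoch. A minimal sketch; the no-op
# process function and the code that populates `engine.state.metric_details`
# beforehand are assumptions for illustration, not shown in the source:
from ignite.engine import Engine, Events

engine = Engine(lambda e, b: None)  # placeholder process function
engine.add_event_handler(Events.EPOCH_COMPLETED, _all_gather)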
def _finalize(self, _engine: Engine) -> None:
    """
    All gather classification results from ranks and save to CSV file.

    Args:
        _engine: Ignite Engine, unused argument.

    """
    ws = idist.get_world_size()
    if self.save_rank >= ws:
        raise ValueError("target save rank is greater than the distributed group size.")
    outputs = torch.stack(self._outputs, dim=0)
    filenames = self._filenames
    if ws > 1:
        outputs = evenly_divisible_all_gather(outputs, concat=True)
        filenames = string_list_all_gather(filenames)
    if len(filenames) == 0:
        meta_dict = None
    else:
        if len(filenames) != len(outputs):
            warnings.warn(f"filenames length: {len(filenames)} doesn't match outputs length: {len(outputs)}.")
        meta_dict = {Key.FILENAME_OR_OBJ: filenames}
    # save to CSV file only in the expected rank
    if idist.get_rank() == self.save_rank:
        saver = self.saver or CSVSaver(
            output_dir=self.output_dir, filename=self.filename, overwrite=self.overwrite, delimiter=self.delimiter
        )
        saver.save_batch(outputs, meta_dict)
        saver.finalize()
def _finalize(self, engine: Engine) -> None:
    """
    All gather classification results from ranks and save to CSV file.

    Args:
        engine: Ignite Engine, it can be a trainer, validator or evaluator.

    """
    ws = idist.get_world_size()
    if self.save_rank >= ws:
        raise ValueError("target save rank is greater than the distributed group size.")
    outputs = torch.stack(self._outputs, dim=0)
    filenames = self._filenames
    if ws > 1:
        outputs = evenly_divisible_all_gather(outputs, concat=True)
        filenames = string_list_all_gather(filenames)
    if len(filenames) == 0:
        meta_dict = None
    else:
        if len(filenames) != len(outputs):
            warnings.warn(f"filenames length: {len(filenames)} doesn't match outputs length: {len(outputs)}.")
        meta_dict = {Key.FILENAME_OR_OBJ: filenames}
    # save to CSV file only in the expected rank
    if idist.get_rank() == self.save_rank:
        self.saver.save_batch(outputs, self._labels, meta_dict)
        self.saver.finalize()
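# Both `_finalize` variants end by delegating to a CSV saver. A standalone
# sketch of that last step with MONAI's CSVSaver, outside any distributed
# context (the paths and values below are made up for illustration):
import torch
from monai.data import CSVSaver

saver = CSVSaver(output_dir="./out", filename="predictions.csv", overwrite=True)
saver.save_batch(
    torch.tensor([[0.1, 0.9], [0.8, 0.2]]),  # one prediction row per item
    {"filename_or_obj": ["img0.nii", "img1.nii"]},  # used as the CSV row names
)
saver.finalize()  # flushes the buffered rows to ./out/predictions.csv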
def _sync(self):
    """
    All gather the buffers across distributed ranks for aggregating.
    Every buffer will be concatenated as a PyTorch Tensor.

    """
    self._synced_tensors = [evenly_divisible_all_gather(torch.cat(b, dim=0), concat=True) for b in self._buffers]
    self._synced = True
def _run(self):
    if dist.get_rank() == 0:
        data1 = torch.tensor([[1, 2], [3, 4]])
        data2 = torch.tensor([[1.0, 2.0]])
        data3 = torch.tensor(7)

    if dist.get_rank() == 1:
        data1 = torch.tensor([[5, 6]])
        data2 = torch.tensor([[3.0, 4.0], [5.0, 6.0]])
        data3 = torch.tensor(8)

    result1 = evenly_divisible_all_gather(data=data1, concat=True)
    torch.testing.assert_allclose(result1, torch.tensor([[1, 2], [3, 4], [5, 6]]))
    result2 = evenly_divisible_all_gather(data=data2, concat=False)
    for r, e in zip(result2, [torch.tensor([[1.0, 2.0]]), torch.tensor([[3.0, 4.0], [5.0, 6.0]])]):
        torch.testing.assert_allclose(r, e)
    result3 = evenly_divisible_all_gather(data=data3, concat=False)
    for r in result3:
        self.assertEqual(r.ndimension(), 0)
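# The test above depends on `evenly_divisible_all_gather` coping with tensors
# whose dim-0 lengths differ across ranks. A simplified sketch of that
# pad-gather-trim strategy (an illustration, not MONAI's actual implementation;
# assumes torch.distributed is initialized and the input is at least 1-D):
import torch
import torch.distributed as dist

def _gather_uneven_sketch(data: torch.Tensor) -> torch.Tensor:
    world_size = dist.get_world_size()
    # exchange every rank's dim-0 length
    length = torch.tensor([data.shape[0]], device=data.device)
    all_lengths = [torch.zeros_like(length) for _ in range(world_size)]
    dist.all_gather(all_lengths, length)
    max_len = int(torch.stack(all_lengths).max())
    # pad to the maximum length so all_gather sees identical shapes everywhere
    if data.shape[0] < max_len:
        pad = torch.zeros((max_len - data.shape[0], *data.shape[1:]), dtype=data.dtype, device=data.device)
        data = torch.cat([data, pad], dim=0)
    gathered = [torch.zeros_like(data) for _ in range(world_size)]
    dist.all_gather(gathered, data)
    # strip each rank's padding using its true length, then concatenate
    return torch.cat([g[: int(n)] for g, n in zip(gathered, all_lengths)], dim=0)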
def _sync(self):
    """
    All gather the buffers across distributed ranks for aggregating.
    Each buffer will be concatenated as a PyTorch Tensor.

    """
    if self._synced or self._buffers is None:
        return
    try:
        self._synced_tensors = [evenly_divisible_all_gather(torch.stack(b, dim=0), concat=True) for b in self._buffers]
    except (RuntimeError, TypeError, ValueError) as e:
        raise TypeError(f"{e}. unable to sync buffer contents: {self._buffers}.") from e
    self._synced = True
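# Note the difference between the two `_sync` variants: the earlier one uses
# torch.cat (buffer items may carry different batch sizes along dim 0), while
# this one uses torch.stack (every item must share one shape, e.g. one
# fixed-size result per iteration). A hypothetical minimal host class for the
# pattern; the class and method names here are assumptions, not from the source:
import torch

class _BufferedAggregator:
    """Collects per-iteration tensors on each rank; `_sync` gathers them once."""

    def __init__(self, num_buffers: int = 1):
        self._buffers = [[] for _ in range(num_buffers)]
        self._synced_tensors = None
        self._synced = False

    def update(self, *values: torch.Tensor):
        for buf, value in zip(self._buffers, values):
            buf.append(value.detach())
        self._synced = False  # new data invalidates any previous sync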