def _all_gather(engine):
    # gather the per-image details of each metric from all ranks before reporting
    scores = engine.state.metric_details["metric3"]
    engine.state.metric_details["metric3"] = evenly_divisible_all_gather(data=scores, concat=True)
    scores = engine.state.metric_details["metric4"]
    engine.state.metric_details["metric4"] = evenly_divisible_all_gather(data=scores, concat=True)
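Every example on this page is built around evenly_divisible_all_gather (exposed via monai.utils in MONAI), which works around the fact that a plain all_gather needs equal shapes on every rank: each rank's tensor is padded along the first dimension to the longest length, gathered, and the padding is stripped again. The single-process sketch below only illustrates that pad/gather/trim idea; the helper name _simulated_all_gather and the fake per-rank list are assumptions for illustration, not the library implementation.

import torch

def _simulated_all_gather(per_rank, concat=True):
    # per_rank stands in for the tensor each distributed rank would contribute
    lengths = [t.shape[0] for t in per_rank]
    max_len = max(lengths)
    padded = []
    for t in per_rank:
        pad = max_len - t.shape[0]
        if pad > 0:
            # pad along dim 0 so every "rank" contributes the same shape,
            # which is what torch.distributed.all_gather requires
            t = torch.cat([t, t.new_zeros((pad, *t.shape[1:]))], dim=0)
        padded.append(t)
    # a real implementation would call dist.all_gather here; in this
    # single-process sketch the padded tensors are already in hand
    trimmed = [t[:n] for t, n in zip(padded, lengths)]
    return torch.cat(trimmed, dim=0) if concat else trimmed

print(_simulated_all_gather([torch.tensor([[1, 2], [3, 4]]), torch.tensor([[5, 6]])]))
# tensor([[1, 2], [3, 4], [5, 6]])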
Example #2
    def _finalize(self, _engine: Engine) -> None:
        """
        All gather classification results from ranks and save to CSV file.

        Args:
            _engine: Ignite Engine, unused argument.
        """
        ws = idist.get_world_size()
        if self.save_rank >= ws:
            raise ValueError(
                "target save rank is greater than or equal to the distributed group size.")

        outputs = torch.stack(self._outputs, dim=0)
        filenames = self._filenames
        if ws > 1:
            outputs = evenly_divisible_all_gather(outputs, concat=True)
            filenames = string_list_all_gather(filenames)

        if len(filenames) == 0:
            meta_dict = None
        else:
            if len(filenames) != len(outputs):
                warnings.warn(
                    f"filenames length: {len(filenames)} doesn't match outputs length: {len(outputs)}."
                )
            meta_dict = {Key.FILENAME_OR_OBJ: filenames}

        # save to CSV file only in the expected rank
        if idist.get_rank() == self.save_rank:
            saver = self.saver or CSVSaver(output_dir=self.output_dir,
                                           filename=self.filename,
                                           overwrite=self.overwrite,
                                           delimiter=self.delimiter)
            saver.save_batch(outputs, meta_dict)
            saver.finalize()
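For context on when _finalize runs: handlers of this style are usually registered on the engine so that the gather-and-save step fires once the run is complete. Below is a minimal wiring sketch with plain Ignite; the DummySaver class, the _step function and the toy data list are assumptions for illustration, not the MONAI handler's actual attach logic.

from ignite.engine import Engine, Events

class DummySaver:
    # stand-in for a saver-style handler; only the hook signature matters here
    def _finalize(self, engine):
        print(f"finalizing after {engine.state.iteration} iterations")

def _step(engine, batch):
    # a real evaluator would run inference here and buffer the outputs
    return batch

evaluator = Engine(_step)
evaluator.add_event_handler(Events.COMPLETED, DummySaver()._finalize)
evaluator.run([0, 1, 2], max_epochs=1)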
Example #3
    def _finalize(self, engine: Engine) -> None:
        """
        All gather classification results from ranks and save to CSV file.

        Args:
            engine: Ignite Engine, it can be a trainer, validator or evaluator.
        """
        ws = idist.get_world_size()
        if self.save_rank >= ws:
            raise ValueError(
                "target save rank is greater than or equal to the distributed group size."
            )

        outputs = torch.stack(self._outputs, dim=0)
        filenames = self._filenames
        if ws > 1:
            outputs = evenly_divisible_all_gather(outputs, concat=True)
            filenames = string_list_all_gather(filenames)

        if len(filenames) == 0:
            meta_dict = None
        else:
            if len(filenames) != len(outputs):
                warnings.warn(
                    f"filenames length: {len(filenames)} doesn't match outputs length: {len(outputs)}."
                )
            meta_dict = {Key.FILENAME_OR_OBJ: filenames}

        # save to CSV file only in the expected rank
        if idist.get_rank() == self.save_rank:
            # print('Output:', type(outputs), len(outputs), type(outputs[0]), len(outputs[0]))
            # print('Labels:', type(self._labels), len(self._labels), type(self._labels[0]), len(self._labels[0]))
            # print('Meta:', type(meta_dict[Key.FILENAME_OR_OBJ]), len(meta_dict[Key.FILENAME_OR_OBJ]))
            self.saver.save_batch(outputs, self._labels, meta_dict)
            self.saver.finalize()
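Note that, unlike outputs and filenames, self._labels does not appear to be all-gathered in this variant, so on a multi-rank run the saved label column would only reflect the samples buffered on the saving rank; whether that is intended depends on how the surrounding handler fills self._labels.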
Example #4
    def _sync(self):
        """
        All gather the buffers across distributed ranks for aggregating.
        Every buffer will be concatenated as a PyTorch Tensor.

        """
        self._synced_tensors = [
            evenly_divisible_all_gather(torch.cat(b, dim=0), concat=True)
            for b in self._buffers
        ]
        self._synced = True
Example #5
    def _run(self):
        if dist.get_rank() == 0:
            data1 = torch.tensor([[1, 2], [3, 4]])
            data2 = torch.tensor([[1.0, 2.0]])
            data3 = torch.tensor(7)

        if dist.get_rank() == 1:
            data1 = torch.tensor([[5, 6]])
            data2 = torch.tensor([[3.0, 4.0], [5.0, 6.0]])
            data3 = torch.tensor(8)

        result1 = evenly_divisible_all_gather(data=data1, concat=True)
        torch.testing.assert_allclose(result1, torch.tensor([[1, 2], [3, 4], [5, 6]]))
        result2 = evenly_divisible_all_gather(data=data2, concat=False)
        expected2 = [torch.tensor([[1.0, 2.0]]), torch.tensor([[3.0, 4.0], [5.0, 6.0]])]
        for r, e in zip(result2, expected2):
            torch.testing.assert_allclose(r, e)
        result3 = evenly_divisible_all_gather(data=data3, concat=False)
        for r in result3:
            self.assertEqual(r.ndimension(), 0)
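This test is written for two ranks: with concat=True the per-rank tensors are concatenated along dim 0 in rank order even though the ranks contribute different numbers of rows, with concat=False the result is a list holding each rank's tensor at its original length, and 0-dim tensors come back still 0-dimensional.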
Example #6
    def _sync(self):
        """
        All gather the buffers across distributed ranks for aggregating.
        Each buffer will be concatenated as a PyTorch Tensor.

        """
        if self._synced or self._buffers is None:
            return
        try:
            self._synced_tensors = [
                evenly_divisible_all_gather(torch.stack(b, dim=0), concat=True) for b in self._buffers
            ]
        except (RuntimeError, TypeError, ValueError) as e:
            raise TypeError(f"{e}. unable to sync buffer contents: {self._buffers}.") from e
        self._synced = True
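The two _sync variants (Examples 4 and 6) flatten their buffers differently before gathering: torch.cat assumes each buffered entry already carries a leading batch dimension, while torch.stack adds that dimension itself and therefore expects equal-shaped per-item entries (Example 6 also guards against re-syncing and wraps shape mismatches in a clearer error). A toy comparison with made-up tensors:

import torch

# buffer of per-iteration batches: cat keeps the existing batch dimension
batches = [torch.zeros(4, 3), torch.zeros(2, 3)]
print(torch.cat(batches, dim=0).shape)    # torch.Size([6, 3])

# buffer of per-item values: stack adds a new leading dimension,
# but requires every entry to share the same shape
items = [torch.zeros(3), torch.zeros(3), torch.zeros(3)]
print(torch.stack(items, dim=0).shape)    # torch.Size([3, 3])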