Example #1
    def after_train_iter(self, runner):
        if not self.every_n_iters(runner, self.interval):
            return

        self.logger.info("start to eval for iter: {}".format(runner.iter + 1))
        save_path = os.path.join(self.save_path,
                                 "iter_{}".format(runner.iter + 1))
        mkdir_or_exist(save_path)
        results = []  # list of dict
        if self.multi_process:
            assert is_distributed(), (
                "multi-process eval requires distributed training")
            raise NotImplementedError(
                "multi-process eval is not supported yet")
        elif self.local_rank == 0:  # rank 0 handles all of the evaluation
            for data in self.dataloader:
                outputs = runner.model.test_step(data,
                                                 save_image=self.save_image,
                                                 save_path=save_path,
                                                 ensemble=self.ensemble)
                result = runner.model.cal_for_eval(outputs, data)
                assert isinstance(result, list)
                results += result
            self.evaluate(results, runner.iter + 1)
        else:
            pass  # non-zero ranks skip eval and wait at the barrier below

        if is_distributed():
            dist.group_barrier()
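
The hook above funnels evaluation through rank 0 and then has every rank meet at dist.group_barrier(), so no rank resumes training before evaluation finishes. A minimal sketch of that pattern (the function name is illustrative), assuming MegEngine-style helpers (is_distributed, get_rank, group_barrier) and the test_step/cal_for_eval model methods used above:

import megengine.distributed as dist

def eval_on_rank0_then_sync(dataloader, model, save_path):
    # Only rank 0 walks the dataloader; the other ranks fall straight through.
    if not dist.is_distributed() or dist.get_rank() == 0:
        results = []
        for data in dataloader:
            outputs = model.test_step(data, save_path=save_path)
            results += model.cal_for_eval(outputs, data)
    # Every rank blocks here, so training resumes only after eval is done.
    if dist.is_distributed():
        dist.group_barrier()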
Example #2
def worker(rank, q):
    dist.init_process_group("localhost", port, world_size, rank, rank)
    dist.group_barrier()
    if rank == 0:
        func(0, q)  # puts 0 into q
        q.put(2)
    else:
        _assert_q_val(q, 0)  # func already ran in rank 0
        _assert_q_empty(q)  # rank 0 has not reached q.put(2) yet
        func(1, q)
        _assert_q_val(q, 1)  # rank 1's func ran before rank 0's q.put(2)
        _assert_q_val(q, 2)  # q.put(2) finally ran in rank 0
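
The ordering asserted by the comments above only holds if func itself synchronizes the two ranks; otherwise rank 0 could reach q.put(2) before rank 1 checks the queue. A hypothetical func consistent with those assertions (not the test suite's actual definition):

import megengine.distributed as dist

def func(rank, q):
    q.put(rank)  # rank 0 puts 0, rank 1 puts 1, matching the comments above
    # The barrier explains the asserted ordering: rank 0 cannot move on to
    # q.put(2) until rank 1 has also entered func.
    dist.group_barrier()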
Example #3
def worker(rank, q):
    if not mge.is_cuda_available():
        return
    _init_process_group_wrapper(world_size, rank, rank, backend, q)
    dist.group_barrier()
    if rank == 0:
        func(0, q)  # puts 0 into q
        q.put(2)
    else:
        _assert_q_val(q, 0)  # func already ran in rank 0
        _assert_q_empty(q)  # rank 0 has not reached q.put(2) yet
        func(1, q)
        _assert_q_val(q, 1)  # rank 1's func ran before rank 0's q.put(2)
        _assert_q_val(q, 2)  # q.put(2) finally ran in rank 0
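
Examples #3 and #5 swap the direct init_process_group call for an _init_process_group_wrapper helper. A hedged reconstruction from its call site, assuming MegEngine's init_process_group(master_ip, port, world_size, rank, dev, backend) and get_master_port(); the test suite's real helper may differ:

import megengine.distributed as dist

def _init_process_group_wrapper(world_size, rank, dev, backend, q):
    # Rank 0 binds to a free port (port 0) and publishes the chosen port
    # through the shared queue; the other ranks read it and connect.
    if rank == 0:
        dist.init_process_group("localhost", 0, world_size, rank, dev, backend)
        q.put(dist.get_master_port())
    else:
        port = q.get()
        dist.init_process_group("localhost", port, world_size, rank, dev, backend)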
Example #4
def worker(rank, q):
    dist.init_process_group("localhost", port, world_size, rank, rank)
    dist.group_barrier()
    if rank == 0:
        dist.group_barrier()
        q.put(0)  # to be observed in rank 1
    else:
        _assert_q_empty(q)  # rank 0 has not reached q.put(0) yet
        dist.group_barrier()
        _assert_q_val(q, 0)  # q.put(0) ran in rank 0
Example #5
def worker(rank, q):
    if not mge.is_cuda_available():
        return
    _init_process_group_wrapper(world_size, rank, rank, backend, q)
    dist.group_barrier()
    if rank == 0:
        dist.group_barrier()
        q.put(0)  # to be observed in rank 1
    else:
        _assert_q_empty(q)  # rank 0 has not reached q.put(0) yet
        dist.group_barrier()
        _assert_q_val(q, 0)  # q.put(0) ran in rank 0
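
For completeness, a self-contained way to drive any of the worker functions above, together with plausible reconstructions of the _assert_q_val/_assert_q_empty helpers inferred from how the examples use them (the real test utilities may differ):

import multiprocessing as mp
import queue

def _assert_q_val(q, expected, timeout=10):
    # Block until a value arrives, then check it.
    assert q.get(timeout=timeout) == expected

def _assert_q_empty(q):
    # Nothing should currently be sitting in the queue.
    try:
        q.get_nowait()
    except queue.Empty:
        return
    raise AssertionError("queue was expected to be empty")

def launch(worker, world_size=2):
    # Spawn one process per rank, sharing a single queue for the assertions.
    q = mp.Queue()
    procs = [mp.Process(target=worker, args=(rank, q))
             for rank in range(world_size)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
        assert p.exitcode == 0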