Esempi in Python per init_distributed

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: simdeblur.utils.dist_utils

Metodo/funzione: init_distributed

Esempi su hotexamples.com: 4

init_distributed in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per simdeblur.utils.dist_utils.init_distributed, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

    def __init__(self, cfg):
        """Set up the state needed for a training run.

        Initializes distributed training when more than one GPU is requested,
        creates the working directory of this experiment, then builds the
        architecture, the dataloaders, the optimizer / lr scheduler and the
        hooks, and finally resumes from a checkpoint when one is configured.

        Args:
            cfg (edict): the config, which contains the arguments from the
                command line (read here through ``cfg.args``).
        """
        self.cfg = copy.deepcopy(cfg)
        # initialize the distributed training
        if cfg.args.gpus > 1:
            dist_utils.init_distributed(cfg)

        # create the working dirs
        self.proj_dir = os.path.join(cfg.work_dir, cfg.name)
        # The project dir has to exist before it is listed below; without
        # this, os.listdir raises FileNotFoundError when it is missing.
        os.makedirs(self.proj_dir, exist_ok=True)
        # Experiment index: number of existing entries in the project dir + 1,
        # zero-padded to three digits.
        self.experiment_name = f"{len(os.listdir(self.proj_dir)) + 1:03d}"
        self.experiment_time = cfg.experiment_time
        self.current_work_dir = os.path.join(self.proj_dir,
                                             self.experiment_time)
        os.makedirs(self.current_work_dir, exist_ok=True)
        init_logger(log_file_path=self.current_work_dir)

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # construct the modules
        self.arch = build_meta_arch(self.cfg)

        # construct data
        self.train_dataloader, self.train_sampler = self.build_dataloder(
            cfg, mode="train")
        # NOTE: the attribute name "val_datalocaer" (sic) is kept unchanged
        # because code outside this method may read it.
        self.val_datalocaer, _ = self.build_dataloder(cfg, mode="val")

        # build the optimizer and lr_scheduler
        if hasattr(self.arch, "build_scheduler"):
            # arch-specific optimizer and lr_scheduler building
            self.optimizer, self.lr_scheduler = self.arch.build_scheduler()
        else:
            # default general optimizer and lr_scheduler building
            self.optimizer = self.build_optimizer(cfg, self.arch.model)
            self.lr_scheduler = self.build_lr_scheduler(cfg, self.optimizer)

        # trainer hooks
        self._hooks = self.build_hooks()

        # progress counters used while training
        self.epochs = 0
        self.iters = 0
        self.batch_idx = 0

        self.start_epoch = 0
        self.start_iter = 0
        self.total_train_epochs = self.cfg.schedule.epochs
        self.total_train_iters = self.total_train_epochs * len(
            self.train_dataloader)

        # resume or load the ckpt as init-weights; the config uses the
        # literal string "None" to mean "no checkpoint"
        if self.cfg.resume_from != "None":
            self.resume_or_load_ckpt(ckpt_path=self.cfg.resume_from)

        # log buffer (dict to save)
        self.log_buffer = LogBuffer()

Esempio n. 2

Mostra file

    def __init__(self, cfg):
        """Build the trainer state from a config.

        Args:
            cfg (edict): the config, which contains the arguments from the
                command line.
        """
        self.cfg = copy.deepcopy(cfg)

        # multi-GPU runs set up distributed training first
        multi_gpu = cfg.args.gpus > 1
        if multi_gpu:
            dist_utils.init_distributed(cfg)

        # working directory: <work_dir>/<name>
        work_dir = os.path.join(cfg.work_dir, cfg.name)
        self.current_work_dir = work_dir
        if not os.path.exists(work_dir):
            os.makedirs(work_dir, exist_ok=True)

        # run on the GPU when one is available, otherwise on the CPU
        device_type = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_type)

        # default logger: INFO level, written to a file in the working dir
        # named after the part of the config name before the first "_"
        sim_logger = logging.getLogger("simdeblur")
        sim_logger.setLevel(logging.INFO)
        log_stem = self.cfg.name.split("_")[0]
        log_path = os.path.join(self.current_work_dir, log_stem + ".json")
        sim_logger.addHandler(logging.FileHandler(log_path))

        # model and loss, both moved to the selected device
        self.model = self.build_model(cfg).to(self.device)
        self.criterion = build_loss(cfg.loss).to(self.device)

        # data
        train_loader, train_sampler = self.build_dataloder(cfg, mode="train")
        self.train_dataloader = train_loader
        self.train_sampler = train_sampler
        val_loader, _ = self.build_dataloder(cfg, mode="val")
        self.val_datalocaer = val_loader

        # optimizer and lr scheduler
        self.optimizer = self.build_optimizer(cfg, self.model)
        self.lr_scheduler = self.build_lr_scheduler(cfg, self.optimizer)

        # trainer hooks
        self._hooks = self.build_hooks()

        # progress counters used while training
        self.epochs = self.iters = self.batch_idx = 0
        self.start_epoch = self.start_iter = 0

        # total amount of work: epochs from the schedule, iterations as
        # epochs times the number of batches in the train dataloader
        self.total_train_epochs = self.cfg.schedule.epochs
        batches_per_epoch = len(self.train_dataloader)
        self.total_train_iters = self.total_train_epochs * batches_per_epoch

        # resume or load the ckpt as init-weights; the string "None" means
        # that no checkpoint was given
        resume_path = self.cfg.resume_from
        if resume_path != "None":
            self.resume_or_load_ckpt(ckpt_path=resume_path)

        # log buffer (dict to save)
        self.log_buffer = LogBuffer()

Esempio n. 3

Mostra file

    def test(cls, cfg):
        """Evaluate a trained architecture on the "val" dataloader.

        Only single GPU testing is supported for now.
        TODO: Separate the testing process.

        For every frame the PSNR and SSIM are computed and stored under the
        key ``"<video_name>_<gt_name>"``, the output image is saved to
        ``<work_dir>/<name>/tested/<timestamp>/<video_name>/<gt_name>`` and a
        line ``video_name, gt_name, psnr, ssim`` is appended to
        ``test_log.txt`` in the timestamped directory. The mean PSNR / SSIM
        are printed at the end.

        Args:
            cfg(edict): the config file for testing, which contains "model"
                and "test dataloader" configs etc.
        """
        experiment_time = time.strftime("%Y%m%d_%H%M%S")
        current_work_dir = os.path.join(cfg.work_dir, cfg.name, "tested",
                                        experiment_time)
        os.makedirs(current_work_dir, exist_ok=True)
        init_logger(log_file_path=current_work_dir)
        logger = logging.getLogger("simdeblur")

        if cfg.args.gpus > 1:
            dist_utils.init_distributed(cfg)
        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda" if use_cuda else "cpu")

        arch = build_meta_arch(cfg)
        test_dataloader, _ = Trainer.build_dataloder(cfg, "val")

        # Map checkpoint storages onto the local GPU when CUDA is available;
        # otherwise map them to the CPU device so loading does not fail on a
        # machine without CUDA.
        if use_cuda:
            def map_location(storage, loc):
                return storage.cuda(cfg.args.local_rank)
        else:
            map_location = device

        # load the trained checkpoint (best effort: a failure is logged and
        # testing continues with the weights the architecture already has)
        try:
            ckpt = torch.load(os.path.abspath(cfg.args.ckpt_file),
                              map_location=map_location)

            arch.load_ckpt(ckpt, strict=True)

            logger.info("Using checkpoint loaded from %s for testing.",
                        cfg.args.ckpt_file)
        except Exception as e:
            logger.warning(e)
            logger.warning(
                "Checkpoint loaded failed, cannot find ckpt file from %s.",
                cfg.args.ckpt_file)

        arch.model.eval()
        psnr_dict = {}
        ssim_dict = {}
        total_time = 0.
        test_log_path = os.path.abspath(
            os.path.join(current_work_dir, "test_log.txt"))
        with torch.no_grad():
            for batch_data in tqdm(
                    test_dataloader,
                    ncols=80,
                    desc=f"validation on gpu{cfg.args.local_rank}:"):
                input_frames = arch.preprocess(batch_data)
                gt_frames = batch_data["gt_frames"].to(device)

                # record the testing time; synchronizing around the forward
                # pass is only possible (and only needed) when CUDA is used
                if use_cuda:
                    torch.cuda.synchronize()
                time_start = time.time()
                if hasattr(arch, "inference"):
                    outputs = arch.postprocess(arch.inference(input_frames))
                else:
                    outputs = arch.postprocess(arch.model(input_frames))
                if use_cuda:
                    torch.cuda.synchronize()
                total_time += time.time() - time_start

                # calculate metrics; the outputs are viewed with the same
                # 5-D shape as the ground truth, so they are always 5-D below
                b, n, c, h, w = gt_frames.shape
                outputs = outputs.view(b, n, c, h, w)
                for b_idx in range(b):
                    video_name = batch_data["video_name"][b_idx]
                    # save the output images of one video in its own dir
                    save_path_base = os.path.abspath(
                        os.path.join(current_work_dir, video_name))
                    os.makedirs(save_path_base, exist_ok=True)
                    for n_idx in range(n):
                        gt_name = batch_data["gt_names"][n_idx][b_idx]
                        frame_name = "{}_{}".format(video_name, gt_name)
                        # slices keep the frame axis, i.e. (1, c, h, w)
                        gt_frame = gt_frames[b_idx, n_idx:n_idx + 1]
                        out_frame = outputs[b_idx, n_idx:n_idx + 1]
                        psnr_dict[frame_name] = calculate_psnr(
                            gt_frame, out_frame).item()
                        ssim_dict[frame_name] = calculate_ssim(
                            gt_frame, out_frame).item()

                        save_image(out_frame,
                                   os.path.join(save_path_base, gt_name))
                        # save testing logs
                        with open(test_log_path, "a") as f:
                            f.write("{}, {}, {}, {} \n".format(
                                video_name, gt_name,
                                psnr_dict[frame_name], ssim_dict[frame_name]))

        if not psnr_dict:
            # nothing was evaluated, so there is no mean to report
            logger.warning("No frames were evaluated, skip the summary.")
            return

        logger.info("Total inference time: %.3f s for %d evaluated frames.",
                    total_time, len(psnr_dict))
        print("mean PSNR: {:.2f}  mean SSIM: {:.4f} ".format(
            sum(psnr_dict.values()) / len(psnr_dict),
            sum(ssim_dict.values()) / len(ssim_dict)))

Esempio n. 4

Mostra file

    def test(cls, cfg):
        """Evaluate a trained model on the "val" dataloader.

        Loads the checkpoint given by ``cfg.args.ckpt_file``, runs the model
        over the dataloader and prints the PSNR / SSIM of every frame (keyed
        ``"<video_name>_<gt_name>"``) followed by their means.

        Args:
            cfg(edict): the config file for testing, which contains "model"
                and "test dataloader" configs etc.
        """
        logger = logging.getLogger(__name__)

        if cfg.args.gpus > 1:
            dist_utils.init_distributed(cfg)
        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda" if use_cuda else "cpu")

        model = Trainer.build_model(cfg).to(device)
        test_dataloader, _ = Trainer.build_dataloder(cfg, "val")

        # Map checkpoint storages onto the local GPU when CUDA is available;
        # otherwise map them to the CPU device so loading does not fail on a
        # machine without CUDA.
        if use_cuda:
            def map_location(storage, loc):
                return storage.cuda(cfg.args.local_rank)
        else:
            map_location = device

        # best effort: a failure is logged and testing continues with the
        # weights the freshly built model already has
        try:
            ckpt = torch.load(cfg.args.ckpt_file, map_location=map_location)

            model_ckpt = ckpt["model"]
            print(model_ckpt.keys())
            if cfg.args.gpus <= 1:
                # for cpu or single gpu model, it doesn't have the .module
                # property: drop the "module." prefix, but only from keys
                # that actually carry it
                prefix = "module."
                model_ckpt = {
                    (k[len(prefix):] if k.startswith(prefix) else k): v
                    for k, v in model_ckpt.items()
                }
            # strict=False if fine-tuning from an existing .pth
            model.load_state_dict(model_ckpt, strict=True)

            logger.info("Using checkpoint loaded from %s for testing.",
                        cfg.args.ckpt_file)
        except Exception as e:
            logger.warning(e)
            logger.warning(
                "Checkpoint loaded failed, cannot find ckpt file from %s.",
                cfg.args.ckpt_file)

        model.eval()
        psnr_dict = {}
        ssim_dict = {}
        # no gradients are needed for evaluation
        with torch.no_grad():
            for batch_data in tqdm(test_dataloader,
                                   desc="validation on gpu{}: ".format(
                                       cfg.args.local_rank)):
                input_frames = batch_data["input_frames"].to(device)
                gt_frames = batch_data["gt_frames"].to(device)
                outputs = model(input_frames)

                print("video name: ", batch_data["video_name"])
                print("frame name: ", batch_data["gt_names"])
                # calculate metrics
                b, n, c, h, w = gt_frames.shape
                # single image output
                if outputs.dim() == 4:
                    outputs = outputs.detach().unsqueeze(1)  # (b, 1, c, h, w)
                for b_idx in range(b):
                    video_name = batch_data["video_name"][b_idx]
                    for n_idx in range(n):
                        frame_name = "{}_{}".format(
                            video_name,
                            batch_data["gt_names"][n_idx][b_idx])
                        # slices keep the frame axis, i.e. (1, c, h, w)
                        gt_frame = gt_frames[b_idx, n_idx:n_idx + 1]
                        out_frame = outputs[b_idx, n_idx:n_idx + 1]
                        psnr_dict[frame_name] = calculate_psnr(
                            gt_frame, out_frame).item()
                        ssim_dict[frame_name] = calculate_ssim(
                            gt_frame, out_frame).item()
                        print(frame_name, "psnr: ", psnr_dict[frame_name],
                              "ssim: ", ssim_dict[frame_name])

        if not psnr_dict:
            # nothing was evaluated, so there is no mean to report
            logger.warning("No frames were evaluated, skip the summary.")
            return

        print("mean psnr: ", sum(psnr_dict.values()) / len(psnr_dict))
        print("mean ssim: ", sum(ssim_dict.values()) / len(ssim_dict))