def dcgan_train(config, checkpoint_dir=None):
    """Tune function-API trainable for a DCGAN.

    Builds the generator/discriminator pair, optionally restores state from
    ``checkpoint_dir``, then trains forever: one ``train`` step per loop,
    followed by a Tune checkpoint and a metrics report.

    Args:
        config: Tune config dict. Reads ``use_gpu``, ``lr`` (default 0.01),
            ``mnist_model_ref`` and, on restore, optional ``netD_lr`` /
            ``netG_lr`` overrides (used by schedulers such as PBT).
        checkpoint_dir: Directory containing a previously saved
            ``checkpoint`` file, or None to start from scratch.
    """
    step = 0
    use_cuda = config.get("use_gpu") and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    netD = Discriminator().to(device)
    netD.apply(weights_init)
    netG = Generator().to(device)
    netG.apply(weights_init)
    criterion = nn.BCELoss()
    # NOTE(review): `beta1` is a module-level constant defined elsewhere in
    # this file.
    optimizerD = optim.Adam(netD.parameters(),
                            lr=config.get("lr", 0.01),
                            betas=(beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(),
                            lr=config.get("lr", 0.01),
                            betas=(beta1, 0.999))
    # Serialize data loading across concurrent trial processes.
    with FileLock(os.path.expanduser("~/.data.lock")):
        dataloader = get_data_loader()

    if checkpoint_dir is not None:
        path = os.path.join(checkpoint_dir, "checkpoint")
        checkpoint = torch.load(path)
        netD.load_state_dict(checkpoint["netDmodel"])
        netG.load_state_dict(checkpoint["netGmodel"])
        optimizerD.load_state_dict(checkpoint["optimD"])
        optimizerG.load_state_dict(checkpoint["optimG"])
        step = checkpoint["step"]

        # Apply per-network learning-rate overrides on top of the restored
        # optimizer state (e.g. after a PBT perturbation).
        if "netD_lr" in config:
            for param_group in optimizerD.param_groups:
                param_group["lr"] = config["netD_lr"]
        if "netG_lr" in config:
            for param_group in optimizerG.param_groups:
                param_group["lr"] = config["netG_lr"]

    while True:
        lossG, lossD, is_score = train(
            netD,
            netG,
            optimizerG,
            optimizerD,
            criterion,
            dataloader,
            step,
            device,
            config["mnist_model_ref"],
        )
        step += 1
        # Bug fix: the original `with ... as checkpoint_dir` clobbered the
        # `checkpoint_dir` parameter (the restore directory); use a distinct
        # name for the per-step save directory.
        with tune.checkpoint_dir(step=step) as save_dir:
            path = os.path.join(save_dir, "checkpoint")
            torch.save(
                {
                    "netDmodel": netD.state_dict(),
                    "netGmodel": netG.state_dict(),
                    "optimD": optimizerD.state_dict(),
                    "optimG": optimizerG.state_dict(),
                    "step": step,
                },
                path,
            )
        tune.report(lossg=lossG, lossd=lossD, is_score=is_score)
 def setup(self, config):
     """Initialize GAN components for a trial.

     Builds the discriminator and generator (with weight init), the BCE
     loss, the two Adam optimizers, and a file-locked data loader, then
     stores ``config["mnist_model_ref"]`` on the instance.
     """
     gpu_ok = config.get("use_gpu") and torch.cuda.is_available()
     self.device = torch.device("cuda" if gpu_ok else "cpu")
     self.netD = Discriminator().to(self.device)
     self.netD.apply(weights_init)
     self.netG = Generator().to(self.device)
     self.netG.apply(weights_init)
     self.criterion = nn.BCELoss()
     learning_rate = config.get("lr", 0.01)
     adam_betas = (beta1, 0.999)
     self.optimizerD = optim.Adam(
         self.netD.parameters(), lr=learning_rate, betas=adam_betas)
     self.optimizerG = optim.Adam(
         self.netG.parameters(), lr=learning_rate, betas=adam_betas)
     # Serialize data loading across concurrent trial processes.
     with FileLock(os.path.expanduser("~/.data.lock")):
         self.dataloader = get_data_loader(config.get("data_dir", "~/data"))
     self.mnist_model_ref = config["mnist_model_ref"]
# Example #3
class PytorchTrainable(tune.Trainable):
    """Class-API Tune trainable wrapping one DCGAN training loop."""

    def setup(self, config):
        """Build networks, loss, optimizers, and the data loader."""
        gpu_ok = config.get("use_gpu") and torch.cuda.is_available()
        self.device = torch.device("cuda" if gpu_ok else "cpu")
        self.netD = Discriminator().to(self.device)
        self.netD.apply(weights_init)
        self.netG = Generator().to(self.device)
        self.netG.apply(weights_init)
        self.criterion = nn.BCELoss()
        learning_rate = config.get("lr", 0.01)
        adam_betas = (beta1, 0.999)
        self.optimizerD = optim.Adam(
            self.netD.parameters(), lr=learning_rate, betas=adam_betas)
        self.optimizerG = optim.Adam(
            self.netG.parameters(), lr=learning_rate, betas=adam_betas)
        # Serialize data loading across concurrent trial processes.
        with FileLock(os.path.expanduser("~/.data.lock")):
            self.dataloader = get_data_loader()
        self.mnist_model_ref = config["mnist_model_ref"]

    def step(self):
        """Run one training iteration and return its metrics dict."""
        lossG, lossD, is_score = train(
            self.netD,
            self.netG,
            self.optimizerG,
            self.optimizerD,
            self.criterion,
            self.dataloader,
            self._iteration,
            self.device,
            self.mnist_model_ref,
        )
        return {"lossg": lossG, "lossd": lossD, "is_score": is_score}

    def save_checkpoint(self, checkpoint_dir):
        """Persist model and optimizer state; return the directory."""
        state = {
            "netDmodel": self.netD.state_dict(),
            "netGmodel": self.netG.state_dict(),
            "optimD": self.optimizerD.state_dict(),
            "optimG": self.optimizerG.state_dict(),
        }
        torch.save(state, os.path.join(checkpoint_dir, "checkpoint"))
        return checkpoint_dir

    def load_checkpoint(self, checkpoint_dir):
        """Restore model and optimizer state from a saved checkpoint."""
        state = torch.load(os.path.join(checkpoint_dir, "checkpoint"))
        self.netD.load_state_dict(state["netDmodel"])
        self.netG.load_state_dict(state["netGmodel"])
        self.optimizerD.load_state_dict(state["optimD"])
        self.optimizerG.load_state_dict(state["optimG"])

    def reset_config(self, new_config):
        """Apply new learning rates in place so the actor can be reused."""
        for key, optimizer in (("netD_lr", self.optimizerD),
                               ("netG_lr", self.optimizerG)):
            if key in new_config:
                for group in optimizer.param_groups:
                    group["lr"] = new_config[key]

        self.config = new_config
        return True

    def _export_model(self, export_formats, export_dir):
        """Export both networks' weights in MODEL format."""
        # Guard clause: only the MODEL export format is supported.
        if export_formats != [ExportFormat.MODEL]:
            raise ValueError("unexpected formats: " + str(export_formats))
        path = os.path.join(export_dir, "exported_models")
        torch.save({
            "netDmodel": self.netD.state_dict(),
            "netGmodel": self.netG.state_dict()
        }, path)
        return {ExportFormat.MODEL: path}
def dcgan_train(config):
    """Tune function-API trainable for a DCGAN using the AIR ``session`` API.

    Builds the networks, optionally restores from the trial's checkpoint,
    then loops forever: train one step, save a checkpoint directory, and
    report metrics plus the checkpoint back to Tune.

    Args:
        config: Tune config dict. Reads ``use_gpu``, ``lr`` (default 0.01),
            ``mnist_model_ref`` and, on restore, optional ``netD_lr`` /
            ``netG_lr`` overrides (used by schedulers such as PBT).
    """
    step = 0
    use_cuda = config.get("use_gpu") and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    netD = Discriminator().to(device)
    netD.apply(weights_init)
    netG = Generator().to(device)
    netG.apply(weights_init)
    criterion = nn.BCELoss()
    # NOTE(review): `beta1` is a module-level constant defined elsewhere in
    # this file.
    optimizerD = optim.Adam(netD.parameters(),
                            lr=config.get("lr", 0.01),
                            betas=(beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(),
                            lr=config.get("lr", 0.01),
                            betas=(beta1, 0.999))
    # Serialize data loading across concurrent trial processes.
    with FileLock(os.path.expanduser("~/.data.lock")):
        dataloader = get_data_loader()

    # Fetch the checkpoint once rather than calling session.get_checkpoint()
    # twice (the original queried it for the truthiness test and again for
    # the restore).
    loaded_checkpoint = session.get_checkpoint()
    if loaded_checkpoint:
        with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
            path = os.path.join(loaded_checkpoint_dir, "checkpoint.pt")
            checkpoint = torch.load(path)
            netD.load_state_dict(checkpoint["netDmodel"])
            netG.load_state_dict(checkpoint["netGmodel"])
            optimizerD.load_state_dict(checkpoint["optimD"])
            optimizerG.load_state_dict(checkpoint["optimG"])
            step = checkpoint["step"]

        # Apply per-network learning-rate overrides on top of the restored
        # optimizer state (e.g. after a PBT perturbation).
        if "netD_lr" in config:
            for param_group in optimizerD.param_groups:
                param_group["lr"] = config["netD_lr"]
        if "netG_lr" in config:
            for param_group in optimizerG.param_groups:
                param_group["lr"] = config["netG_lr"]

    while True:
        lossG, lossD, is_score = train(
            netD,
            netG,
            optimizerG,
            optimizerD,
            criterion,
            dataloader,
            step,
            device,
            config["mnist_model_ref"],
        )
        step += 1
        os.makedirs("my_model", exist_ok=True)
        torch.save(
            {
                "netDmodel": netD.state_dict(),
                "netGmodel": netG.state_dict(),
                "optimD": optimizerD.state_dict(),
                "optimG": optimizerG.state_dict(),
                "step": step,
            },
            "my_model/checkpoint.pt",
        )

        session.report(
            {
                "lossg": lossG,
                "lossd": lossD,
                "is_score": is_score
            },
            checkpoint=Checkpoint.from_directory("my_model"),
        )