def __setup_model(self, inference, gpu):
        """Select the compute device and build ``Encoder_rotation`` on it.

        The CPU is used when ``inference`` is truthy or ``gpu`` is ``None``.
        Otherwise the model goes to ``cuda:{gpu[0]}`` (CPU if CUDA is not
        available) and is wrapped in ``DP`` only when more than one CUDA
        device is visible and ``gpu`` lists more than one id.

        Args:
            inference: truthy to force CPU placement.
            gpu: sequence of GPU ids, or ``None`` for CPU.

        Returns:
            Always ``True``.
        """

        # TODO: re-write to pure DDP
        def place_model(target):
            # Record the device first, then instantiate the network on it.
            self.device = target
            self.model = Encoder_rotation(
                hparams=self.hparams['model']).to(self.device)

        def first_gpu_or_cpu():
            # gpu[0] is only read when CUDA is actually available.
            if torch.cuda.is_available():
                return torch.device(f"cuda:{gpu[0]}")
            return torch.device("cpu")

        if inference or gpu is None:
            place_model(torch.device('cpu'))
        elif torch.cuda.device_count() <= 1:
            place_model(first_gpu_or_cpu())
            print('Only one GPU is available')
        elif len(gpu) > 1:
            print("Number of GPUs will be used: ", len(gpu))
            place_model(first_gpu_or_cpu())
            # Replicate across the requested ids; outputs gather on gpu[0].
            self.model = DP(self.model, device_ids=gpu, output_device=gpu[0])
        else:
            print("Only one GPU will be used")
            place_model(first_gpu_or_cpu())

        print('Cuda available: ', torch.cuda.is_available())

        return True
def train_model(user_n,
                movie_n,
                train_data,
                val_data,
                gpus=None,
                epochs=100,
                lr=0.3,
                k=17,
                batch_size=1000):
    """
    Train a ``DualEmbedding`` model with SGD on a weighted squared error.

    Args:
        user_n, movie_n: net parameters forwarded to ``DualEmbedding``.
        train_data: (users, movies, scores, weight) 4 1DTensor of Train Data,
            in same length.
        val_data: (users, movies, scores) 3 1DTensor of Validation Data,
            in same length.
        gpus: GPU ids handed to ``DP``. The first id hosts the model and
            receives the gathered outputs. ``None`` or an empty sequence
            lets ``DP`` use every visible GPU.
        epochs: number of passes over the cached training batches.
        lr: SGD learning rate.
        k: third positional argument of ``DualEmbedding``.
        batch_size: batch size of the training ``DataLoader``.

    Returns:
        model: PyTorch model (wrapped in ``DP``).
    """
    dataset = D.TensorDataset(*train_data)
    dataloader = D.DataLoader(dataset, batch_size)

    # An empty/None id list means "all visible devices"; DP itself indexes
    # device_ids[0], so an empty list must never be passed through.
    device_ids = list(gpus) if gpus else None
    primary = device_ids[0] if device_ids else None

    # DP requires the module's parameters to live on device_ids[0].
    model = DualEmbedding(user_n, movie_n, k).cuda(primary)
    model = DP(model, device_ids=device_ids, output_device=primary)

    optimizer = optim.SGD(model.parameters(), lr)

    def criterion(pred, score, weight):
        # Weighted sum of squared errors, averaged over the batch.
        return torch.dot(weight, (pred - score)**2) / len(pred)

    mseloss = nn.MSELoss()
    (val_users, val_movies, val_scores) = val_data
    # Predictions are gathered on the primary device; keep the targets there
    # too so the validation loss never mixes devices.
    val_scores = val_scores.cuda(primary)

    # Batches are materialised once and replayed every epoch.
    li = list(dataloader)
    n_batches = len(li)
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (user, movie, score, weight) in enumerate(li):
            user = user.cuda(primary, non_blocking=True)
            movie = movie.cuda(primary, non_blocking=True)
            score = score.cuda(primary, non_blocking=True)
            weight = weight.cuda(primary, non_blocking=True)

            optimizer.zero_grad()

            pred, l1_loss = model(user, movie)

            # NOTE(review): with several replicas DP gathers per-replica
            # outputs; if DualEmbedding returns a 0-dim l1_loss the gathered
            # value is a vector and `loss` stops being a scalar - confirm.
            loss = criterion(pred, score,
                             weight) + 1e-2 * l1_loss / n_batches
            loss.backward()

            optimizer.step()

            running_loss += loss.item()
            if i % 2000 == 0:
                print(f"batch: {i}")
        # Validation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            pred, _ = model(val_users, val_movies)
            val_loss = mseloss(torch.round(pred * 5), val_scores * 5)
        print(
            f"epoch: {epoch}, loss: {running_loss / n_batches}, val_loss:{val_loss}"
        )
    return model
Exemple #3
0
    def __init__(self, input_size, n_channels, hparams):
        """Build the ``WaveNet`` model together with its training utilities.

        Args:
            input_size: first element of the input shape given to ``summary``.
            n_channels: forwarded to ``WaveNet``; also the second element of
                the shape given to ``summary``.
            hparams: mapping read for the keys ``'lr'``, ``'checkpoint_path'``,
                ``'patience'`` and ``'min_delta'``.
        """

        self.hparams = hparams

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        # define the models
        self.model = WaveNet(n_channels=n_channels).to(self.device)
        summary(self.model, (input_size, n_channels))
        # self.model.half()

        if torch.cuda.device_count() > 1:
            # Three devices are kept out of the pool (as before), but the pool
            # must never be empty: DP indexes device_ids[0], so with 2 or 3
            # visible GPUs the unclamped range() would make it fail.
            n_used = max(1, torch.cuda.device_count() - 3)
            print("Number of GPUs will be used: ", n_used)
            self.model = DP(self.model, device_ids=list(range(n_used)))
        else:
            print('Only one GPU is available')

        self.metric = Metric()
        self.num_workers = 1
        ########################## compile the model ###############################

        # define optimizer
        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=self.hparams['lr'],
                                          weight_decay=1e-5)

        # weights = torch.Tensor([0.025,0.033,0.039,0.046,0.069,0.107,0.189,0.134,0.145,0.262,1]).cuda()
        self.loss = nn.BCELoss()  # CompLoss(self.device)

        # define early stopping
        self.early_stopping = EarlyStopping(
            checkpoint_path=self.hparams['checkpoint_path'] + '/checkpoint.pt',
            patience=self.hparams['patience'],
            delta=self.hparams['min_delta'],
        )
        # lr scheduler: configured for a monitored value that should increase
        self.scheduler = ReduceLROnPlateau(
            optimizer=self.optimizer,
            mode='max',
            factor=0.2,
            patience=3,
            verbose=True,
            threshold=self.hparams['min_delta'],
            threshold_mode='abs',
            cooldown=0,
            eps=0,
        )

        self.seed_everything(42)
        self.threshold = 0.75
        self.scaler = torch.cuda.amp.GradScaler()
    def __setup_model(self, inference, gpu):
        """Select the device, build the pretrained EfficientNet, optionally freeze it.

        The CPU is used when ``inference`` is truthy or ``gpu`` is ``None``.
        Otherwise the model goes to ``cuda:{gpu[0]}`` (CPU if CUDA is not
        available) and is wrapped in ``DP`` only when more than one CUDA
        device is visible and ``gpu`` lists more than one id. When
        ``hparams['freeze']`` is truthy, ``freeze_layers()`` is called on the
        underlying network.

        Args:
            inference: truthy to force CPU placement.
            gpu: sequence of GPU ids, or ``None`` for CPU.

        Returns:
            Always ``True``.
        """

        # TODO: re-write to pure DDP
        def build_on(device):
            # Record the device first, then load the pretrained network on it.
            self.device = device
            self.model = EfficientNet.from_pretrained(
                self.hparams['model']['pre_trained_model'],
                num_classes=self.hparams['model']['n_classes'],
            ).to(self.device)

        def first_gpu_or_cpu():
            # gpu[0] is only read when CUDA is actually available.
            if torch.cuda.is_available():
                return torch.device(f"cuda:{gpu[0]}")
            return torch.device("cpu")

        # Tracks whether the DP wrapper was really applied; `len(gpu) > 1`
        # alone is not enough (gpu may be None, or only one device is visible).
        wrapped = False

        if inference or gpu is None:
            build_on(torch.device('cpu'))
        elif torch.cuda.device_count() > 1:
            if len(gpu) > 1:
                print("Number of GPUs will be used: ", len(gpu))
                build_on(first_gpu_or_cpu())
                self.model = DP(self.model,
                                device_ids=gpu,
                                output_device=gpu[0])
                wrapped = True
            else:
                print("Only one GPU will be used")
                build_on(first_gpu_or_cpu())
        else:
            build_on(first_gpu_or_cpu())
            print('Only one GPU is available')

        print('Cuda available: ', torch.cuda.is_available())

        if self.hparams['freeze']:
            # Reach through `.module` only when the model is DP-wrapped.
            network = self.model.module if wrapped else self.model
            network.freeze_layers()

        return True
    def __setup_model(self, inference, gpu):
        """Select the device, build the pretrained EfficientNet and its projection head.

        The CPU is used when ``inference`` is truthy or ``gpu`` is ``None``.
        Otherwise the model goes to ``cuda:{gpu[0]}`` (CPU if CUDA is not
        available) and is wrapped in ``DP`` only when more than one CUDA
        device is visible and ``gpu`` lists more than one id. Afterwards
        ``build_projection_network`` is called on the underlying network with
        ``hparams['model']['emb_dim']`` and the selected device.

        Args:
            inference: truthy to force CPU placement.
            gpu: sequence of GPU ids, or ``None`` for CPU.

        Returns:
            Always ``True``.
        """

        # TODO: re-write to pure DDP
        def build_on(device):
            # Record the device first, then load the pretrained network on it.
            self.device = device
            self.model = EfficientNet.from_pretrained(
                self.hparams['model']['pre_trained_model']).to(self.device)

        def first_gpu_or_cpu():
            # gpu[0] is only read when CUDA is actually available.
            if torch.cuda.is_available():
                return torch.device(f"cuda:{gpu[0]}")
            return torch.device("cpu")

        # Tracks whether the DP wrapper was really applied; `len(gpu) > 1`
        # alone is not enough (gpu may be None, or only one device is visible).
        wrapped = False

        if inference or gpu is None:
            build_on(torch.device('cpu'))
        elif torch.cuda.device_count() > 1:
            if len(gpu) > 1:
                print("Number of GPUs will be used: ", len(gpu))
                build_on(first_gpu_or_cpu())
                self.model = DP(self.model,
                                device_ids=gpu,
                                output_device=gpu[0])
                wrapped = True
            else:
                print("Only one GPU will be used")
                build_on(first_gpu_or_cpu())
        else:
            build_on(first_gpu_or_cpu())
            print('Only one GPU is available')

        # Reach through `.module` only when the model is DP-wrapped.
        network = self.model.module if wrapped else self.model
        network.build_projection_network(self.hparams['model']['emb_dim'],
                                         device=self.device)

        print('Cuda available: ', torch.cuda.is_available())

        return True
Exemple #6
0
    def on_pretrain_routine_start(self, trainer: Trainer,
                                  pl_module: LightningModule) -> None:
        """Create the online evaluator and its optimizer on the module's device.

        The evaluator is wrapped for DDP or DP when the trainer's accelerator
        connector reports that mode; other distributed modes only trigger a
        warning. A previously recovered callback state, if any, is loaded
        into both the evaluator and the optimizer.
        """
        # must move to device after setup, as during setup, pl_module is still on cpu
        evaluator = SSLEvaluator(
            n_input=self.z_dim,
            n_classes=self.num_classes,
            p=self.drop_p,
            n_hidden=self.hidden_dim,
        )
        self.online_evaluator = evaluator.to(pl_module.device)

        # switch for PL compatibility reasons: the connector attribute name
        # differs between trainer versions
        if hasattr(trainer, "accelerator_connector"):
            connector = trainer.accelerator_connector
        else:
            connector = trainer._accelerator_connector

        if connector.is_distributed:
            wrapper = None
            if connector.use_ddp:
                from torch.nn.parallel import DistributedDataParallel as wrapper
            elif connector.use_dp:
                from torch.nn.parallel import DataParallel as wrapper

            if wrapper is None:
                rank_zero_warn(
                    "Does not support this type of distributed accelerator. The online evaluator will not sync."
                )
            else:
                self.online_evaluator = wrapper(self.online_evaluator,
                                                device_ids=[pl_module.device])

        self.optimizer = torch.optim.Adam(self.online_evaluator.parameters(),
                                          lr=1e-4)

        recovered = self._recovered_callback_state
        if recovered is None:
            return
        self.online_evaluator.load_state_dict(recovered["state_dict"])
        self.optimizer.load_state_dict(recovered["optimizer_state"])
Exemple #7
0
                                          num_classes=10)
        self.conv1 = torch.nn.Conv2d(1,
                                     64,
                                     kernel_size=(7, 7),
                                     stride=(2, 2),
                                     padding=(3, 3),
                                     bias=False)

    def forward(self, x):
        """Run the parent class's forward pass and softmax its output over the last dim."""
        logits = super(MnistResNet, self).forward(x)
        return torch.softmax(logits, dim=-1)


# net = resnet18()
# Instantiate the network, move it to the GPU, then wrap it in DP.
net = DP(MnistResNet().cuda())


class ToNumpy(object):
    """Callable transform that returns its input as a new NumPy array."""

    def __call__(self, sample):
        converted = np.array(sample)
        return converted


data_root = 'dataset'
# Training split (downloaded if missing); each sample goes through
# ToNumpy and then ToTensor.
trainset = MNIST(
    root=data_root,
    train=True,
    download=True,
    transform=torchvision.transforms.Compose([
        ToNumpy(),
        torchvision.transforms.ToTensor(),
    ]),
)

valset = MNIST(root=data_root,
Exemple #8
0
def main():
    """Train the generator/discriminator pair and log the run to Weights & Biases.

    Steps, in order: parse CLI arguments (optionally merged with the config of
    a previous W&B run), initialise the W&B run, build ``Generator``,
    ``Discriminator`` and ``Audio2Mel``, optionally restore model/optimizer
    state from the previous run, wrap the models in ``DP`` when several GPUs
    are visible, log reference audio and mel images from the test set, then
    run the adversarial training loop with periodic sample generation and
    checkpointing.
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    seed_everything(7)

    args = parse_args()

    Path(args.save_path).mkdir(parents=True, exist_ok=True)
    entity = "demiurge"
    project = "melgan"
    load_from_run_id = args.load_from_run_id
    resume_run_id = args.resume_run_id
    restore_run_id = load_from_run_id or resume_run_id
    batch_size = args.batch_size

    # Getting initial run steps and epoch
    # if restore run, replace args
    steps = None
    if restore_run_id:
        api = wandb.Api()
        previous_run = api.run(f"{entity}/{project}/{restore_run_id}")
        steps = previous_run.lastHistoryStep
        # Values stored in the previous run's config override the CLI values,
        # except batch_size, which keeps the value given on this invocation.
        prev_args = argparse.Namespace(**previous_run.config)
        args = vars(args)
        args.update(vars(prev_args))
        args = Namespace(**args)
        args.batch_size = batch_size

    load_initial_weights = bool(restore_run_id)
    sampling_rate = args.sampling_rate
    ratios = args.ratios
    # ratios may arrive as a bracketed, comma-separated string; turn it into
    # an integer array.
    if isinstance(ratios, str):
        ratios = ratios.replace(" ", "")
        ratios = ratios.strip("][").split(",")
        ratios = [int(i) for i in ratios]
        ratios = np.array(ratios)

    # NOTE(review): this mutual-exclusion check runs only after the W&B
    # lookup above has already used one of the two ids.
    if load_from_run_id and resume_run_id:
        raise RuntimeError("Specify either --load_from_id or --resume_run_id.")

    if resume_run_id:
        print(f"Resuming run ID {resume_run_id}.")
    elif load_from_run_id:
        print(
            f"Starting new run with initial weights from run ID {load_from_run_id}."
        )
    else:
        print("Starting new run from scratch.")

    # read 1 line in train files to log dataset location
    train_files = Path(args.data_path) / "train_files.txt"
    with open(train_files, encoding="utf-8", mode="r") as f:
        file = f.readline()
    args.train_file_sample = str(file)

    wandb.init(
        entity=entity,
        project=project,
        id=resume_run_id,
        config=args,
        resume=True if resume_run_id else False,
        save_code=True,
        dir=args.save_path,
        notes=args.notes,
    )

    print("run id: " + str(wandb.run.id))
    print("run name: " + str(wandb.run.name))

    # All artifacts of this run are written below the W&B run directory.
    root = Path(wandb.run.dir)
    root.mkdir(parents=True, exist_ok=True)

    ####################################
    # Dump arguments and create logger #
    ####################################
    with open(root / "args.yml", "w") as f:
        yaml.dump(args, f)
    wandb.save("args.yml")

    ###############################################
    # The file modules.py is needed by the unagan #
    ###############################################
    wandb.save(mel2wav.modules.__file__, base_path=".")

    #######################
    # Load PyTorch Models #
    #######################

    netG = Generator(args.n_mel_channels,
                     args.ngf,
                     args.n_residual_layers,
                     ratios=ratios).to(device)
    netD = Discriminator(args.num_D, args.ndf, args.n_layers_D,
                         args.downsamp_factor).to(device)
    fft = Audio2Mel(
        n_mel_channels=args.n_mel_channels,
        pad_mode=args.pad_mode,
        sampling_rate=sampling_rate,
    ).to(device)

    for model in [netG, netD, fft]:
        wandb.watch(model)

    #####################
    # Create optimizers #
    #####################
    optG = torch.optim.Adam(netG.parameters(),
                            lr=args.learning_rate,
                            betas=(0.5, 0.9))
    optD = torch.optim.Adam(netD.parameters(),
                            lr=args.learning_rate,
                            betas=(0.5, 0.9))

    if load_initial_weights:

        # For each component try the current checkpoint first, then the
        # "*_prev.pt" backup.
        for model, filenames in [
            (netG, ["netG.pt", "netG_prev.pt"]),
            (optG, ["optG.pt", "optG_prev.pt"]),
            (netD, ["netD.pt", "netD_prev.pt"]),
            (optD, ["optD.pt", "optD_prev.pt"]),
        ]:
            recover_model = False
            filepath = None
            for filename in filenames:
                try:
                    run_path = f"{entity}/{project}/{restore_run_id}"
                    print(f"Restoring {filename} from run path {run_path}")
                    restored_file = wandb.restore(filename, run_path=run_path)
                    filepath = restored_file.name
                    # NOTE(review): the result is bound only to the loop
                    # variable, so netG/optG/netD/optD are updated only if
                    # the helper loads in place - confirm.
                    model = load_state_dict_handleDP(model, filepath)
                    recover_model = True
                    break
                except RuntimeError as e:
                    print("RuntimeError", e)
                    print(f"recover model weight file: '{filename}'' failed")
            if not recover_model:
                raise RuntimeError(
                    f"Cannot load model weight files for component {filenames[0]}."
                )
            else:
                # store successfully recovered model weight file ("***_prev.pt")
                path_parent = Path(filepath).parent
                newfilepath = str(path_parent / filenames[1])
                os.rename(filepath, newfilepath)
                wandb.save(newfilepath)
    # DP wrapping happens after the optimizers were created and after any
    # state was restored into the unwrapped models.
    if torch.cuda.device_count() > 1:
        netG = DP(netG).to(device)
        netD = DP(netD).to(device)
        fft = DP(fft).to(device)
        print(f"We have {torch.cuda.device_count()} gpus. Use data parallel.")
    else:
        print(f"We have {torch.cuda.device_count()} gpu.")

    #######################
    # Create data loaders #
    #######################
    train_set = AudioDataset(
        Path(args.data_path) / "train_files.txt",
        args.seq_len,
        sampling_rate=sampling_rate,
    )
    test_set = AudioDataset(
        Path(args.data_path) / "test_files.txt",
        sampling_rate * 4,
        sampling_rate=sampling_rate,
        augment=False,
    )
    wandb.save(str(Path(args.data_path) / "train_files.txt"))
    wandb.save(str(Path(args.data_path) / "test_files.txt"))

    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              num_workers=4)
    test_loader = DataLoader(test_set, batch_size=1)

    if len(train_loader) == 0:
        raise RuntimeError("Train dataset is empty.")

    if len(test_loader) == 0:
        raise RuntimeError("Test dataset is empty.")

    # Fresh runs take the step counter from W&B; restored runs keep the last
    # history step read from the previous run.
    if not restore_run_id:
        steps = wandb.run.step
    start_epoch = steps // len(train_loader)
    print(f"Starting with epoch {start_epoch} and step {steps}.")

    ##########################
    # Dumping original audio #
    ##########################
    test_voc = []
    test_audio = []
    samples = []
    melImages = []
    # Half of n_test_samples, rounded up, is logged as a fixed set; the
    # remainder is drawn from test_loader at every save interval.
    num_fix_samples = args.n_test_samples - (args.n_test_samples // 2)
    cmap = cm.get_cmap("inferno")
    for i, x_t in enumerate(test_loader):
        x_t = x_t.to(device)
        s_t = fft(x_t).detach()

        test_voc.append(s_t.to(device))
        test_audio.append(x_t)

        audio = x_t.squeeze().cpu()
        save_sample(root / ("original_%d.wav" % i), sampling_rate, audio)
        samples.append(
            wandb.Audio(audio,
                        caption=f"sample {i}",
                        sample_rate=sampling_rate))
        melImage = s_t.squeeze().detach().cpu().numpy()
        # Min-max scale before applying the colormap.
        # NOTE(review): divides by zero when the mel image is constant.
        melImage = (melImage - np.amin(melImage)) / (np.amax(melImage) -
                                                     np.amin(melImage))
        # melImage = Image.fromarray(np.uint8(cmap(melImage)) * 255)
        # melImage = melImage.resize((melImage.width * 4, melImage.height * 4))
        melImages.append(wandb.Image(cmap(melImage), caption=f"sample {i}"))

        if i == num_fix_samples - 1:
            break

    # if not resume_run_id:
    wandb.log({"audio/original": samples}, step=start_epoch)
    wandb.log({"mel/original": melImages}, step=start_epoch)
    # else:
    #     print("We are resuming, skipping logging of original audio.")

    # Per-step [loss_D, loss_G, loss_feat, s_error]; cleared at every log
    # interval.
    costs = []
    start = time.time()

    # enable cudnn autotuner to speed up training
    torch.backends.cudnn.benchmark = True

    best_mel_reconst = 1000000

    for epoch in range(start_epoch, start_epoch + args.epochs + 1):
        for iterno, x_t in enumerate(train_loader):
            x_t = x_t.to(device)
            s_t = fft(x_t).detach()
            x_pred_t = netG(s_t.to(device))

            # L1 distance between the mel of the real audio and the mel of
            # the generated audio; logged and used for best-model selection,
            # not back-propagated.
            with torch.no_grad():
                s_pred_t = fft(x_pred_t.detach())
                s_error = F.l1_loss(s_t, s_pred_t).item()

            #######################
            # Train Discriminator #
            #######################
            D_fake_det = netD(x_pred_t.to(device).detach())
            D_real = netD(x_t.to(device))

            # Hinge loss summed over the discriminator outputs; scale[-1] is
            # the last entry of each output.
            loss_D = 0
            for scale in D_fake_det:
                loss_D += F.relu(1 + scale[-1]).mean()

            for scale in D_real:
                loss_D += F.relu(1 - scale[-1]).mean()

            netD.zero_grad()
            loss_D.backward()
            optD.step()

            ###################
            # Train Generator #
            ###################
            D_fake = netD(x_pred_t.to(device))

            loss_G = 0
            for scale in D_fake:
                loss_G += -scale[-1].mean()

            # Feature matching: L1 between every entry but the last of the
            # fake and real discriminator outputs, weighted by wt.
            loss_feat = 0
            feat_weights = 4.0 / (args.n_layers_D + 1)
            D_weights = 1.0 / args.num_D
            wt = D_weights * feat_weights
            for i in range(args.num_D):
                for j in range(len(D_fake[i]) - 1):
                    loss_feat += wt * F.l1_loss(D_fake[i][j],
                                                D_real[i][j].detach())

            netG.zero_grad()
            (loss_G + args.lambda_feat * loss_feat).backward()
            optG.step()

            costs.append(
                [loss_D.item(),
                 loss_G.item(),
                 loss_feat.item(), s_error])

            wandb.log(
                {
                    "loss/discriminator": costs[-1][0],
                    "loss/generator": costs[-1][1],
                    "loss/feature_matching": costs[-1][2],
                    "loss/mel_reconstruction": costs[-1][3],
                },
                step=steps,
            )
            steps += 1

            if steps % args.save_interval == 0:
                st = time.time()
                with torch.no_grad():
                    samples = []
                    melImages = []
                    # fix samples
                    for i, (voc, _) in enumerate(zip(test_voc, test_audio)):
                        pred_audio = netG(voc)
                        pred_audio = pred_audio.squeeze().cpu()
                        save_sample(root / ("generated_%d.wav" % i),
                                    sampling_rate, pred_audio)
                        samples.append(
                            wandb.Audio(
                                pred_audio,
                                caption=f"sample {i}",
                                sample_rate=sampling_rate,
                            ))
                        melImage = voc.squeeze().detach().cpu().numpy()
                        melImage = (melImage - np.amin(melImage)) / (
                            np.amax(melImage) - np.amin(melImage))
                        # melImage = Image.fromarray(np.uint8(cmap(melImage)) * 255)
                        # melImage = melImage.resize(
                        #     (melImage.width * 4, melImage.height * 4)
                        # )
                        melImages.append(
                            wandb.Image(cmap(melImage), caption=f"sample {i}"))
                    wandb.log(
                        {
                            "audio/generated": samples,
                            "mel/generated": melImages,
                            "epoch": epoch,
                        },
                        step=steps,
                    )

                    # var samples
                    source = []
                    pred = []
                    pred_mel = []
                    num_var_samples = args.n_test_samples - num_fix_samples
                    for i, x_t in enumerate(test_loader):
                        # source
                        x_t = x_t.to(device)
                        audio = x_t.squeeze().cpu()
                        source.append(
                            wandb.Audio(audio,
                                        caption=f"sample {i}",
                                        sample_rate=sampling_rate))
                        # pred
                        s_t = fft(x_t).detach()
                        voc = s_t.to(device)
                        pred_audio = netG(voc)
                        pred_audio = pred_audio.squeeze().cpu()
                        pred.append(
                            wandb.Audio(
                                pred_audio,
                                caption=f"sample {i}",
                                sample_rate=sampling_rate,
                            ))
                        melImage = voc.squeeze().detach().cpu().numpy()
                        melImage = (melImage - np.amin(melImage)) / (
                            np.amax(melImage) - np.amin(melImage))
                        # melImage = Image.fromarray(np.uint8(cmap(melImage)) * 255)
                        # melImage = melImage.resize(
                        #     (melImage.width * 4, melImage.height * 4)
                        # )
                        pred_mel.append(
                            wandb.Image(cmap(melImage), caption=f"sample {i}"))

                        # stop when reach log sample
                        if i == num_var_samples - 1:
                            break

                    wandb.log(
                        {
                            "audio/var_original": source,
                            "audio/var_generated": pred,
                            "mel/var_generated": pred_mel,
                        },
                        step=steps,
                    )

                print("Saving models ...")
                torch.save(netG.state_dict(), root / "netG.pt")
                torch.save(optG.state_dict(), root / "optG.pt")
                wandb.save(str(root / "netG.pt"))
                wandb.save(str(root / "optG.pt"))

                torch.save(netD.state_dict(), root / "netD.pt")
                torch.save(optD.state_dict(), root / "optD.pt")
                wandb.save(str(root / "netD.pt"))
                wandb.save(str(root / "optD.pt"))

                # Best-model criterion: mean mel reconstruction error (last
                # column of costs) over the steps since costs was last reset.
                if np.asarray(costs).mean(0)[-1] < best_mel_reconst:
                    best_mel_reconst = np.asarray(costs).mean(0)[-1]
                    torch.save(netD.state_dict(), root / "best_netD.pt")
                    torch.save(netG.state_dict(), root / "best_netG.pt")
                    wandb.save(str(root / "best_netD.pt"))
                    wandb.save(str(root / "best_netG.pt"))

                print("Took %5.4fs to generate samples" % (time.time() - st))
                print("-" * 100)

            if steps % args.log_interval == 0:
                print("Epoch {} | Iters {} / {} | ms/batch {:5.2f} | loss {}".
                      format(
                          epoch,
                          iterno,
                          len(train_loader),
                          1000 * (time.time() - start) / args.log_interval,
                          np.asarray(costs).mean(0),
                      ))
                # Start a fresh averaging window for the next report.
                costs = []
                start = time.time()
Exemple #9
0
    parser.add_argument("--config-path", type=str, required=True)
    parser.add_argument("--save-dir-path", type=str, default=".")
    parser.add_argument("--tol", type=float, default=0)
    parser.add_argument("--batch-size", type=int, default=512)
    parser.add_argument("--distance", default="l1", choices=["l1", "l2"])
    args = parser.parse_args()

    cfg, G, lidar, device = utils.setup(
        args.model_path,
        args.config_path,
        ema=True,
        fix_noise=True,
    )

    utils.set_requires_grad(G, False)
    G = DP(G)

    # hyperparameters
    num_step = 1000
    perturb_latent = True
    noise_ratio = 0.75
    noise_sigma = 1.0
    lr_rampup_ratio = 0.05
    lr_rampdown_ratio = 0.25

    # prepare reference
    dataset = define_dataset(cfg.dataset, phase="test")
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
Exemple #10
0
    # print(f"with configuration\n{dict_to_str(config)}")

    model_name = f"seq_lab_{config.model_name.lower()}"
    model_config = deepcopy(ModelCfg[model_name])
    model_config.cnn.name = config.cnn_name
    model_config.rnn.name = config.rnn_name
    model_config.attn.name = config.attn_name

    model = ECG_SEQ_LAB_NET_CPSC2019(
        n_leads=config.n_leads,
        input_len=config.input_len,
        config=model_config,
    )

    if torch.cuda.device_count() > 1:
        model = DP(model)
        # model = DDP(model)

    model.to(device=device)
    model.__DEBUG__ = False

    try:
        train(
            model=model,
            model_config=model_config,
            config=train_config,
            device=device,
            logger=logger,
            debug=train_config.debug,
        )
    except KeyboardInterrupt:
Exemple #11
0
    def __init__(self, input_size, n_channels, hparams, gpu, inference=False):

        self.hparams = hparams

        if inference:
            self.device = torch.device('cpu')
            self.model = ECGNet(n_channels=n_channels,
                                hparams=self.hparams).to(self.device)
        else:
            if torch.cuda.device_count() > 1:
                if len(gpu) > 0:
                    print("Number of GPUs will be used: ", len(gpu))
                    self.device = torch.device(f"cuda:{gpu[0]}" if torch.cuda.
                                               is_available() else "cpu")
                    self.model = ECGNet(n_channels=n_channels,
                                        hparams=self.hparams).to(self.device)
                    self.model = DP(self.model,
                                    device_ids=gpu,
                                    output_device=gpu[0])
                else:
                    print("Number of GPUs will be used: ",
                          torch.cuda.device_count() - 5)
                    self.device = torch.device(
                        "cuda:0" if torch.cuda.is_available() else "cpu")
                    self.model = ECGNet(n_channels=n_channels,
                                        hparams=self.hparams).to(self.device)
                    self.model = DP(self.model,
                                    device_ids=list(
                                        range(torch.cuda.device_count() - 5)))
            else:
                self.device = torch.device(
                    "cuda:0" if torch.cuda.is_available() else "cpu")
                self.model = ECGNet(n_channels=n_channels,
                                    hparams=self.hparams).to(self.device)
                print('Only one GPU is available')

        # define the models
        #summary(self.model, (input_size, n_channels))
        #print(torch.cuda.is_available())

        self.metric = Metric()
        self.num_workers = 18
        self.threshold = 0.5

        ########################## compile the model ###############################

        # define optimizer
        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=self.hparams['lr'])

        weights = torch.Tensor([
            1., 1., 1., 1., 0.5, 1., 1., 1., 1., 1., 1., 1., 0.5, 0.5, 1., 1.,
            1., 1., 0.5, 1., 1., 1., 1., 0.5, 1., 1., 0.5
        ]).to(self.device)

        self.loss = nn.BCELoss(weight=weights)  # CompLoss(self.device) #
        self.decoder_loss = nn.MSELoss()

        # define early stopping
        self.early_stopping = EarlyStopping(
            checkpoint_path=self.hparams['checkpoint_path'] + '/checkpoint' +
            str(self.hparams['start_fold']) + '.pt',
            patience=self.hparams['patience'],
            delta=self.hparams['min_delta'],
            is_maximize=True,
        )
        # lr scheduler
        self.scheduler = ReduceLROnPlateau(
            optimizer=self.optimizer,
            mode='max',
            factor=0.2,
            patience=1,
            verbose=True,
            threshold=self.hparams['min_delta'],
            threshold_mode='abs',
            cooldown=0,
            eps=0,
        )

        self.seed_everything(42)

        self.postprocessing = PostProcessing(fold=self.hparams['start_fold'])
        self.scaler = torch.cuda.amp.GradScaler()
Exemple #12
0
 def set_multiple_gpu(self):
     """Wrap the applier/detection modules in ``DP`` on multi-GPU hosts.

     When ``torch.cuda.device_count()`` reports more than one device,
     ``self.dot_applier``, ``self.patch_applier`` and ``self.detections``
     are each replaced by ``DP(<module>)``. Otherwise nothing is changed.
     ``DP`` is presumably ``torch.nn.DataParallel`` -- confirm against the
     file's imports.
     """
     # Single device (or none): leave every module as it is.
     if torch.cuda.device_count() <= 1:
         return

     print("more than 1")
     # Same wrapping order as before: dot applier, patch applier, detections.
     for attr_name in ("dot_applier", "patch_applier", "detections"):
         setattr(self, attr_name, DP(getattr(self, attr_name)))
Exemple #13
0
    def __init__(self, args) -> None:
        """Use ELM with a fine-tuned language model for sentiment classification.

        Both ``model_name`` and ``training_type`` are validated before any
        pretrained weights are loaded, so a bad configuration fails fast.

        Args:
            args (dict): configuration; every key is optional.
                - model_name (str): name of the transformer model. One of
                  'bert-base-uncased', 'distilbert-base-uncased',
                  'albert-base-v2', 'prajjwal1/bert-tiny' or
                  'voidful/albert_chinese_xxlarge'.
                  Default 'bert-base-uncased'.
                - batch_size (int): batch size, stored as ``self.bsz``.
                  Default 10.
                - epoch_num (int): epochs to train, stored as
                  ``self.epoch``. Default 2.
                - learning_rate (float): learning rate for fine-tuning.
                  Default 0.001.
                - learning_rate_decay (float): stored as ``self.lr_decay``.
                  Default 0.99.
                - eval_epoch (int): stored as ``self.eval_epoch``.
                  Default 1.
                - debug (bool): stored as ``self.debug``. Default True.
                - training_type (str): fine-tuning mode. Default 'base'.
                  - base: only the ELM is built
                  - finetune_classifier: a single linear classifier with
                    cross-entropy loss is built
                  - finetune_classifier_elm: both the linear classifier
                    and the ELM are built
                  - finetune_classifier_linear: a
                    Linear -> Sigmoid -> Linear classifier with
                    cross-entropy loss is built, plus
                    ``classic_ELM(None, None)``

        Raises:
            TypeError: if ``model_name`` is not one of the supported names.
            KeyError: if ``training_type`` is not one of the supported modes.
        """
        # load configuration
        self.model_name = args.get('model_name', 'bert-base-uncased')
        self.bsz = args.get('batch_size', 10)
        self.epoch = args.get('epoch_num', 2)
        self.learning_rate = args.get('learning_rate', 0.001)
        self.training_type = args.get('training_type', 'base')
        self.debug = args.get('debug', True)
        self.eval_epoch = args.get('eval_epoch', 1)
        self.lr_decay = args.get('learning_rate_decay', 0.99)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.device = device
        self.n_gpu = torch.cuda.device_count()

        # Validate the configuration BEFORE loading any weights. The model
        # name is checked first so that, when both values are invalid, the
        # TypeError still takes precedence over the KeyError.
        chinese_albert = 'voidful/albert_chinese_xxlarge'
        tiny_bert = 'prajjwal1/bert-tiny'
        wide_models = ('bert-base-uncased', 'distilbert-base-uncased',
                       'albert-base-v2', chinese_albert)
        if self.model_name in wide_models:
            input_shape, output_shape = 768, 256
        elif self.model_name == tiny_bert:
            input_shape, output_shape = 128, 64
        else:
            raise TypeError("Unsupported model name")

        supported_types = ('base', 'finetune_classifier',
                           'finetune_classifier_elm',
                           'finetune_classifier_linear')
        if self.training_type not in supported_types:
            # Same exception (and argument) the dispatch-table lookup at the
            # end of this method would raise, just before the expensive load.
            raise KeyError(self.training_type)

        # load pretrained model
        if self.model_name == chinese_albert:
            self.pretrained_model = AlbertForMaskedLM.from_pretrained(
                self.model_name)
            self.pretrained_tokenizer = BertTokenizer.from_pretrained(
                self.model_name)
        elif self.model_name == tiny_bert:
            self.pretrained_model = AutoModel.from_pretrained(self.model_name)
            self.pretrained_tokenizer = AutoTokenizer.from_pretrained(
                self.model_name, model_max_length=512)
        else:
            self.pretrained_model = AutoModel.from_pretrained(self.model_name)
            self.pretrained_tokenizer = AutoTokenizer.from_pretrained(
                self.model_name)

        multi_gpu = self.n_gpu > 1
        device_ids = list(range(self.n_gpu)) if multi_gpu else None

        def place(module):
            """Move ``module`` to the device; wrap it in DP on multi-GPU."""
            module.to(device)
            if multi_gpu:
                return DP(module, device_ids=device_ids)
            return module

        self.pretrained_model = place(self.pretrained_model)

        # load specific model
        if self.training_type in ('finetune_classifier',
                                  'finetune_classifier_elm'):
            self.classifier = place(
                torch.nn.Sequential(torch.nn.Linear(input_shape, 2)))
            self.loss_func = torch.nn.CrossEntropyLoss()
        if self.training_type in ('base', 'finetune_classifier_elm'):
            self.elm = classic_ELM(input_shape, output_shape)
        if self.training_type == 'finetune_classifier_linear':
            self.elm = classic_ELM(None, None)
            self.classifier = place(
                torch.nn.Sequential(
                    OrderedDict([
                        ('w', torch.nn.Linear(input_shape, output_shape)),
                        ('act', torch.nn.Sigmoid()),
                        ('beta', torch.nn.Linear(output_shape, 2)),
                    ])))
            self.loss_func = torch.nn.CrossEntropyLoss()

        # load processor, trainer, evaluator, inferer.
        processors = {
            'base': self.__processor_base__,
            'finetune_classifier': self.__processor_base__,
            'finetune_classifier_elm': self.__processor_base__,
            'finetune_classifier_linear': self.__processor_base__,
        }
        trainers = {
            'base':
            self.__train_base__,
            'finetune_classifier':
            self.__train_finetune_classifier__,
            'finetune_classifier_elm':
            self.__train_finetune_classifier_elm__,
            'finetune_classifier_linear':
            self.__train_finetune_classifier_linear__,
        }
        evaluators = {
            'base': self.__eval_base__,
            'finetune_classifier': self.__eval_finetune_classifier__,
            'finetune_classifier_elm': self.__eval_base__,
            'finetune_classifier_linear':
            self.__eval_finetune_classifier_linear__,
        }
        inferers = {
            'base': self.__infer_base__,
            'finetune_classifier': self.__infer_finetune_classifier__,
            'finetune_classifier_elm': self.__infer_finetune_classifier_elm__,
            'finetune_classifier_linear': self.__infer_base__
        }
        self.processor = processors[self.training_type]
        self.trainer = trainers[self.training_type]
        self.evaluator = evaluators[self.training_type]
        self.inferer = inferers[self.training_type]
    # also save a copy for wav generation
    if melgan_run_id:
        temp_dir = Path(args.wav_generate_dir)
        temp_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy(mean_fp, temp_dir / f"mean.{feat_type}.npy")
        shutil.copy(std_fp, temp_dir / f"std.{feat_type}.npy")

    mean = torch.from_numpy(np.load(mean_fp)).float().to(device).view(
        1, feat_dim, 1)
    std = torch.from_numpy(np.load(std_fp)).float().to(device).view(
        1, feat_dim, 1)

    # Model

    if torch.cuda.device_count() > 1:
        netG = DP(NetG(feat_dim, z_dim, z_scale_factors).to(device))
        netD = DP(NetD(feat_dim).to(device))
        netE = DP(Encoder(feat_dim, z_dim, z_scale_factors).to(device))
        recorder = BEGANRecorder(lambda_k, init_k, gamma)
        print(f"We have {torch.cuda.device_count()} gpus. Use data parallel.")
    else:
        netG = NetG(feat_dim, z_dim, z_scale_factors).to(device)
        netD = NetD(feat_dim).to(device)
        netE = Encoder(feat_dim, z_dim, z_scale_factors).to(device)
        recorder = BEGANRecorder(lambda_k, init_k, gamma)
        print(f"We have {torch.cuda.device_count()} gpu.")

    # Optimizers
    optimizerG = optim.Adam(netG.parameters(), lr=init_lr)
    optimizerD = optim.Adam(netD.parameters(), lr=init_lr)
    optimizerE = optim.Adam(netE.parameters(), lr=init_lr)