Exemple #1
0
    def test_mandatory_mapping(self):
        """Check the 'passed' flag of validate() for several mandatory mappings.

        Expected outcomes against ``self.variables``:

        * a rule listing only group 1 passes;
        * a rule listing only group 2, or only group 3, fails;
        * a rule listing groups 1 and 3 passes;
        * a rule listing groups 2 and 3 fails.

        NOTE(review): this suggests a rule is satisfied as soon as one of its
        groups is fully present -- confirm against validate().
        """
        # (mandatory_mapping, expected 'passed' value). Every case spells out
        # its own literals so each validate() call receives fresh lists.
        scenarios = (
            ([[['"group.1.1"', '"group.1.2"', '"group.1.3"']]], True),
            ([[['"group.2.1"', '"group.2.2"']]], False),
            ([[['"group.3.1"', '"group.3.2"', '"group.3.3"']]], False),
            ([[['"group.1.1"', '"group.1.2"', '"group.1.3"'],
               ['"group.3.1"', '"group.3.2"', '"group.3.3"']]], True),
            ([[['"group.2.1"', '"group.2.2"'],
               ['"group.3.1"', '"group.3.2"', '"group.3.3"']]], False),
        )

        for rules, should_pass in scenarios:
            report = validate(self.variables, mandatory_mapping=rules)
            verdict = report['passed']
            if should_pass:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)
Exemple #2
0
    def test_type_mapping_int(self):
        """Check validate() with 'int' type rules, with and without bounds.

        The plain ``{'int': 'int'}`` rule is expected to pass while
        ``{'string': 'int'}`` is expected to fail. For the dict form, the
        bounds -10 / 10 (as ``min_value``/``max_value`` or as a ``ranges``
        entry) are expected to pass, while a lower bound of 5 or 10, or an
        upper bound of -10, is expected to fail.
        """

        def int_rule(**constraints):
            # Dict form of the rule for the variable named 'int':
            # {'int': {'type': 'int', <constraints...>}}
            spec = {'type': 'int'}
            spec.update(constraints)
            return {'int': spec}

        # (type_mapping, expected 'passed' value), in the order they are run.
        scenarios = [
            ({'int': 'int'}, True),
            ({'string': 'int'}, False),
            (int_rule(min_value=-10), True),
            (int_rule(min_value=10), False),
            (int_rule(max_value=10), True),
            (int_rule(max_value=-10), False),
            (int_rule(min_value=-10, max_value=10), True),
            (int_rule(min_value=5, max_value=10), False),
            (int_rule(ranges=[[-10, 10]]), True),
            (int_rule(ranges=[[5, 10]]), False),
        ]

        for mapping, should_pass in scenarios:
            report = validate(self.variables, type_mapping=mapping)
            verdict = report['passed']
            if should_pass:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)
Exemple #3
0
    def test_incompatible_mapping(self):
        """Check the 'passed' flag of validate() for incompatible mappings.

        Expected outcomes against ``self.variables``: declaring groups 1 and 2
        incompatible fails, whereas declaring group 3 incompatible with either
        group 1 or group 2 passes.
        """
        # (incompatible_mapping, expected 'passed' value). Literals are written
        # out per case so each validate() call receives fresh lists.
        scenarios = (
            ([[['"group.1.1"', '"group.1.2"', '"group.1.3"'],
               ['"group.2.1"', '"group.2.2"']]], False),
            ([[['"group.1.1"', '"group.1.2"', '"group.1.3"'],
               ['"group.3.1"', '"group.3.2"', '"group.3.3"']]], True),
            ([[['"group.2.1"', '"group.2.2"'],
               ['"group.3.1"', '"group.3.2"', '"group.3.3"']]], True),
        )

        for rules, should_pass in scenarios:
            report = validate(self.variables, incompatible_mapping=rules)
            verdict = report['passed']
            if should_pass:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)
Exemple #4
0
from models.own_model import create_own_model
import torch
from utils.dataloaders import validation_dataloader, training_dataloader
from utils.confusion_matrix import generate_conf_matrix
from utils.validation import get_predicted_actual, validate

# Evaluation script: load a trained model, report its accuracy on the chosen
# dataloader and generate a confusion matrix.
# NOTE(review): get_device, sys, create_squeezenet_model and the PATH_TO_*
# constants are expected to be imported above this excerpt.
device = get_device()

# Passing "-s" as the first command-line argument selects the SqueezeNet
# model; anything else falls back to the project's own model.
if len(sys.argv) > 1 and sys.argv[1] == '-s':
    print("Using Squeezenet model")
    model = create_squeezenet_model()
    path = PATH_TO_SQUEEZENET_MODEL
else:
    print("Using own model")
    model = create_own_model()
    path = PATH_TO_OWN_MODEL

model.to(device)

# To evaluate on the training loader instead, use: data = training_dataloader
data = validation_dataloader

# Load from map of layers to parameter tensors. map_location remaps the stored
# tensors onto the selected device, so a checkpoint written on a GPU machine
# can still be loaded when `device` is the CPU.
model.load_state_dict(torch.load(path, map_location=device))

accuracy = validate(model, data)
print(f"Accuracy: {accuracy}%")

predicted, actual = get_predicted_actual(model, data)
generate_conf_matrix(predicted, actual)
Exemple #5
0
def train(args, pt_dir, chkpt_path, trainloader, valloader, writer, logger, hp,
          hp_str):
    """Train the generator against the multi-scale and MPD discriminators.

    Epochs come from ``itertools.count``, so the loop only ends when an
    exception is raised. Validation runs every ``hp.log.validation_interval``
    epochs and a checkpoint is written every ``hp.log.save_interval`` epochs.
    All models and batches are moved with ``.cuda()``, so CUDA is required.

    Args:
        args: Run arguments; only ``args.name`` is read (checkpoint file name).
        pt_dir: Directory in which checkpoints are written.
        chkpt_path: Checkpoint to resume from, or ``None`` for a fresh run.
        trainloader: Loader yielding ``((melG, audioG), (melD, audioD))``; its
            dataset must provide ``shuffle_mapping()``.
        valloader: Loader forwarded to ``validate``.
        writer: Object exposing ``log_training``; also forwarded to
            ``validate``.
        logger: Logger used for progress, warning and error messages.
        hp: Hyper-parameter object (``hp.audio``, ``hp.model``, ``hp.train``
            and ``hp.log`` are read).
        hp_str: Hyper-parameter string stored in checkpoints and compared with
            the one found in a resumed checkpoint.

    Returns:
        None. An exception raised inside the training loop is logged, its
        traceback is printed, and the function returns without re-raising.
    """
    model_g = Generator(hp.audio.n_mel_channels).cuda()
    model_d = MultiScaleDiscriminator(hp.model.num_D, hp.model.ndf,
                                      hp.model.n_layers,
                                      hp.model.downsampling_factor,
                                      hp.model.disc_out).cuda()
    model_d_mpd = MPD().cuda()

    optim_g = torch.optim.Adam(model_g.parameters(),
                               lr=hp.train.adam.lr,
                               betas=(hp.train.adam.beta1,
                                      hp.train.adam.beta2))
    # A single optimizer updates both discriminators.
    optim_d = torch.optim.Adam(itertools.chain(model_d.parameters(),
                                               model_d_mpd.parameters()),
                               lr=hp.train.adam.lr,
                               betas=(hp.train.adam.beta1,
                                      hp.train.adam.beta2))

    stft = TacotronSTFT(filter_length=hp.audio.filter_length,
                        hop_length=hp.audio.hop_length,
                        win_length=hp.audio.win_length,
                        n_mel_channels=hp.audio.n_mel_channels,
                        sampling_rate=hp.audio.sampling_rate,
                        mel_fmin=hp.audio.mel_fmin,
                        mel_fmax=hp.audio.mel_fmax)

    # githash = get_commit_hash()

    # With init_epoch == -1 a fresh run starts at epoch 0; a resumed run
    # continues at the epoch after the one stored in the checkpoint.
    init_epoch = -1
    step = 0

    if chkpt_path is not None:
        logger.info("Resuming from checkpoint: %s" % chkpt_path)
        checkpoint = torch.load(chkpt_path)
        model_g.load_state_dict(checkpoint['model_g'])
        model_d.load_state_dict(checkpoint['model_d'])
        model_d_mpd.load_state_dict(checkpoint['model_d_mpd'])
        optim_g.load_state_dict(checkpoint['optim_g'])
        optim_d.load_state_dict(checkpoint['optim_d'])
        step = checkpoint['step']
        init_epoch = checkpoint['epoch']

        if hp_str != checkpoint['hp_str']:
            logger.warning(
                "New hparams is different from checkpoint. Will use new.")

        # if githash != checkpoint['githash']:
        #     logger.warning("Code might be different: git hash is different.")
        #     logger.warning("%s -> %s" % (checkpoint['githash'], githash))

    else:
        logger.info("Starting new training run.")

    # this accelerates training when the size of minibatch is always consistent.
    # if not consistent, it'll horribly slow down.
    torch.backends.cudnn.benchmark = True

    try:
        model_g.train()
        model_d.train()
        # The MPD discriminator is optimised together with model_d, so put it
        # in training mode explicitly as well.
        model_d_mpd.train()
        stft_loss = MultiResolutionSTFTLoss()
        criterion = torch.nn.MSELoss().cuda()
        l1loss = torch.nn.L1Loss()

        for epoch in itertools.count(init_epoch + 1):
            if epoch % hp.log.validation_interval == 0:
                with torch.no_grad():
                    validate(hp, model_g, model_d, model_d_mpd, valloader,
                             stft_loss, l1loss, criterion, stft, writer, step)

            trainloader.dataset.shuffle_mapping()
            loader = tqdm.tqdm(trainloader, desc='Loading train data')
            # Per-epoch running values shown in the progress-bar description.
            avg_g_loss = []
            avg_d_loss = []
            avg_adv_loss = []
            for (melG, audioG), (melD, audioD) in loader:
                melG = melG.cuda()  # e.g. torch.Size([16, 80, 64])
                audioG = audioG.cuda()  # e.g. torch.Size([16, 1, 16000])
                melD = melD.cuda()  # e.g. torch.Size([16, 80, 64])
                audioD = audioD.cuda()  # e.g. torch.Size([16, 1, 16000])

                # generator
                optim_g.zero_grad()
                # Crop the generated audio to hp.audio.segment_length samples.
                fake_audio = model_g(
                    melG)[:, :, :hp.audio.
                          segment_length]  # e.g. torch.Size([16, 1, 12800])

                loss_g = 0.0

                sc_loss, mag_loss = stft_loss(
                    fake_audio[:, :, :audioG.size(2)].squeeze(1),
                    audioG.squeeze(1))
                loss_g += sc_loss + mag_loss  # STFT Loss

                # The adversarial and mel terms only join the generator loss
                # once `step` exceeds the discriminator start threshold; until
                # then both stay at the float 0.0.
                adv_loss = 0.0
                loss_mel = 0.0
                if step > hp.train.discriminator_train_start_steps:
                    disc_real = model_d(audioG)
                    disc_fake = model_d(fake_audio)
                    # for multi-scale discriminator

                    for feats_fake, score_fake in disc_fake:
                        # adv_loss += torch.mean(torch.sum(torch.pow(score_fake - 1.0, 2), dim=[1, 2]))
                        adv_loss += criterion(score_fake,
                                              torch.ones_like(score_fake))
                    adv_loss = adv_loss / len(disc_fake)  # len(disc_fake) = 3

                    # MPD adversarial loss
                    out1, out2, out3, out4, out5 = model_d_mpd(fake_audio)
                    adv_mpd_loss = criterion(out1, torch.ones_like(out1)) + criterion(out2, torch.ones_like(out2)) + \
                                        criterion(out3, torch.ones_like(out3)) + criterion(out4, torch.ones_like(out4)) + \
                                        criterion(out5, torch.ones_like(out5))
                    adv_mpd_loss = adv_mpd_loss / 5
                    adv_loss = adv_loss + adv_mpd_loss  # Adv Loss

                    # Mel Loss
                    mel_fake = stft.mel_spectrogram(fake_audio.squeeze(1))
                    loss_mel += l1loss(melG[:, :, :mel_fake.size(2)],
                                       mel_fake.cuda())  # Mel L1 loss
                    loss_g += hp.model.lambda_mel * loss_mel

                    if hp.model.feat_loss:
                        # Feature matching: L1 distance between the
                        # discriminator features of fake and real audio.
                        for (feats_fake,
                             score_fake), (feats_real,
                                           _) in zip(disc_fake, disc_real):
                            for feat_f, feat_r in zip(feats_fake, feats_real):
                                adv_loss += hp.model.feat_match * torch.mean(
                                    torch.abs(feat_f - feat_r))

                    loss_g += hp.model.lambda_adv * adv_loss

                loss_g.backward()
                optim_g.step()

                # discriminator
                loss_d_avg = 0.0
                if step > hp.train.discriminator_train_start_steps:
                    # Fresh fake batch from the discriminator's own mel input,
                    # detached so no gradient reaches the generator.
                    fake_audio = model_g(melD)[:, :, :hp.audio.segment_length]
                    fake_audio = fake_audio.detach()
                    loss_d_sum = 0.0
                    for _ in range(hp.train.rep_discriminator):
                        optim_d.zero_grad()
                        disc_fake = model_d(fake_audio)
                        disc_real = model_d(audioD)
                        loss_d = 0.0
                        loss_d_real = 0.0
                        loss_d_fake = 0.0
                        for (_, score_fake), (_, score_real) in zip(
                                disc_fake, disc_real):
                            loss_d_real += criterion(
                                score_real, torch.ones_like(score_real))
                            loss_d_fake += criterion(
                                score_fake, torch.zeros_like(score_fake))
                        loss_d_real = loss_d_real / len(
                            disc_real)  # len(disc_real) = 3
                        loss_d_fake = loss_d_fake / len(
                            disc_fake)  # len(disc_fake) = 3
                        loss_d += loss_d_real + loss_d_fake  # MSD loss

                        loss_d_sum += loss_d

                        # MPD adversarial loss
                        out1, out2, out3, out4, out5 = model_d_mpd(fake_audio)
                        out1_real, out2_real, out3_real, out4_real, out5_real = model_d_mpd(
                            audioD)
                        loss_mpd_fake = criterion(out1, torch.zeros_like(out1)) + criterion(out2, torch.zeros_like(out2)) + \
                                            criterion(out3, torch.zeros_like(out3)) + criterion(out4, torch.zeros_like(out4)) + \
                                            criterion(out5, torch.zeros_like(out5))
                        loss_mpd_real = criterion(out1_real, torch.ones_like(out1_real)) + criterion(out2_real, torch.ones_like(out2_real)) + \
                                            criterion(out3_real, torch.ones_like(out3_real)) + criterion(out4_real, torch.ones_like(out4_real)) + \
                                            criterion(out5_real, torch.ones_like(out5_real))
                        loss_mpd = (loss_mpd_fake +
                                    loss_mpd_real) / 5  # MPD Loss
                        loss_d += loss_mpd
                        loss_d.backward()
                        optim_d.step()
                        loss_d_sum += loss_mpd

                    loss_d_avg = loss_d_sum / hp.train.rep_discriminator
                    loss_d_avg = loss_d_avg.item()

                step += 1
                # logging
                # Everything kept for logging is converted to a plain Python
                # float. adv_loss and loss_mel are autograd tensors once the
                # discriminator phase is active; storing them in the running
                # list would keep their history alive for the whole epoch.
                loss_g = loss_g.item()
                adv_loss = float(adv_loss)
                loss_mel = float(loss_mel)
                avg_g_loss.append(loss_g)
                avg_d_loss.append(loss_d_avg)
                avg_adv_loss.append(adv_loss)

                # Abort the run as soon as either loss diverges.
                if any([
                        loss_g > 1e8,
                        math.isnan(loss_g), loss_d_avg > 1e8,
                        math.isnan(loss_d_avg)
                ]):
                    logger.error("loss_g %.01f loss_d_avg %.01f at step %d!" %
                                 (loss_g, loss_d_avg, step))
                    raise Exception("Loss exploded")

                if step % hp.log.summary_interval == 0:
                    writer.log_training(loss_g, loss_d_avg, adv_loss, loss_mel,
                                        step)
                    loader.set_description(
                        "Avg : g %.04f d %.04f ad %.04f| step %d" %
                        (sum(avg_g_loss) / len(avg_g_loss),
                         sum(avg_d_loss) / len(avg_d_loss),
                         sum(avg_adv_loss) / len(avg_adv_loss), step))
            if epoch % hp.log.save_interval == 0:
                save_path = os.path.join(pt_dir,
                                         '%s_%04d.pt' % (args.name, epoch))
                torch.save(
                    {
                        'model_g': model_g.state_dict(),
                        'model_d': model_d.state_dict(),
                        'model_d_mpd': model_d_mpd.state_dict(),
                        'optim_g': optim_g.state_dict(),
                        'optim_d': optim_d.state_dict(),
                        'step': step,
                        'epoch': epoch,
                        'hp_str': hp_str
                    }, save_path)
                logger.info("Saved checkpoint to: %s" % save_path)

    except Exception as e:
        # Top-level boundary of the training run: report and return.
        logger.info("Exiting due to exception: %s" % e)
        traceback.print_exc()
def main_worker(local_rank, nprocs, args):
    """Per-process entry point: distributed resnet18 training with apex amp.

    Args:
        local_rank: Index of this process; used as the rank passed to
            ``init_process_group`` and as the CUDA device index.
        nprocs: Number of processes; ``args.batch_size`` is divided by it to
            obtain the per-process batch size.
        args: Namespace providing ``ip``, ``port``, ``nprocs``, ``lr``,
            ``batch_size`` and ``epochs``. ``args.local_rank`` is set here.
    """
    args.local_rank = local_rank
    init_seeds(local_rank+1)
    # Build the TCP address handed to init_method
    init_method = 'tcp://' + args.ip + ':' + args.port

    # 1. Distributed initialisation: every process has to do this itself,
    #    which is why it lives in main_worker
    cudnn.benchmark = True
    dist.init_process_group(backend='nccl', init_method=init_method, world_size=args.nprocs,
                            rank=local_rank)

    # 2. Basic definitions: model, loss function, optimizer
    model = resnet18()
    torch.cuda.set_device(local_rank)
    model.cuda(local_rank)
    criterion = nn.CrossEntropyLoss().cuda(local_rank)
    optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=0.9, weight_decay=1e-4)
    train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[60, 120, 160], gamma=0.2)

    # apex initialisation
    model = apex.parallel.convert_syncbn_model(model).to(local_rank) # use the SyncBatchNorm provided by apex
    model, optimizer = amp.initialize(model, optimizer)
    # NOTE(review): DDP is imported outside this excerpt (presumably apex's
    # DistributedDataParallel) -- confirm.
    model = DDP(model)

    # 3. Load the data
    batch_size = int(args.batch_size / nprocs)

    train_dataset = get_train_dataset()
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=4, pin_memory=True, sampler=train_sampler)

    test_dataset = get_test_dataset()
    test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, num_workers=4, pin_memory=True, sampler=test_sampler)

    for epoch in range(args.epochs):
        start = time.time()
        model.train()
        # Give the sampler the current epoch so each epoch is shuffled
        # differently.
        train_sampler.set_epoch(epoch)

        for step, (images, labels) in enumerate(train_loader):
            # Move this process's batch onto its GPU
            images = images.cuda(local_rank, non_blocking=True)
            labels = labels.cuda(local_rank, non_blocking=True)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Wait for every process before averaging the loss for logging.
            torch.distributed.barrier()
            reduced_loss = reduce_mean(loss, args.nprocs)

            # Update the model weights; the loss is wrapped with scale_loss
            optimizer.zero_grad()
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()

            if args.local_rank == 0:
                print(
                    'Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
                        reduced_loss,
                        optimizer.param_groups[0]['lr'],
                        epoch=epoch+1,
                        trained_samples=step * args.batch_size + len(images),
                        total_samples=len(train_loader.dataset)
                    ))

        # Advance the LR schedule once per epoch, after the optimizer updates
        # and without the deprecated explicit epoch argument. MultiStepLR
        # yields the same per-epoch learning rates as before.
        train_scheduler.step()

        finish = time.time()
        if args.local_rank == 0:
            # Report the 1-based epoch, matching the progress line above.
            print('epoch {} training time consumed: {:.2f}s'.format(epoch + 1, finish - start))

        # validate after every epoch
        validate(test_loader, model, criterion, local_rank, args)
Exemple #7
0
def main_worker(local_rank, nprocs, args):
    """Per-process entry point: distributed resnet18 training with native DDP.

    Args:
        local_rank: Index of this process; used as the rank passed to
            ``init_process_group`` and as the CUDA device index.
        nprocs: Number of processes; ``args.batch_size`` is divided by it to
            obtain the per-process batch size.
        args: Namespace providing ``ip``, ``port``, ``nprocs``, ``lr``,
            ``batch_size`` and ``epochs``. ``args.local_rank`` is set here.
    """
    args.local_rank = local_rank
    init_seeds(local_rank + 1)  # set different seed for each worker
    # Build the TCP address handed to init_method
    init_method = 'tcp://' + args.ip + ':' + args.port

    # 1. Distributed initialisation: every process has to do this itself,
    #    which is why it lives in main_worker
    cudnn.benchmark = True
    dist.init_process_group(backend='nccl',
                            init_method=init_method,
                            world_size=args.nprocs,
                            rank=local_rank)

    # 2. Basic definitions: model, loss function, optimizer
    model = resnet18(
    )  # define the model; it is placed on this process's GPU below

    # The part below is where local_rank is needed: the model
    # ================================
    torch.cuda.set_device(local_rank)  # select the GPU with set_device and cuda
    model.cuda(local_rank)
    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(local_rank)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[local_rank])  # wrap the model in DistributedDataParallel
    # =================================
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=0.9,
                                weight_decay=1e-4)
    train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[60, 120, 160],
                                                     gamma=0.2)

    # 3. Load the data
    batch_size = int(args.batch_size /
                     nprocs)  # split the global batch size per process by hand

    train_dataset = get_train_dataset()
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=4,
                                               pin_memory=True,
                                               sampler=train_sampler)

    test_dataset = get_test_dataset()
    test_sampler = torch.utils.data.distributed.DistributedSampler(
        test_dataset)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              num_workers=4,
                                              pin_memory=True,
                                              sampler=test_sampler)

    for epoch in range(args.epochs):
        start = time.time()
        model.train()
        # The sampler needs the current epoch so that the dataloader's
        # shuffling actually changes from epoch to epoch
        train_sampler.set_epoch(epoch)

        for step, (images, labels) in enumerate(train_loader):
            # Move this process's batch onto the GPU
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # torch.distributed.barrier() blocks until every process has
            # reached this line, so the averaged loss is not affected by
            # processes running at different speeds
            torch.distributed.barrier()
            reduced_loss = reduce_mean(loss, args.nprocs)

            # Update the model weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if args.local_rank == 0:
                print(
                    'Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'
                    .format(reduced_loss,
                            optimizer.param_groups[0]['lr'],
                            epoch=epoch + 1,
                            trained_samples=step * args.batch_size +
                            len(images),
                            total_samples=len(train_loader.dataset)))

        # Advance the LR schedule once per epoch, after the optimizer updates
        # and without the deprecated explicit epoch argument. MultiStepLR
        # yields the same per-epoch learning rates as before.
        train_scheduler.step()

        finish = time.time()
        if args.local_rank == 0:
            # Report the 1-based epoch, matching the progress line above.
            print('epoch {} training time consumed: {:.2f}s'.format(
                epoch + 1, finish - start))

        # validate after every epoch
        validate(test_loader, model, criterion, local_rank, args)