Code Example #1
def main(config, cuda, gpu):
    # Configuration
    CONFIG = Dict(yaml.load(open(config)))

    # CUDA check
    cuda = cuda and torch.cuda.is_available()

    if cuda:
        gpu_ids = [int(string) for string in gpu.split(',')]
        current_device = torch.cuda.current_device()
        print('Running on', torch.cuda.get_device_name(current_device),
              gpu_ids)

    # Dataset
    dataset = CocoStuff10k(
        root=CONFIG.ROOT,
        split='train',
        image_size=513,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN,
        scale=True,
        flip=True,
    )

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.BATCH_SIZE,
        num_workers=CONFIG.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model
    model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG.N_CLASSES)
    state_dict = torch.load(CONFIG.INIT_MODEL)
    model.load_state_dict(state_dict, strict=False)  # Skip "aspp" layer
    model = nn.DataParallel(model, device_ids=gpu_ids if cuda else None)
    if cuda:
        model.cuda()

    # Optimizer
    optimizer = {
        'sgd':
        torch.optim.SGD(
            # cf lr_mult and decay_mult in train.prototxt
            params=[{
                'params': get_lr_params(model.module, key='1x'),
                'lr': CONFIG.LR,
                'weight_decay': CONFIG.WEIGHT_DECAY
            }, {
                'params': get_lr_params(model.module, key='10x'),
                'lr': 10 * CONFIG.LR,
                'weight_decay': CONFIG.WEIGHT_DECAY
            }, {
                'params': get_lr_params(model.module, key='20x'),
                'lr': 20 * CONFIG.LR,
                'weight_decay': 0.0
            }],
            momentum=CONFIG.MOMENTUM,
        ),
    }.get(CONFIG.OPTIMIZER)

    # Loss definition
    criterion = CrossEntropyLoss2d(ignore_index=CONFIG.IGNORE_LABEL)
    if cuda:
        criterion.cuda()

    # TensorBoard Logger
    writer = SummaryWriter(CONFIG.LOG_DIR)
    loss_meter = MovingAverageValueMeter(20)

    model.train()
    model.module.scale.freeze_bn()

    for iteration in tqdm(
            range(1, CONFIG.ITER_MAX + 1),
            total=CONFIG.ITER_MAX,
            leave=False,
            dynamic_ncols=True,
    ):

        # Set a learning rate
        poly_lr_scheduler(
            optimizer=optimizer,
            init_lr=CONFIG.LR,
            iter=iteration - 1,
            lr_decay_iter=CONFIG.LR_DECAY,
            max_iter=CONFIG.ITER_MAX,
            power=CONFIG.POLY_POWER,
        )

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG.ITER_SIZE + 1):
            data, target = next(loader_iter)

            # Image
            data = data.cuda() if cuda else data
            data = Variable(data)

            # Propagate forward
            outputs = model(data)

            # Loss
            loss = 0
            for output in outputs:
                # Resize target for {100%, 75%, 50%, Max} outputs
                target_ = resize_target(target, output.size(2))
                target_ = target_.cuda() if cuda else target_
                target_ = Variable(target_)
                # Compute crossentropy loss
                loss += criterion(output, target_)

            # Backpropagate (just compute gradients wrt the loss)
            loss /= float(CONFIG.ITER_SIZE)
            loss.backward()

            iter_loss += loss.data[0]  # pre-0.4 PyTorch idiom; newer versions use loss.item()

            # Reload dataloader
            if ((iteration - 1) * CONFIG.ITER_SIZE + i) % len(loader) == 0:
                loader_iter = iter(loader)

        loss_meter.add(iter_loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # TensorBoard
        if iteration % CONFIG.ITER_TF == 0:
            writer.add_scalar('train_loss', loss_meter.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar('train_lr_group{}'.format(i), o['lr'],
                                  iteration)
            if iteration % 1000 != 0:
                continue
            for name, param in model.named_parameters():
                name = name.replace('.', '/')
                writer.add_histogram(name, param, iteration, bins="auto")
                if param.requires_grad:
                    writer.add_histogram(name + '/grad',
                                         param.grad,
                                         iteration,
                                         bins="auto")

        # Save a model
        if iteration % CONFIG.ITER_SNAP == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR,
                         'checkpoint_{}.pth'.format(iteration)),
            )

        # Save a model
        if iteration % 100 == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR, 'checkpoint_current.pth'),
            )

    torch.save(
        model.module.state_dict(),
        osp.join(CONFIG.SAVE_DIR, 'checkpoint_final.pth'),
    )
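
Every example on this page follows the same pattern: feed a raw loss value into torchnet's MovingAverageValueMeter and read back a windowed average for logging. A minimal, self-contained sketch of just that pattern (the random loss is only a stand-in for a real training loss):

import torch
from torchnet.meter import MovingAverageValueMeter

loss_meter = MovingAverageValueMeter(windowsize=20)  # average over the last 20 added values

for step in range(1, 101):
    loss = torch.rand(1).item()         # stand-in for a real training loss
    loss_meter.add(loss)
    if step % 20 == 0:
        mean, std = loss_meter.value()  # value() returns (windowed mean, windowed std)
        print('step {}: smoothed loss {:.4f} (std {:.4f})'.format(step, mean, std))
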
Code Example #2
class Generator(nn.Module):
    def __init__(self, hyperparameters, log_dir, gpu_id):
        super(Generator, self).__init__()
        self.hyp = hyperparameters
        print(hyperparameters)
        self.gpu_id = gpu_id
        self.noise_dim = self.hyp['noise_dim']
        self.vis_noise = torch.randn(1, self.hyp['noise_dim']).cuda(
            self.gpu_id).requires_grad_(False)
        self.g_loss_meter = MovingAverageValueMeter(5)
        self.log_dir = log_dir

        # Architecture:
        self.lab0 = nn.Linear(1, self.hyp['p1'], bias=False)
        self.fc0 = nn.Linear(self.noise_dim, self.hyp['p2'], bias=False)
        self.nonlin0 = nn.Sequential(*[
            nn.BatchNorm2d(self.hyp['p1'] + self.hyp['p2']),
            nn.LeakyReLU(self.hyp['lrelu_g'])
        ] if self.hyp['bg0'] else [
            nn.LeakyReLU(self.hyp['lrelu_g']),
        ])

        self.conv1 = nn.ConvTranspose2d(self.hyp['p1'] + self.hyp['p2'],
                                        self.hyp['p3'], (1, 55),
                                        bias=True)
        self.nonlin1 = nn.Sequential(*[
            nn.BatchNorm2d(self.hyp['p3']),
            nn.LeakyReLU(self.hyp['lrelu_g'])
        ] if self.hyp['bg1'] else [
            nn.LeakyReLU(self.hyp['lrelu_g']),
        ])

        self.conv2 = nn.ConvTranspose2d(self.hyp['p3'], 1, (55, 1), bias=True)
        self.sigmoid = nn.Tanh()  # despite the attribute name, the output activation is Tanh

        self.cuda(self.gpu_id)
        opt_param_list = [{
            'params': [
                param for name, param in self.named_parameters()
                if 'lab0' not in name
            ]
        }, {
            'params': self.lab0.parameters(),
            'lr': 1 * self.hyp['lr_g']
        }]

        self.optimizer = torch.optim.Adam(opt_param_list,
                                          lr=self.hyp['lr_g'],
                                          betas=(self.hyp['b1_g'],
                                                 self.hyp['b2_g']),
                                          weight_decay=self.hyp['wd_g'])
        # rand init
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                torch.nn.init.kaiming_normal_(m.weight,
                                              a=self.hyp['lrelu_g'],
                                              nonlinearity='leaky_relu')
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                torch.nn.init.kaiming_normal_(m.weight)
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                torch.nn.init.constant_(m.weight, 1)
                torch.nn.init.constant_(m.bias, 0)

    def forward(self, z, labels):
        x = z.view(-1, self.noise_dim)
        labels = labels.view(-1, 1).float() * 2 - 1
        x = torch.cat([
            self.fc0(x).view(-1, self.hyp['p2'], 1, 1),
            self.lab0(labels).view(-1, self.hyp['p1'], 1, 1)
        ], 1)
        x = self.nonlin0(x)
        x = self.conv1(x)
        x = self.nonlin1(x)
        x = self.conv2(x)
        x = (x + torch.transpose(x, -1, -2)) / 2
        x = self.sigmoid(x)
        return x

    def train_step(self, netd):
        self.zero_grad()

        self.fake_labels = torch.randint(0, 2,
                                         (self.hyp['batch_size'], )).type(
                                             torch.float).cuda(self.gpu_id)

        self.noise = torch.randn(self.hyp['batch_size'],
                                 self.hyp['noise_dim']).cuda(self.gpu_id)
        self.g = self(self.noise, self.fake_labels)
        self.g_cost = -netd(self.g, self.fake_labels).mean()
        self.g_cost.backward()
        self.optimizer.step()
        self.g_loss_meter.add(self.g_cost.detach().cpu())

    def generate_fake_images(self, num_images):
        self.eval()
        labels = (torch.randint(0, 2, (num_images, ))).type(torch.long).cuda(
            self.gpu_id)
        noise = torch.randn(num_images, self.hyp['noise_dim']).cuda(
            self.gpu_id).requires_grad_(False)
        images = self(noise, labels).detach()
        self.train()
        return (images, labels)

    def visualize_gen_images(self, global_step):
        """
        Saves a sample of generated images to .eps and .png files. Note that the noise
        fed to the generator for visualization is kept fixed throughout training.
        :param global_step:
        :return:
        """
        self.eval()

        noise = torch.cat([self.vis_noise, self.vis_noise], 0)

        labels = (torch.from_numpy(np.array([0, 1]))).type(torch.long).view(
            -1, 1).cuda(self.gpu_id).requires_grad_(False)
        samples = self(noise, labels)

        i = str(global_step)
        os.makedirs(os.path.join(self.log_dir, 'vis_imgs'), exist_ok=True)
        filename = os.path.join(self.log_dir, 'vis_imgs', 'gen_img_it_' + i)
        b, chs, h, w = samples.shape
        imgs = samples.view(b, h, w).detach().cpu().data.numpy()
        np.save(filename + '.npy', imgs)
        labels = labels.view(b).detach().cpu().data.numpy()
        fig = plt.figure()
        for i in range(b):
            plt.subplot(1, 2, i + 1)
            plt.imshow(imgs[i],
                       cmap='jet',
                       interpolation='nearest',
                       vmin=-1,
                       vmax=1)
            plt.title('Sex: ' + ['Female', 'Male'][labels[i]])
            plt.axis('off')
        plt.savefig(filename + '.eps')
        plt.savefig(filename + '.png')
        plt.close()
        self.train()
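
Examples #2 and #8 are the generator and discriminator halves of a conditional GAN. A hypothetical outer loop, not part of these listings, sketching how train_step, the visualization helper, and the moving-average loss meter would fit together; the hyperparameter keys 'max_steps' and 'vis_every' are assumptions:

# hypothetical driver (assumed keys 'max_steps' and 'vis_every' in hyperparameters)
netg = Generator(hyperparameters, log_dir, gpu_id=0)
netd = Discriminator(hyperparameters, gpu_id=0)

for step in range(1, hyperparameters['max_steps'] + 1):
    # ... one or more discriminator updates would normally run here ...
    netg.train_step(netd)
    if step % hyperparameters['vis_every'] == 0:
        netg.visualize_gen_images(step)
        print('G loss (moving average):', netg.g_loss_meter.value()[0])
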
Code Example #3
def main(config, cuda):
    device = torch.device("cuda" if cuda and torch.cuda.is_available() else "cpu")

    if cuda:
        current_device = torch.cuda.current_device()
        print("Running on", torch.cuda.get_device_name(current_device))
    else:
        print("Running on CPU")

    # Configuration
    CONFIG = Dict(yaml.load(open(config)))

    # Dataset
    dataset = CocoStuff10k(
        root=CONFIG.ROOT,
        split="train",
        image_size=513,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN,
        scale=True,
        flip=True,
    )

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.BATCH_SIZE,
        num_workers=CONFIG.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model
    model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG.N_CLASSES)
    state_dict = torch.load(CONFIG.INIT_MODEL)
    model.load_state_dict(state_dict, strict=False)  # Skip "aspp" layer
    model = nn.DataParallel(model)
    model.to(device)

    # Optimizer
    optimizer = {
        "sgd": torch.optim.SGD(
            # cf lr_mult and decay_mult in train.prototxt
            params=[
                {
                    "params": get_lr_params(model.module, key="1x"),
                    "lr": CONFIG.LR,
                    "weight_decay": CONFIG.WEIGHT_DECAY,
                },
                {
                    "params": get_lr_params(model.module, key="10x"),
                    "lr": 10 * CONFIG.LR,
                    "weight_decay": CONFIG.WEIGHT_DECAY,
                },
                {
                    "params": get_lr_params(model.module, key="20x"),
                    "lr": 20 * CONFIG.LR,
                    "weight_decay": 0.0,
                },
            ],
            momentum=CONFIG.MOMENTUM,
        )
    }.get(CONFIG.OPTIMIZER)

    # Loss definition
    criterion = CrossEntropyLoss2d(ignore_index=CONFIG.IGNORE_LABEL)
    criterion.to(device)

    # TensorBoard Logger
    writer = SummaryWriter(CONFIG.LOG_DIR)
    loss_meter = MovingAverageValueMeter(20)

    model.train()
    model.module.scale.freeze_bn()

    for iteration in tqdm(
        range(1, CONFIG.ITER_MAX + 1),
        total=CONFIG.ITER_MAX,
        leave=False,
        dynamic_ncols=True,
    ):

        # Set a learning rate
        poly_lr_scheduler(
            optimizer=optimizer,
            init_lr=CONFIG.LR,
            iter=iteration - 1,
            lr_decay_iter=CONFIG.LR_DECAY,
            max_iter=CONFIG.ITER_MAX,
            power=CONFIG.POLY_POWER,
        )

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG.ITER_SIZE + 1):
            try:
                data, target = next(loader_iter)
            except StopIteration:
                loader_iter = iter(loader)
                data, target = next(loader_iter)

            # Image
            data = data.to(device)

            # Propagate forward
            outputs = model(data)

            # Loss
            loss = 0
            for output in outputs:
                # Resize target for {100%, 75%, 50%, Max} outputs
                target_ = resize_target(target, output.size(2))
                target_ = target_.to(device)
                # Compute crossentropy loss
                loss += criterion(output, target_)

            # Backpropagate (just compute gradients wrt the loss)
            loss /= float(CONFIG.ITER_SIZE)
            loss.backward()

            iter_loss += float(loss)

        loss_meter.add(iter_loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # TensorBoard
        if iteration % CONFIG.ITER_TF == 0:
            writer.add_scalar("train_loss", loss_meter.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("train_lr_group{}".format(i), o["lr"], iteration)
            # for name, param in model.named_parameters():
            #     name = name.replace('.', '/')
            #     writer.add_histogram(name, param, iteration, bins="auto")
            #     if param.requires_grad:
            #         writer.add_histogram(name + '/grad', param.grad, iteration, bins="auto")

        # Save a model
        if iteration % CONFIG.ITER_SNAP == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR, "checkpoint_{}.pth".format(iteration)),
            )

        # Save a model
        if iteration % 100 == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR, "checkpoint_current.pth"),
            )

    torch.save(
        model.module.state_dict(), osp.join(CONFIG.SAVE_DIR, "checkpoint_final.pth")
    )
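
The poly_lr_scheduler helper called in Examples #1, #3, and #10 is not included in these listings. A hedged sketch of the usual DeepLab-style polynomial decay, assuming the three parameter groups keep the 1x / 10x / 20x multipliers they were created with (the projects' actual helper may differ in detail):

def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter=10,
                      max_iter=30000, power=0.9):
    # 'iter' mirrors the keyword used at the call sites above (it shadows the builtin)
    if iter % lr_decay_iter or iter > max_iter:
        return
    new_lr = init_lr * (1 - float(iter) / max_iter) ** power
    # assumed param-group order: backbone (1x), new layers (10x weights, 20x biases)
    optimizer.param_groups[0]['lr'] = new_lr
    optimizer.param_groups[1]['lr'] = 10 * new_lr
    optimizer.param_groups[2]['lr'] = 20 * new_lr
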
Code Example #4
    def generateTB(self, period):
        self.writer = SummaryWriter(self.savepath + '/runs')
        self.loss_meter = MovingAverageValueMeter(20)
        self.tb = period
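
Example #4 only wires up the logging objects (a TensorBoard writer, a 20-sample moving-average meter, and a logging period). A hypothetical companion method, not part of the original class, showing how those attributes would typically be consumed from a training loop:

    def logTB(self, step, loss):
        # hypothetical helper (assumed name): smooth the raw loss with the
        # moving-average meter and emit it to TensorBoard every self.tb steps
        self.loss_meter.add(float(loss))
        if step % self.tb == 0:
            self.writer.add_scalar('train/loss', self.loss_meter.value()[0], step)
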
Code Example #5
File: plot.py  Project: lotharschulz/DL
train_file = "train_loss.txt"
text_file = "test_loss.txt"
save_file = "loss_curve.pdf"

train_f = open(train_file)
train_d = train_f.readlines()
train_f.close()

valid_f = open(text_file)
valid_d = valid_f.readlines()
valid_f.close()

train_iter = []
train_loss = []
i = 0
ma_loss = MovingAverageValueMeter(windowsize=500)
for s in train_d:
    i = i + 1
    t = s.strip().split(' ')
    t_iter = int(t[0])
    ma_loss.add(float(t[1]))
    if i % 500 == 0:
        train_iter.append(t_iter)
        train_loss.append(ma_loss.value()[0])

valid_iter = []
valid_loss = []
i = 0
for s in valid_d:
    i = i + 1
    if i >= 0:
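
The listing of plot.py is cut off in the middle of the validation loop. Under the assumption that test_loss.txt uses the same two-column "iteration loss" format as train_loss.txt, the remainder of such a script would typically collect the validation points and write the figure, roughly as sketched below (this continuation is not from the original file):

import matplotlib.pyplot as plt  # assumed to be imported at the top of the original script

for s in valid_d:
    t = s.strip().split(' ')
    valid_iter.append(int(t[0]))
    valid_loss.append(float(t[1]))

plt.plot(train_iter, train_loss, label='train (moving average)')
plt.plot(valid_iter, valid_loss, label='test')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend()
plt.savefig(save_file)
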
Code Example #6
File: cygan_model.py  Project: yuhf98/MusicCritique
    def train(self):
        torch.cuda.empty_cache()

        ######################
        # Save / Load model
        ######################

        if self.opt.continue_train:
            try:
                self.continue_from_latest_checkpoint()
            except CyganException as e:
                self.logger.error(e)
                self.opt.continue_train = False
                self.reset_save()

        else:
            self.reset_save()

        self.add_file_logger()

        ######################
        # Dataset
        ######################

        if self.opt.model == 'base':
            dataset = SteelyDataset(self.opt.genreA,
                                    self.opt.genreB,
                                    self.opt.phase,
                                    use_mix=False)
        else:
            dataset = SteelyDataset(self.opt.genreA,
                                    self.opt.genreB,
                                    self.opt.phase,
                                    use_mix=True)

        dataset_size = len(dataset)
        iter_num = int(dataset_size / self.opt.batch_size)

        self.logger.info(
            f'Dataset loaded, genreA: {self.opt.genreA}, genreB: {self.opt.genreB}, total size: {dataset_size}.'
        )

        ######################
        # Initiate
        ######################

        lambda_A = 10.0  # weight for cycle loss (A -> B -> A^)
        lambda_B = 10.0  # weight for cycle loss (B -> A -> B^)

        lambda_identity = 0.5

        criterionGAN = GANLoss(gan_mode='lsgan')

        criterionCycle = nn.L1Loss()

        criterionIdt = nn.L1Loss()

        GLoss_meter = MovingAverageValueMeter(self.opt.plot_every)
        DLoss_meter = MovingAverageValueMeter(self.opt.plot_every)
        CycleLoss_meter = MovingAverageValueMeter(self.opt.plot_every)

        # loss meters
        losses = {}
        scores = {}

        losses_dict = {'loss_G': [], 'loss_D': [], 'loss_C': [], 'epoch': []}

        ######################
        # Start Training
        ######################

        for epoch in range(self.opt.start_epoch, self.opt.max_epoch):
            loader = DataLoader(dataset,
                                batch_size=self.opt.batch_size,
                                shuffle=True,
                                num_workers=self.opt.num_threads,
                                drop_last=True)
            epoch_start_time = time.time()

            for i, data in enumerate(loader):

                real_A = torch.unsqueeze(data[:, 0, :, :],
                                         1).to(self.device, dtype=torch.float)
                real_B = torch.unsqueeze(data[:, 1, :, :],
                                         1).to(self.device, dtype=torch.float)

                gaussian_noise = torch.abs(
                    torch.normal(mean=torch.zeros(self.opt.data_shape),
                                 std=self.opt.gaussian_std)).to(
                                     self.device, dtype=torch.float)

                if self.opt.model == 'base':

                    ######################
                    # Generator
                    ######################

                    fake_B = self.generator_A2B(real_A)  # X -> Y'
                    fake_A = self.generator_B2A(real_B)  # Y -> X'

                    fake_B_copy = copy.copy(fake_B)
                    fake_A_copy = copy.copy(fake_A)

                    DB_fake = self.discriminator_B(
                        fake_B +
                        gaussian_noise)  # netD_x provide feedback to netG_x
                    DA_fake = self.discriminator_A(fake_A + gaussian_noise)

                    loss_G_A2B = criterionGAN(DB_fake, True)
                    loss_G_B2A = criterionGAN(DA_fake, True)

                    # cycle_consistence
                    cycle_A = self.generator_B2A(fake_B)  # Y' -> X^
                    cycle_B = self.generator_A2B(fake_A)  # Y -> X' -> Y^

                    loss_cycle_A2B = criterionCycle(cycle_A, real_A) * lambda_A
                    loss_cycle_B2A = criterionCycle(cycle_B, real_B) * lambda_B

                    # identity loss
                    if lambda_identity > 0:
                        # netG_x should be identity if real_y is fed: ||netG_x(real_y) - real_y||
                        idt_A = self.generator_A2B(real_B)
                        idt_B = self.generator_B2A(real_A)
                        loss_idt_A = criterionIdt(
                            idt_A, real_B) * lambda_A * lambda_identity
                        loss_idt_B = criterionIdt(
                            idt_B, real_A) * lambda_A * lambda_identity

                    else:
                        loss_idt_A = 0.
                        loss_idt_B = 0.

                    loss_idt = loss_idt_A + loss_idt_B

                    self.GA2B_optimizer.zero_grad()  # set g_x and g_y gradients to zero
                    loss_A2B = loss_G_A2B + loss_cycle_A2B + loss_idt_A
                    loss_A2B.backward(retain_graph=True)
                    self.GA2B_optimizer.step()

                    self.GB2A_optimizer.zero_grad()  # set g_x and g_y gradients to zero
                    loss_B2A = loss_G_B2A + loss_cycle_B2A + loss_idt_B
                    loss_B2A.backward(retain_graph=True)
                    self.GB2A_optimizer.step()

                    cycle_loss = loss_cycle_A2B + loss_cycle_B2A
                    CycleLoss_meter.add(cycle_loss.item())

                    loss_G = loss_G_A2B + loss_G_B2A + loss_idt
                    GLoss_meter.add(loss_G.item())

                    ######################
                    # Sample
                    ######################
                    fake_A_sample, fake_B_sample = (None, None)
                    if self.opt.use_image_pool:
                        [fake_A_sample,
                         fake_B_sample] = self.pool([fake_A_copy, fake_B_copy])

                    ######################
                    # Discriminator
                    ######################

                    # loss_real
                    DA_real = self.discriminator_A(real_A + gaussian_noise)
                    DB_real = self.discriminator_B(real_B + gaussian_noise)

                    loss_DA_real = criterionGAN(DA_real, True)
                    loss_DB_real = criterionGAN(DB_real, True)

                    # loss fake
                    if self.opt.use_image_pool:
                        DA_fake_sample = self.discriminator_A(fake_A_sample +
                                                              gaussian_noise)
                        DB_fake_sample = self.discriminator_B(fake_B_sample +
                                                              gaussian_noise)

                        loss_DA_fake = criterionGAN(DA_fake_sample, False)
                        loss_DB_fake = criterionGAN(DB_fake_sample, False)

                    else:
                        loss_DA_fake = criterionGAN(DA_fake, False)
                        loss_DB_fake = criterionGAN(DB_fake, False)

                    # loss and backward
                    self.DA_optimizer.zero_grad()
                    loss_DA = (loss_DA_real + loss_DA_fake) * 0.5
                    loss_DA.backward()
                    self.DA_optimizer.step()

                    self.DB_optimizer.zero_grad()
                    loss_DB = (loss_DB_real + loss_DB_fake) * 0.5
                    loss_DB.backward()
                    self.DB_optimizer.step()

                    loss_D = loss_DA + loss_DB
                    DLoss_meter.add(loss_D.item())

                else:
                    real_mixed = torch.unsqueeze(data[:, 2, :, :],
                                                 1).to(self.device,
                                                       dtype=torch.float)

                    ######################
                    # Generator
                    ######################

                    fake_B = self.generator_A2B(real_A)  # X -> Y'
                    fake_A = self.generator_B2A(real_B)  # Y -> X'

                    fake_B_copy = fake_B.detach().clone()
                    fake_A_copy = fake_A.detach().clone()

                    DB_fake = self.discriminator_B(
                        fake_B +
                        gaussian_noise)  # netD_x provide feedback to netG_x
                    DA_fake = self.discriminator_A(fake_A + gaussian_noise)

                    loss_G_A2B = criterionGAN(DB_fake, True)
                    loss_G_B2A = criterionGAN(DA_fake, True)

                    # cycle_consistence
                    cycle_A = self.generator_B2A(fake_B)  # Y' -> X^
                    cycle_B = self.generator_A2B(fake_A)  # Y -> X' -> Y^

                    loss_cycle_A2B = criterionCycle(cycle_A, real_A) * lambda_A
                    loss_cycle_B2A = criterionCycle(cycle_B, real_B) * lambda_B

                    # identity loss
                    if lambda_identity > 0:
                        # netG_x should be identity if real_y is fed: ||netG_x(real_y) - real_y||
                        idt_A = self.generator_A2B(real_B)
                        idt_B = self.generator_B2A(real_A)
                        loss_idt_A = criterionIdt(
                            idt_A, real_B) * lambda_A * lambda_identity
                        loss_idt_B = criterionIdt(
                            idt_B, real_A) * lambda_A * lambda_identity

                    else:
                        loss_idt_A = 0.
                        loss_idt_B = 0.

                    loss_idt = loss_idt_A + loss_idt_B

                    self.GA2B_optimizer.zero_grad()  # set g_x and g_y gradients to zero
                    loss_A2B = loss_G_A2B + loss_cycle_A2B + loss_idt_A
                    loss_A2B.backward(retain_graph=True)
                    self.GA2B_optimizer.step()

                    self.GB2A_optimizer.zero_grad()  # set g_x and g_y gradients to zero
                    loss_B2A = loss_G_B2A + loss_cycle_B2A + loss_idt_B
                    loss_B2A.backward(retain_graph=True)
                    self.GB2A_optimizer.step()

                    cycle_loss = loss_cycle_A2B + loss_cycle_B2A
                    CycleLoss_meter.add(cycle_loss.item())

                    loss_G = loss_G_A2B + loss_G_B2A + loss_idt
                    GLoss_meter.add(loss_G.item())

                    ######################
                    # Sample
                    ######################
                    fake_A_sample, fake_B_sample = (None, None)
                    if self.opt.use_image_pool:
                        [fake_A_sample,
                         fake_B_sample] = self.pool([fake_A_copy, fake_B_copy])

                    ######################
                    # Discriminator
                    ######################

                    # loss_real
                    DA_real = self.discriminator_A(real_A + gaussian_noise)
                    DB_real = self.discriminator_B(real_B + gaussian_noise)

                    DA_real_all = self.discriminator_A_all(real_mixed +
                                                           gaussian_noise)
                    DB_real_all = self.discriminator_B_all(real_mixed +
                                                           gaussian_noise)

                    loss_DA_real = criterionGAN(DA_real, True)
                    loss_DB_real = criterionGAN(DB_real, True)

                    loss_DA_all_real = criterionGAN(DA_real_all, True)
                    loss_DB_all_real = criterionGAN(DB_real_all, True)

                    # loss fake
                    if self.opt.use_image_pool:
                        DA_fake_sample = self.discriminator_A(fake_A_sample +
                                                              gaussian_noise)
                        DB_fake_sample = self.discriminator_B(fake_B_sample +
                                                              gaussian_noise)

                        DA_fake_sample_all = self.discriminator_A_all(
                            fake_A_sample + gaussian_noise)
                        DB_fake_sample_all = self.discriminator_B_all(
                            fake_B_sample + gaussian_noise)

                        loss_DA_all_fake = criterionGAN(
                            DA_fake_sample_all, False)
                        loss_DB_all_fake = criterionGAN(
                            DB_fake_sample_all, False)

                        loss_DA_fake = criterionGAN(DA_fake_sample, False)
                        loss_DB_fake = criterionGAN(DB_fake_sample, False)

                    else:
                        DA_fake_all = self.discriminator_A_all(fake_A_copy +
                                                               gaussian_noise)
                        DB_fake_all = self.discriminator_B_all(fake_B_copy +
                                                               gaussian_noise)

                        loss_DA_all_fake = criterionGAN(DA_fake_all, False)
                        loss_DB_all_fake = criterionGAN(DB_fake_all, False)

                        loss_DA_fake = criterionGAN(DA_fake, False)
                        loss_DB_fake = criterionGAN(DB_fake, False)

                    # loss and backward
                    self.DA_optimizer.zero_grad()
                    loss_DA = (loss_DA_real + loss_DA_fake) * 0.5
                    loss_DA.backward()
                    self.DA_optimizer.step()

                    self.DB_optimizer.zero_grad()
                    loss_DB = (loss_DB_real + loss_DB_fake) * 0.5
                    loss_DB.backward()
                    self.DB_optimizer.step()

                    self.DA_all_optimizer.zero_grad()
                    loss_DA_all = (loss_DA_all_real + loss_DA_all_fake) * 0.5
                    loss_DA_all.backward()
                    self.DA_all_optimizer.step()

                    self.DB_all_optimizer.zero_grad()
                    loss_DB_all = (loss_DB_all_real + loss_DB_all_fake) * 0.5
                    loss_DB_all.backward()
                    self.DB_all_optimizer.step()

                    loss_D = loss_DA + loss_DB + loss_DB_all + loss_DA_all
                    DLoss_meter.add(loss_D.item())

                ######################
                # Snapshot
                ######################

                if i % self.opt.plot_every == 0:
                    file_name = self.opt.name + '_snap_%03d_%05d.png' % (
                        epoch,
                        i,
                    )
                    # test_path = os.path.join(self.opt.checkpoint_path, file_name)
                    # tv.utils.save_image(fake_B, test_path, normalize=True)
                    # self.logger.info(f'Snapshot {file_name} saved.')

                    losses['loss_C'] = float(CycleLoss_meter.value()[0])
                    losses['loss_G'] = float(GLoss_meter.value()[0])
                    losses['loss_D'] = float(DLoss_meter.value()[0])

                    self.logger.info(str(losses))
                    self.logger.info('Epoch {} progress: {:.2%}\n'.format(
                        epoch, i / iter_num))

            # save model
            if epoch % self.opt.save_every == 0 or epoch == self.opt.max_epoch - 1:
                self.save_model(epoch)

            ######################
            # lr_scheduler
            ######################

            self.GA2B_scheduler.step(epoch)
            self.GB2A_scheduler.step(epoch)
            self.DA_scheduler.step(epoch)
            self.DB_scheduler.step(epoch)

            if self.opt.model != 'base':
                self.DA_all_scheduler.step(epoch)
                self.DB_all_scheduler.step(epoch)

            epoch_time = int(time.time() - epoch_start_time)

            ######################
            # Logging
            ######################

            self.logger.info(
                f'Epoch {epoch} finished, cost time {epoch_time}\n')
            self.logger.info(str(losses) + '\n\n')

            ######################
            # Loss_Dict
            ######################

            losses_dict['loss_C'].append(losses['loss_C'])
            losses_dict['loss_G'].append(losses['loss_G'])
            losses_dict['loss_D'].append(losses['loss_D'])
            losses_dict['epoch'].append(epoch)

            with open(self.opt.loss_save_path, 'w') as f:
                json.dump(losses_dict, f)
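
The GANLoss(gan_mode='lsgan') criterion used throughout Example #6 is defined elsewhere in the project. A hedged sketch of an LSGAN-style criterion with the same call pattern, criterionGAN(prediction, target_is_real), modeled on common CycleGAN implementations:

import torch
import torch.nn as nn

class GANLoss(nn.Module):
    # least-squares GAN criterion: regress discriminator outputs towards 1 for
    # real targets and towards 0 for fake targets
    def __init__(self, gan_mode='lsgan'):
        super().__init__()
        assert gan_mode == 'lsgan', 'only the lsgan mode is sketched here'
        self.loss = nn.MSELoss()

    def forward(self, prediction, target_is_real):
        target = torch.ones_like(prediction) if target_is_real else torch.zeros_like(prediction)
        return self.loss(prediction, target)
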
Code Example #7
File: train.py  Project: YuxinZou/deep-learning-lab
def train(**kwargs):
    opt._parse(kwargs)

    image_folder_path = 'DataSets/images/'
    cvs_file_path = 'DataSets/labels.csv'

    dataset = DataSets(cvs_file_path, image_folder_path)
    data_size = len(dataset)
    indices = list(range(data_size))
    split = int(np.floor(data_size * 0.2))
    np.random.seed(42)
    np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    valid_sampler = torch.utils.data.SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=1,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             sampler=valid_sampler)
    print('load data')

    avg_loss = AverageValueMeter()
    ma20_loss = MovingAverageValueMeter(windowsize=20)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    start_epoch = 0
    best_map = -100
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    optimizer = optim.SGD(trainer.faster_rcnn.parameters(),
                          lr=opt.lr,
                          momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    if opt.load_path:
        print('load pretrained model from %s' % opt.load_path)
        checkpoint = torch.load(opt.load_path)
        start_epoch = checkpoint['epoch']
        best_map = checkpoint['best_map']
        trainer.faster_rcnn.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])
        print("> Loaded checkpoint '{}' (epoch {})".format(
            args.resume, start_epoch))

    #trainer.vis.text(dataset.db.label_names, win='labels')

    # set up TensorBoard for visualization
    writer = SummaryWriter('runs/' + opt.log_root)

    for epoch in range(start_epoch, opt.epoch):
        trainer.train(mode=True)  # must be set this way during training
        for ii, (img, _, _, bbox_, label_, scale,
                 _) in enumerate(train_loader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            optimizer.zero_grad()
            loss = trainer.forward(img, bbox, label, scale)
            loss.total_loss.backward()
            optimizer.step()
            #print(loss)
            #print(loss.total_loss)
            loss_value = loss.total_loss.cpu().data.numpy()
            avg_loss.add(float(loss_value))
            ma20_loss.add(float(loss_value))
            print(
                '[epoch:{}/{}]  [batch:{}/{}]  [sample_loss:{:.4f}] [avg_loss:{:.4f}]  [ma20_loss:{:.4f}]'
                .format(epoch, opt.epoch, ii + 1, len(train_loader),
                        loss.total_loss.data,
                        avg_loss.value()[0],
                        ma20_loss.value()[0]))

            if (ii + 1) % opt.plot_every == 0:
                niter = epoch * len(train_loader) + ii
                writer.add_scalar('Train/Loss', ma20_loss.value()[0], niter)

        eval_result = eval(val_loader, faster_rcnn, test_num=opt.test_num)
        print(eval_result['map'])

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            state = {
                "epoch": epoch + 1,
                "best_map": best_map,
                "model_state": trainer.faster_rcnn.state_dict(),
                "optimizer_state": optimizer.state_dict()
            }
            torch.save(state, opt.model_para)
        scheduler.step()
    state = {
        "epoch": epoch + 1,
        "best_map": best_map,
        "model_state": trainer.faster_rcnn.state_dict(),
        "optimizer_state": optimizer.state_dict()
    }
    torch.save(state, 'last_epoch.pkl')
    writer.close()
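
Example #7 tracks the same loss with both of torchnet's meters: AverageValueMeter keeps a running mean over everything it has seen, while MovingAverageValueMeter only averages the last windowsize values, which is why the two printed numbers drift apart as training progresses. A tiny illustration of the difference:

from torchnet.meter import AverageValueMeter, MovingAverageValueMeter

avg = AverageValueMeter()
ma3 = MovingAverageValueMeter(windowsize=3)
for v in [10.0, 10.0, 10.0, 1.0, 1.0, 1.0]:
    avg.add(v)
    ma3.add(v)
print(avg.value()[0])  # 5.5 -> mean of all six values
print(ma3.value()[0])  # 1.0 -> mean of the last three values only
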
Code Example #8
    def __init__(self, hyperparameters, gpu_id):
        super(Discriminator, self).__init__()

        self.hyp = hyperparameters
        self.gpu_id = gpu_id
        self.w_loss_meter = MovingAverageValueMeter(5)
        self.d_loss_meter = MovingAverageValueMeter(5)
        self.r_loss_meter = MovingAverageValueMeter(5)
        self.f_loss_meter = MovingAverageValueMeter(5)
        self.gp_loss_meter = MovingAverageValueMeter(5)

        # Architecture
        self.lab0 = nn.ConvTranspose2d(1, self.hyp['q1'], (1, 55), bias=False)
        self.conv0 = nn.Conv2d(1, self.hyp['q2'], (55, 1), bias=False)
        self.nonlin0 = nn.Sequential(*[
            nn.BatchNorm2d(self.hyp['q1'] + self.hyp['q2']),
            nn.LeakyReLU(self.hyp['lrelu_d'])
        ] if self.hyp['bd0'] else [
            nn.LeakyReLU(self.hyp['lrelu_d']),
        ])

        self.conv1 = nn.Conv2d(self.hyp['q1'] + self.hyp['q2'],
                               self.hyp['q3'], (1, 55),
                               bias=False)
        self.nonlin1 = nn.Sequential(*[
            nn.BatchNorm2d(self.hyp['q3']),
            nn.LeakyReLU(self.hyp['lrelu_d'])
        ] if self.hyp['bd1'] else [
            nn.LeakyReLU(self.hyp['lrelu_d']),
        ])

        self.fc = nn.Linear(self.hyp['q3'], 1, bias=False)

        self.cuda(self.gpu_id)
        opt_param_list = [{
            'params': [
                param for name, param in self.named_parameters()
                if 'lab0' not in name
            ]
        }, {
            'params': self.lab0.parameters(),
            'lr': 1 * self.hyp['lr_d']
        }]

        self.optimizer = torch.optim.Adam(opt_param_list,
                                          lr=self.hyp['lr_d'],
                                          betas=(self.hyp['b1_d'],
                                                 self.hyp['b2_d']),
                                          weight_decay=self.hyp['wd_d'])
        # rand init
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                torch.nn.init.kaiming_normal_(m.weight,
                                              a=self.hyp['lrelu_d'],
                                              nonlinearity='leaky_relu')
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight,
                                              a=self.hyp['lrelu_d'],
                                              nonlinearity='leaky_relu')
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                torch.nn.init.constant_(m.weight, 1)
                torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                torch.nn.init.kaiming_normal_(m.weight)
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
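
Example #8 only shows the discriminator's constructor; the five meters it creates (Wasserstein estimate, total, real, fake, and gradient-penalty losses) suggest a WGAN-GP style update. A hypothetical training step, not part of the original listing, illustrating how those meters could be fed; the hyperparameter key 'gp_lambda' and the (data, labels) forward signature are assumptions:

    def train_step(self, netg, real_data, labels):
        # hypothetical WGAN-GP update (assumed hyperparameter key 'gp_lambda')
        self.zero_grad()
        batch = real_data.size(0)

        noise = torch.randn(batch, netg.noise_dim).cuda(self.gpu_id)
        fake_data = netg(noise, labels).detach()

        d_real = self(real_data, labels).mean()
        d_fake = self(fake_data, labels).mean()

        # gradient penalty on random interpolations between real and fake samples
        eps = torch.rand(batch, 1, 1, 1).cuda(self.gpu_id)
        interp = (eps * real_data + (1 - eps) * fake_data).requires_grad_(True)
        d_interp = self(interp, labels)
        grads = torch.autograd.grad(outputs=d_interp, inputs=interp,
                                    grad_outputs=torch.ones_like(d_interp),
                                    create_graph=True)[0]
        gp = ((grads.view(batch, -1).norm(2, dim=1) - 1) ** 2).mean()

        w_dist = d_real - d_fake
        d_loss = d_fake - d_real + self.hyp['gp_lambda'] * gp
        d_loss.backward()
        self.optimizer.step()

        self.w_loss_meter.add(w_dist.detach().cpu())
        self.d_loss_meter.add(d_loss.detach().cpu())
        self.r_loss_meter.add(d_real.detach().cpu())
        self.f_loss_meter.add(d_fake.detach().cpu())
        self.gp_loss_meter.add(gp.detach().cpu())
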
Code Example #9
File: train.py  Project: eglrp/studyseg
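# NOTE: this listing starts mid-statement; the fragment below appears to be the
# tail of a net.load_state_dict(torch.load(os.path.join(...), map_location=...)) call.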
            save_dir, 'models',
            modelName + '_epoch-' + str(resume_epoch - 1) + '.pth'),
                   map_location=lambda storage, loc: storage)
    )  # Load all tensors onto the CPU

if gpu_id >= 0:
    torch.cuda.set_device(device=gpu_id)
    net.cuda()

if resume_epoch != nEpochs:
    # Logging into Tensorboard
    log_dir = os.path.join(
        save_dir, 'models',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)
    loss_meter = MovingAverageValueMeter(20)

    # Use the following optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=p['lr'],
                          momentum=p['momentum'],
                          weight_decay=p['wd'])
    p['optimizer'] = str(optimizer)

    composed_transforms_tr = transforms.Compose([
        tr.RandomSized(512),
        tr.RandomRotate(15),
        tr.RandomHorizontalFlip(),
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()
    ])
Code Example #10
File: train.py  Project: yashkhem1/SPNet
def main(config, cuda, excludeval, embedding, continue_from, nolog, inputmix,
         imagedataset, experimentid, nshot, ishot):
    frame = inspect.currentframe()
    args, _, _, values = inspect.getargvalues(frame)
    #print(values)

    #in case you want to save to the location of script you're running
    datadir = os.path.join(
        '/home/SharedData/omkar/zscoseg/yash_manas/data/datasets',
        imagedataset)
    if not nolog:
        #name the savedir, might add logs/ before the datetime for clarity
        if experimentid is None:
            savedir = time.strftime('%Y%m%d%H%M%S')
        else:
            savedir = experimentid
        #the full savepath is then:
        savepath = os.path.join('logs', imagedataset, savedir)
        #in case the folder has not been created yet / except already exists error:
        try:
            os.makedirs(savepath)
            print("Log dir:", savepath)
        except FileExistsError:
            pass
        if continue_from is None:
            #now join the path in save_screenshot:
            shutil.copytree('./libs/', savepath + '/libs')
            shutil.copy2(osp.abspath(inspect.stack()[0][1]), savepath)
            shutil.copy2(config, savepath)
            args_dict = {}
            for a in args:
                args_dict[a] = values[a]
            with open(savepath + '/args.json', 'w') as fp:
                json.dump(args_dict, fp)

    cuda = cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")

    if cuda:
        current_device = torch.cuda.current_device()
        print("Running on", torch.cuda.get_device_name(current_device))
    else:
        print("Running on CPU")

    # Configuration
    CONFIG = Dict(yaml.load(open(config), Loader=yaml.FullLoader))
    visibility_mask = {}
    if excludeval:
        seen_classes = np.load(datadir + '/split/seen_cls.npy')
    else:
        seen_classes = np.asarray(np.concatenate([
            np.load(datadir + '/split/seen_cls.npy'),
            np.load(datadir + '/split/val_cls.npy')
        ]),
                                  dtype=int)

    novel_classes = np.load(datadir + '/split/novel_cls.npy')
    seen_novel_classes = np.concatenate([seen_classes, novel_classes])

    seen_map = np.array([-1] * 256)
    for i, n in enumerate(list(seen_classes)):
        seen_map[n] = i

    visibility_mask[0] = seen_map.copy()
    for i, n in enumerate(list(novel_classes)):
        visibility_mask[i + 1] = seen_map.copy()
        visibility_mask[i + 1][n] = seen_classes.shape[0] + i
    if excludeval:
        train = np.load(datadir + '/split/train_list.npy')[:-CONFIG.VAL_SIZE]
    else:
        train = np.load(datadir + '/split/train_list.npy')

    novelset = []
    seenset = []

    if inputmix == 'novel' or inputmix == 'both':
        inverse_dict = pickle.load(
            open(datadir + '/split/inverse_dict_train.pkl', 'rb'))
        for icls, key in enumerate(novel_classes):
            if (inverse_dict[key].size > 0):
                for v in inverse_dict[key][ishot * 20:ishot * 20 + nshot]:
                    novelset.append((v, icls))
                    #print((v, icls))

    if inputmix == 'both':
        seenset = []
        inverse_dict = pickle.load(
            open(datadir + '/split/inverse_dict_train.pkl', 'rb'))
        for icls, key in enumerate(seen_classes):
            if (inverse_dict[key].size > 0):
                for v in inverse_dict[key][ishot * 20:ishot * 20 + nshot]:
                    seenset.append(v)

    if inputmix == 'seen':
        seenset = range(train.shape[0])

    sampler = RandomImageSampler(seenset, novelset)

    if inputmix == 'novel':
        visible_classes = seen_novel_classes
        if nshot is not None:
            nshot = str(nshot) + 'n'
    elif inputmix == 'seen':
        visible_classes = seen_classes
        if nshot is not None:
            nshot = str(nshot) + 's'
    elif inputmix == 'both':
        visible_classes = seen_novel_classes
        if nshot is not None:
            nshot = str(nshot) + 'b'

    print("Visible classes:", visible_classes.size, " \nClasses are: ",
          visible_classes, "\nTrain Images:", train.shape[0])

    #a Dataset 10k or 164k
    dataset = get_dataset(CONFIG.DATASET)(train=train,
                                          test=None,
                                          root=CONFIG.ROOT,
                                          split=CONFIG.SPLIT.TRAIN,
                                          base_size=513,
                                          crop_size=CONFIG.IMAGE.SIZE.TRAIN,
                                          mean=(CONFIG.IMAGE.MEAN.B,
                                                CONFIG.IMAGE.MEAN.G,
                                                CONFIG.IMAGE.MEAN.R),
                                          warp=CONFIG.WARP_IMAGE,
                                          scale=(0.5, 1.5),
                                          flip=True,
                                          visibility_mask=visibility_mask)

    # DataLoader
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=CONFIG.BATCH_SIZE.TRAIN,
                                         num_workers=CONFIG.NUM_WORKERS,
                                         sampler=sampler)

    if embedding == 'word2vec':
        class_emb = pickle.load(
            open(datadir + '/word_vectors/word2vec.pkl', "rb"))
    elif embedding == 'fasttext':
        class_emb = pickle.load(
            open(datadir + '/word_vectors/fasttext.pkl', "rb"))
    elif embedding == 'fastnvec':
        class_emb = np.concatenate([
            pickle.load(open(datadir + '/word_vectors/fasttext.pkl', "rb")),
            pickle.load(open(datadir + '/word_vectors/word2vec.pkl', "rb"))
        ],
                                   axis=1)
    else:
        print("invalid emb ", embedding)
        sys.exit()

    print((class_emb.shape))
    class_emb = F.normalize(torch.tensor(class_emb), p=2, dim=1).cuda()

    loader_iter = iter(loader)
    DeepLab = DeepLabV2_ResNet101_MSC
    #import ipdb; ipdb.set_trace()
    state_dict = torch.load(CONFIG.INIT_MODEL)

    # Model load
    model = DeepLab(class_emb.shape[1], class_emb[visible_classes])
    if continue_from is not None and continue_from > 0:
        print("Loading checkpoint: {}".format(continue_from))
        #import ipdb; ipdb.set_trace()
        model = nn.DataParallel(model)
        state_file = osp.join(savepath,
                              "checkpoint_{}.pth".format(continue_from))
        if osp.isfile(state_file + '.tar'):
            state_dict = torch.load(state_file + '.tar')
            model.load_state_dict(state_dict['state_dict'], strict=True)
        elif osp.isfile(state_file):
            state_dict = torch.load(state_file)
            model.load_state_dict(state_dict, strict=True)
        else:
            print("Checkpoint {} not found".format(continue_from))
            sys.exit()

    else:
        model.load_state_dict(
            state_dict, strict=False
        )  # set strict=True to verify that the checkpoint loads correctly if performance looks low
        model = nn.DataParallel(model)
    model.to(device)
    # Optimizer

    optimizer = {
        "sgd":
        torch.optim.SGD(
            # cf lr_mult and decay_mult in train.prototxt
            params=[{
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            }, {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            }, {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.LR,
                "weight_decay": 0.0,
            }],
            momentum=CONFIG.MOMENTUM,
        ),
        "adam":
        torch.optim.Adam(
            # cf lr_mult and decay_mult in train.prototxt
            params=[{
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            }, {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            }, {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.LR,
                "weight_decay": 0.0,
            }])
        # Add any other optimizer
    }.get(CONFIG.OPTIMIZER)

    if 'optimizer' in state_dict:
        optimizer.load_state_dict(state_dict['optimizer'])
    print("Learning rate:", CONFIG.LR)
    # Loss definition
    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    criterion.to(device)

    if not nolog:
        # TensorBoard Logger
        if continue_from is not None:
            writer = SummaryWriter(
                savepath +
                '/runs/fs_{}_{}_{}'.format(continue_from, nshot, ishot))
        else:
            writer = SummaryWriter(savepath + '/runs')
        loss_meter = MovingAverageValueMeter(20)

    model.train()
    model.module.scale.freeze_bn()

    pbar = tqdm(
        range(1, CONFIG.ITER_MAX + 1),
        total=CONFIG.ITER_MAX,
        leave=False,
        dynamic_ncols=True,
    )
    for iteration in pbar:

        # Set a learning rate
        poly_lr_scheduler(
            optimizer=optimizer,
            init_lr=CONFIG.LR,
            iter=iteration - 1,
            lr_decay_iter=CONFIG.LR_DECAY,
            max_iter=CONFIG.ITER_MAX,
            power=CONFIG.POLY_POWER,
        )

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG.ITER_SIZE + 1):
            try:
                data, target = next(loader_iter)
            except StopIteration:
                loader_iter = iter(loader)
                data, target = next(loader_iter)

            # Image
            data = data.to(device)

            # Propagate forward
            outputs = model(data)
            # Loss
            loss = 0
            for output in outputs:
                # Resize target for {100%, 75%, 50%, Max} outputs
                target_ = resize_target(target, output.size(2))
                target_ = torch.tensor(target_).to(device)
                loss += criterion.forward(output, target_)

            # Backpropagate (just compute gradients wrt the loss)
            #print(loss)
            loss /= float(CONFIG.ITER_SIZE)
            loss.backward()

            iter_loss += float(loss)
            del data, target, outputs

        #print(iter_loss)
        pbar.set_postfix(loss="%.3f" % iter_loss)

        # Update weights with accumulated gradients
        optimizer.step()
        if not nolog:
            loss_meter.add(iter_loss)
            # TensorBoard
            if iteration % CONFIG.ITER_TB == 0:
                writer.add_scalar("train_loss",
                                  loss_meter.value()[0], iteration)
                for i, o in enumerate(optimizer.param_groups):
                    writer.add_scalar("train_lr_group{}".format(i), o["lr"],
                                      iteration)
                if False:  # This produces a large log file
                    for name, param in model.named_parameters():
                        name = name.replace(".", "/")
                        writer.add_histogram(name,
                                             param,
                                             iteration,
                                             bins="auto")
                        if param.requires_grad:
                            writer.add_histogram(name + "/grad",
                                                 param.grad,
                                                 iteration,
                                                 bins="auto")

            # Save a model
            if continue_from is not None:
                if iteration in CONFIG.ITER_SAVE:
                    torch.save(
                        {
                            'iteration': iteration,
                            'state_dict': model.state_dict(),
                        },
                        osp.join(
                            savepath, "checkpoint_{}_{}_{}_{}.pth.tar".format(
                                continue_from, nshot, ishot, iteration)),
                    )

                # Save a model (short term) [unnecessary for fewshot]
                if False and iteration % 100 == 0:
                    torch.save(
                        {
                            'iteration': iteration,
                            'state_dict': model.state_dict(),
                        },
                        osp.join(
                            savepath,
                            "checkpoint_{}_{}_{}_current.pth.tar".format(
                                continue_from, nshot, ishot)),
                    )
                    print(
                        osp.join(
                            savepath,
                            "checkpoint_{}_{}_{}_current.pth.tar".format(
                                continue_from, nshot, ishot)))
            else:
                if iteration % CONFIG.ITER_SAVE == 0:
                    torch.save(
                        {
                            'iteration': iteration,
                            'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                        },
                        osp.join(savepath,
                                 "checkpoint_{}.pth.tar".format(iteration)),
                    )

                # Save a model (short term)
                if iteration % 100 == 0:
                    torch.save(
                        {
                            'iteration': iteration,
                            'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                        },
                        osp.join(savepath, "checkpoint_current.pth.tar"),
                    )

        torch.cuda.empty_cache()

    if not nolog:
        if continue_from is not None:
            torch.save(
                {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                },
                osp.join(
                    savepath, "checkpoint_{}_{}_{}_{}.pth.tar".format(
                        continue_from, nshot, ishot, CONFIG.ITER_MAX)))
        else:
            torch.save(
                {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                osp.join(savepath,
                         "checkpoint_{}.pth.tar".format(CONFIG.ITER_MAX)))
Code Example #11
File: train.py  Project: ganeshnagaraja/SharpNet
def main():
    parser = argparse.ArgumentParser(description="Train the SharpNet network")
    parser.add_argument('-c',
                        '--configFile',
                        required=True,
                        help='Path to config yaml file',
                        metavar='path/to/config')
    args = parser.parse_args()

    CONFIG_FILE_PATH = args.configFile
    with open(CONFIG_FILE_PATH) as fd:
        config_yaml = oyaml.load(
            fd)  # Returns an ordered dict. Used for printing

    config = AttrDict(config_yaml)
    print(
        colored(
            'Config being used for training:\n{}\n\n'.format(
                oyaml.dump(config_yaml)), 'green'))

    os.environ['CUDA_VISIBLE_DEVICES'] = config.train.cuda_device
    cuda = False if config.train.nocuda else True

    resnet50_url = 'https://download.pytorch.org/models/resnet50-19c8e357.pth'

    cuda = cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")
    if cuda:
        current_device = torch.cuda.current_device()
        print("Running on " + torch.cuda.get_device_name(current_device))
    else:
        print("Running on CPU")

    now = datetime.datetime.now()
    date_str = now.strftime("%d-%m-%Y_%H-%M")

    t = []
    torch.manual_seed(329)

    bias = True if config.train.bias else False

    # build model
    model = SharpNet(ResBlock, [3, 4, 6, 3], [2, 2, 2, 2, 2],
                     use_normals=True if config.train.normals else False,
                     use_depth=True if config.train.depth else False,
                     use_boundary=True if config.train.boundary else False,
                     bias_decoder=bias)

    model_dict = model.state_dict()

    # Load pretrained weights

    resnet_path = 'models/resnet50-19c8e357.pth'

    if not os.path.exists(resnet_path):
        command = 'wget ' + resnet50_url + ' && mkdir -p models/ && mv resnet50-19c8e357.pth models/'
        os.system(command)

    resnet50_dict = torch.load(resnet_path)

    resnet_dict = {
        k.replace('.', '_img.', 1): v
        for k, v in resnet50_dict.items()
        if k.replace('.', '_img.', 1) in model_dict
    }  # load weights up to pool

    if config.train.pretrained_model is not None:
        print('Loading checkpoint from {}'.format(
            config.train.pretrained_model))
        model_path = config.train.pretrained_model
        tmp_dict = torch.load(model_path)
        if config.train.depth:
            pretrained_dict = {
                k: v
                for k, v in tmp_dict.items() if k in model_dict
            }
        else:
            pretrained_dict = {
                k: v
                for k, v in tmp_dict.items()
                if (k in model_dict and not k.startswith('depth_decoder'))
            }

    else:
        pretrained_dict = resnet_dict

    try:
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        print('Successfully loaded pretrained ResNet weights')
    except Exception:
        print('Could not load the pretrained model weights')
        sys.exit(1)

    model = nn.DataParallel(model)
    model.to(device)
    model.zero_grad()
    model.train()

    freeze_decoders = config.train.decoder_freeze.split(',')
    freeze_model_decoders(model.module, freeze_decoders)

    if config.train.dataset != 'NYU':
        sharpnet_loss = SharpNetLoss(
            lamb=0.5,
            mu=1.0,
            use_depth=True if config.train.depth else False,
            use_boundary=True if config.train.boundary else False,
            use_normals=True if config.train.normals else False,
            use_geo_consensus=True if config.train.geo_consensus else False)
    else:
        sharpnet_loss = SharpNetLoss(
            lamb=0.5,
            mu=1.0,
            use_depth=True if config.train.depth else False,
            use_boundary=False,
            use_normals=False,
            use_geo_consensus=True if config.train.geo_consensus else False)

    if config.train.optimizer == 'SGD':
        optimizer = SGD(params=get_params(model),
                        lr=float(config.train.learning_rate),
                        weight_decay=float(config.train.decay),
                        momentum=0.9)
    elif config.train.optimizer == 'Adam':
        optimizer = Adam(params=get_params(model),
                         lr=float(config.train.learning_rate),
                         weight_decay=float(config.train.decay))
    else:
        print(
            'Could not configure the optimizer, please select --optimizer Adam or SGD'
        )
        sys.exit(0)

    # TensorBoard Logger
    train_loss_meter = MovingAverageValueMeter(20)
    val_loss_meter = MovingAverageValueMeter(3)
    depth_loss_meter = MovingAverageValueMeter(
        3) if config.train.depth else None
    normals_loss_meter = MovingAverageValueMeter(
        3) if config.train.normals and config.train.dataset != 'NYU' else None
    grad_loss_meter = MovingAverageValueMeter(
        3) if config.train.depth else None
    boundary_loss_meter = MovingAverageValueMeter(
        3) if config.train.boundary and config.train.dataset != 'NYU' else None
    consensus_loss_meter = MovingAverageValueMeter(
        3) if config.train.geo_consensus else None

    exp_name = config.train.experiment_name if config.train.experiment_name is not None else ''
    print('Experiment Name: {}'.format(exp_name))

    log_dir = os.path.join('logs', 'Joint', str(exp_name) + '_' + date_str)
    cp_dir = os.path.join('logs', 'Joint', str(exp_name) + '_' + date_str)
    print('Checkpoint Directory: {}'.format(cp_dir))

    train_writer = SummaryWriter(os.path.join(log_dir, 'train'))
    val_writer = SummaryWriter(os.path.join(log_dir, 'val'))

    if not os.path.exists(cp_dir):
        os.makedirs(cp_dir)
    if not os.path.exists(log_dir):
        os.makedirs(os.path.join(log_dir, 'train'))
        os.makedirs(os.path.join(log_dir, 'val'))

    train_dataloader, val_dataloader = get_trainval_splits(
        config)  # SHREK: modified to pass in the config object.
    # Either pass in the path to the config file, or read the yaml and pass in the resulting dict.
    # The config file only needs to contain the train and val dataset paths.
    # For val, we would like to pass a real-image dataset.

    for epoch in range(config.train.max_epoch):
        if config.train.optimizer == 'SGD':
            adjust_learning_rate(float(config.train.learning_rate),
                                 config.train.lr_mode,
                                 float(config.train.gradient_step),
                                 config.train.max_epoch, optimizer, epoch)

        train_epoch(train_dataloader,
                    val_dataloader,
                    model,
                    sharpnet_loss,
                    optimizer,
                    config.train.start_epoch + epoch,
                    train_writer,
                    val_writer,
                    train_loss_meter,
                    val_loss_meter,
                    depth_loss_meter,
                    grad_loss_meter,
                    normals_loss_meter,
                    date_str=date_str,
                    model_save_path=cp_dir,
                    config=config,
                    boundary_loss_meter=boundary_loss_meter,
                    consensus_loss_meter=consensus_loss_meter)

        # Save a model
        torch.save(
            model.state_dict(),
            os.path.join(
                cp_dir,
                'checkpoint_{}_final.pth'.format(config.train.start_epoch +
                                                 epoch)),
        )
    torch.save(
        model.state_dict(),
        os.path.join(
            cp_dir, 'checkpoint_{}_final.pth'.format(config.train.start_epoch +
                                                     config.train.max_epoch)),
    )

    return None
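
The weight-loading block above keeps only the entries of a pretrained state_dict whose (renamed) keys also exist in the freshly built model. A small sketch of that idiom; the helper name is made up, and the commented call simply mirrors the ResNet-50 renaming used in the script.

import torch


def load_partial_weights(model, pretrained_state, rename=lambda k: k):
    # Keep only entries whose (possibly renamed) keys exist in the target model
    model_dict = model.state_dict()
    transferred = {rename(k): v for k, v in pretrained_state.items()
                   if rename(k) in model_dict}
    model_dict.update(transferred)
    model.load_state_dict(model_dict)
    return transferred


# Illustrative call mirroring the renaming above:
# load_partial_weights(model, torch.load('models/resnet50-19c8e357.pth'),
#                      rename=lambda k: k.replace('.', '_img.', 1))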
Code example #12
0
File: riffgan_model.py Project: josephding23/RiffGAN
    def train(self):
        torch.cuda.empty_cache()

        ######################
        # Save / Load model
        ######################

        if self.opt.continue_train:
            try:
                self.continue_from_latest_checkpoint()
            except Exception as e:
                self.logger.error(e)
                self.opt.continue_train = False
                self.reset_save()
        else:
            self.reset_save()

        dataset = UnitRiffDataset(self.opt.dataset_name, self.opt.instr_type)
        dataset_size = len(dataset)

        self.logger.info(
            f'Dataset {self.opt.dataset_name} loaded, size {dataset_size}')

        ######################
        # Initiate
        ######################

        criterionGAN = nn.BCEWithLogitsLoss()

        GLoss_meter = MovingAverageValueMeter(self.opt.plot_every)
        DLoss_meter = MovingAverageValueMeter(self.opt.plot_every)

        losses = {}

        ######################
        # Start Training
        ######################

        for epoch in range(self.opt.start_epoch, self.opt.max_epoch):
            loader = DataLoader(dataset,
                                batch_size=self.opt.batch_size,
                                shuffle=True,
                                num_workers=self.opt.num_threads,
                                drop_last=False)
            epoch_start_time = time.time()

            for i, data in enumerate(loader):

                batch_size = data.size(0)
                # print(batch_size)

                real_label = torch.ones(size=[batch_size, 1],
                                        device=self.device)
                fake_label = torch.zeros(size=[batch_size, 1],
                                         device=self.device)

                seed = np.array([
                    generate_random_seed(1,
                                         self.opt.instr_type,
                                         pattern=self.opt.chord_type)
                    for _ in range(batch_size)
                ])
                # print(seed.shape)
                noise = torch.randn(batch_size,
                                    self.opt.seed_size,
                                    device=self.device)
                seed = torch.from_numpy(seed).to(device=self.device,
                                                 dtype=torch.float)

                fake_data = self.generator(noise, seed, batch_size)
                D_fake = self.discriminator(fake_data, batch_size)

                real_data = torch.unsqueeze(data, 1).to(device=self.device,
                                                        dtype=torch.float)
                D_real = self.discriminator(real_data, batch_size)
                # print(D_fake.shape)

                ######################
                # Generator
                ######################

                self.G_optimizer.zero_grad()
                loss_G = criterionGAN(D_fake, real_label)
                loss_G.backward(retain_graph=True)

                self.G_optimizer.step()

                GLoss_meter.add(loss_G.item())

                ######################
                # Discriminator
                ######################

                self.D_optimizer.zero_grad()

                loss_D_real = criterionGAN(D_real, real_label)
                loss_D_fake = criterionGAN(D_fake, fake_label)

                loss_D = 0.5 * loss_D_real + 0.5 * loss_D_fake
                loss_D.backward()

                self.D_optimizer.step()
                DLoss_meter.add(loss_D.item())

            if epoch % self.opt.save_every == 0 or epoch == self.opt.max_epoch - 1:
                self.save_model(epoch)

            losses['loss_G'] = float(GLoss_meter.value()[0])
            losses['loss_D'] = float(DLoss_meter.value()[0])

            self.G_scheduler.step(epoch)
            self.D_scheduler.step(epoch)

            epoch_time = int(time.time() - epoch_start_time)

            self.logger.info(
                f'Epoch {epoch} finished, cost time {epoch_time}\n')
            self.logger.info(str(losses) + '\n\n')
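
The loop above alternates a generator update (push the discriminator's score on fake riffs toward the real label) with a discriminator update (real toward 1, fake toward 0) under BCEWithLogitsLoss. A compact sketch of one such step; the generic generator/discriminator interfaces, the optimizers, and the single-logit output shape are assumptions for illustration.

import torch
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()


def gan_step(generator, discriminator, g_opt, d_opt, real, noise_dim, device):
    n = real.size(0)
    ones = torch.ones(n, 1, device=device)
    zeros = torch.zeros(n, 1, device=device)

    # Generator: make the discriminator score the fakes as real
    g_opt.zero_grad()
    fake = generator(torch.randn(n, noise_dim, device=device))
    loss_g = criterion(discriminator(fake), ones)
    loss_g.backward()
    g_opt.step()

    # Discriminator: real -> 1, detached fake -> 0
    d_opt.zero_grad()
    loss_d = 0.5 * (criterion(discriminator(real), ones) +
                    criterion(discriminator(fake.detach()), zeros))
    loss_d.backward()
    d_opt.step()
    return loss_g.item(), loss_d.item()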
Code example #13
0
    def train(self):
        torch.cuda.empty_cache()

        if self.model == 'base':
            dataset = SteelyDataset(self.genreA,
                                    self.genreB,
                                    'train',
                                    use_mix=False)
            dataset_size = len(dataset)

        else:
            dataset = SteelyDataset(self.genreA,
                                    self.genreB,
                                    'train',
                                    use_mix=True)
            dataset_size = len(dataset)

        if self.continue_train:
            self.continue_from_latest_checkpoint()
        else:
            self.empty_checkpoints()
            self.create_save_dirs()

        iter_num = int(dataset_size / self.batch_size)

        print(f'loaded {dataset_size} images for training')

        # optimizers = [optimizer_g, optimizer_d]

        lambda_A = 10.0  # weight for cycle loss (A -> B -> A^)
        lambda_B = 10.0  # weight for cycle loss (B -> A -> B^)

        L1_lambda = 10.0
        lambda_identity = 0.5

        # the criterion is only constructed here; the actual loss is computed later in the loop
        # criterionGAN = nn.MSELoss().to(device)
        criterionGAN = GANLoss(gan_mode='vanilla')

        # cycle loss
        criterionCycle = nn.L1Loss()

        # identity loss
        criterionIdt = nn.L1Loss()

        loss_A_meter = MovingAverageValueMeter(self.plot_every)
        loss_B_meter = MovingAverageValueMeter(self.plot_every)
        score_DA_real_B = MovingAverageValueMeter(self.plot_every)
        score_DA_fake_B = MovingAverageValueMeter(self.plot_every)

        # loss meters
        losses = {}
        scores = {}

        for epoch in range(self.start_epoch, self.max_epoch):
            loader = DataLoader(dataset,
                                batch_size=self.batch_size,
                                shuffle=True,
                                num_workers=1,
                                drop_last=True)
            epoch_start_time = time.time()

            for i, data in enumerate(loader):

                real_A = torch.unsqueeze(data[:, 0, :, :],
                                         1).to(self.device, dtype=torch.float)
                real_B = torch.unsqueeze(data[:, 1, :, :],
                                         1).to(self.device, dtype=torch.float)

                gaussian_noise = torch.abs(
                    torch.normal(mean=torch.zeros(self.data_shape),
                                 std=1)).to(self.device, dtype=torch.float)

                if self.model == 'base':

                    self.GA2B_optimizer.zero_grad(
                    )  # set g_x and g_y gradients to zero

                    fake_B = self.generator_A2B(real_A)  # X -> Y'
                    fake_B_copy = copy.copy(fake_B.detach())
                    DB_fake = self.discriminator_B(
                        fake_B +
                        gaussian_noise)  # netD_x provides feedback to netG_x
                    loss_G_A2B = criterionGAN(DB_fake, True)

                    # cycle_consistence
                    cycle_A = self.generator_B2A(fake_B)  # Y' -> X^
                    # Cycle-consistency loss: penalize || G_y(G_x(real_x)) - real_x ||
                    loss_cycle_A2B = criterionCycle(cycle_A, real_A) * lambda_A

                    # identity loss
                    if lambda_identity > 0:
                        # netG_x should be identity if real_y is fed: ||netG_x(real_y) - real_y||
                        idt_A = self.generator_A2B(real_B)
                        loss_idt_A = criterionIdt(
                            idt_A, real_B) * lambda_A * lambda_identity
                    else:
                        loss_idt_A = 0.

                    loss_A = loss_G_A2B + 5. * loss_cycle_A2B
                    loss_A.backward(retain_graph=True)
                    self.GA2B_optimizer.step()

                    loss_A_meter.add(loss_A.item())

                    ######################
                    # B -> A' -> B^ cycle
                    ######################

                    self.GB2A_optimizer.zero_grad(
                    )  # set g_x and g_y gradients to zero

                    fake_A = self.generator_B2A(real_B)  # Y -> X'
                    fake_A_copy = copy.copy(fake_A.detach())
                    DA_fake = self.discriminator_A(fake_A + gaussian_noise)
                    loss_G_B2A = criterionGAN(DA_fake, True)
                    # print(f'loss_G_Y = {round(float(loss_G_Y), 3)}')

                    cycle_B = self.generator_A2B(fake_A)  # Y -> X' -> Y^
                    # Cycle-consistency loss: penalize || G_x(G_y(real_y)) - real_y ||
                    loss_cycle_B2A = criterionCycle(cycle_B, real_B) * lambda_B

                    # identity loss
                    if lambda_identity > 0:
                        # netG_y should be identity if real_x is fed: ||netG_y(real_x) - real_x||
                        idt_B = self.generator_B2A(real_A)
                        loss_idt_B = criterionIdt(
                            idt_B, real_A) * lambda_A * lambda_identity
                    else:
                        loss_idt_B = 0.

                    loss_B = loss_G_B2A + 5. * loss_cycle_B2A
                    loss_B.backward(retain_graph=True)
                    self.GB2A_optimizer.step()

                    loss_B_meter.add(loss_B.item())

                    ######################
                    # sample
                    ######################

                    if self.use_image_poll:
                        [fake_A_sample,
                         fake_B_sample] = self.pool([fake_A_copy, fake_B_copy])

                    ######################
                    # netD_A
                    ######################

                    # loss_real
                    DA_real = self.discriminator_A(real_A + gaussian_noise)
                    loss_DA_real = criterionGAN(DA_real, True)
                    score_DA_real_B.add(float(DA_real.data.mean()))

                    # loss fake
                    if self.use_image_poll:
                        DA_fake_sample = self.discriminator_A(fake_A_sample +
                                                              gaussian_noise)
                        loss_DA_fake = criterionGAN(DA_fake_sample, False)
                        score_DA_fake_B.add(float(DA_fake_sample.data.mean()))

                    else:
                        loss_DA_fake = criterionGAN(DA_fake, False)
                        score_DA_fake_B.add(float(DA_fake.data.mean()))

                    # loss and backward
                    self.DA_optimizer.zero_grad()
                    loss_DA = (loss_DA_real + loss_DA_fake) * 0.5

                    loss_DA.backward()
                    self.DA_optimizer.step()

                    ######################
                    # netD_B
                    ######################

                    # loss_real
                    DB_real = self.discriminator_B(real_B + gaussian_noise)
                    loss_DB_real = criterionGAN(DB_real, True)

                    # loss_fake
                    if self.use_image_poll:
                        DB_fake_sample = self.discriminator_B(fake_B_sample +
                                                              gaussian_noise)
                        loss_DB_fake = criterionGAN(DB_fake_sample, False)
                    else:
                        loss_DB_fake = criterionGAN(DB_fake, False)

                    # loss and backward
                    self.DB_optimizer.zero_grad()
                    loss_DB = (loss_DB_real + loss_DB_fake) * 0.5

                    loss_DB.backward()
                    self.DB_optimizer.step()

                else:
                    real_mixed = torch.unsqueeze(data[:, 2, :, :],
                                                 1).to(self.device,
                                                       dtype=torch.float)

                    ######################
                    # A -> B' -> A^ cycle
                    ######################

                    self.GA2B_optimizer.zero_grad(
                    )  # set g_x and g_y gradients to zero

                    fake_B = self.generator_A2B(real_A)  # X -> Y'
                    fake_B_copy = copy.copy(fake_B.detach())
                    DB_fake = self.discriminator_B(
                        fake_B +
                        gaussian_noise)  # netD_x provides feedback to netG_x
                    '''
                    to_binary
                    '''
                    loss_G_A2B = criterionGAN(DB_fake, True)

                    # cycle_consistence
                    cycle_A = self.generator_B2A(fake_B)  # Y' -> X^
                    # Cycle-consistency loss: penalize || G_y(G_x(real_x)) - real_x ||
                    loss_cycle_A2B = criterionCycle(cycle_A, real_A) * lambda_A

                    # identity loss
                    if lambda_identity > 0:
                        # netG_x should be identity if real_y is fed: ||netG_x(real_y) - real_y||
                        idt_A = self.generator_A2B(real_B)
                        loss_idt_A = criterionIdt(
                            idt_A, real_B) * lambda_A * lambda_identity
                    else:
                        loss_idt_A = 0.

                    loss_A = loss_G_A2B + 5. * loss_cycle_A2B
                    loss_A.backward(retain_graph=True)
                    self.GA2B_optimizer.step()

                    loss_A_meter.add(loss_A.item())

                    ######################
                    # B -> A' -> B^ cycle
                    ######################

                    self.GB2A_optimizer.zero_grad(
                    )  # set g_x and g_y gradients to zero

                    fake_A = self.generator_B2A(real_B)  # Y -> X'
                    fake_A_copy = copy.copy(fake_A.detach())
                    DA_fake = self.discriminator_A(fake_A + gaussian_noise)
                    loss_G_B2A = criterionGAN(DA_fake, True)
                    # print(f'loss_G_Y = {round(float(loss_G_Y), 3)}')

                    cycle_B = self.generator_A2B(fake_A)  # Y -> X' -> Y^
                    # Cycle-consistency loss: penalize || G_x(G_y(real_y)) - real_y ||
                    loss_cycle_B2A = criterionCycle(cycle_B, real_B) * lambda_B

                    # identity loss
                    if lambda_identity > 0:
                        # netG_y should be identity if real_x is fed: ||netG_y(real_x) - real_x||
                        idt_B = self.generator_B2A(real_A)
                        loss_idt_B = criterionIdt(
                            idt_B, real_A) * lambda_A * lambda_identity
                    else:
                        loss_idt_B = 0.

                    loss_B = loss_G_B2A + 5. * loss_cycle_B2A
                    loss_B.backward(retain_graph=True)
                    self.GB2A_optimizer.step()

                    loss_B_meter.add(loss_B.item())

                    ######################
                    # sample
                    ######################

                    if self.use_image_poll:
                        [fake_A_sample,
                         fake_B_sample] = self.pool([fake_A_copy, fake_B_copy])

                    ######################
                    # netD_A $ netD_A_all
                    ######################

                    # loss_real
                    DA_real = self.discriminator_A(real_A + gaussian_noise)
                    loss_DA_real = criterionGAN(DA_real, True)
                    # score_DA_real_B.add(float(DA_real.data.mean()))

                    DA_real_all = self.discriminator_A_all(real_mixed +
                                                           gaussian_noise)
                    loss_DA_all_real = criterionGAN(DA_real_all, True)

                    # loss fake
                    if self.use_image_poll:
                        DA_fake_sample = self.discriminator_A(fake_A_sample +
                                                              gaussian_noise)
                        loss_DA_fake = criterionGAN(DA_fake_sample, False)
                        # score_DA_fake_B.add(float(DA_fake_sample.data.mean()))
                        DA_fake_sample_all = self.discriminator_A_all(
                            fake_A_sample + gaussian_noise)
                        loss_DA_all_fake = criterionGAN(
                            DA_fake_sample_all, False)

                    else:
                        loss_DA_fake = criterionGAN(DA_fake, False)
                        # score_DA_fake_B.add(float(DA_fake.data.mean()))
                        DA_fake_all = self.discriminator_A_all(fake_A_copy +
                                                               gaussian_noise)
                        loss_DA_all_fake = criterionGAN(DA_fake_all, False)

                    # loss and backward
                    self.DA_optimizer.zero_grad()
                    loss_DA = (loss_DA_real + loss_DA_fake) * 0.5
                    loss_DA.backward()
                    self.DA_optimizer.step()

                    self.DA_all_optimizer.zero_grad()
                    loss_DA_all = (loss_DA_all_real + loss_DA_all_fake) * 0.5
                    loss_DA_all.backward()
                    self.DA_all_optimizer.step()

                    ######################
                    # netD_A_all
                    ######################

                    self.DA_all_optimizer.zero_grad()

                    # loss_real
                    DA_real_all = self.discriminator_A_all(real_mixed +
                                                           gaussian_noise)
                    loss_DA_all_real = criterionGAN(DA_real_all, True)

                    # loss fake
                    DA_fake_sample_all = self.discriminator_A_all(
                        fake_A_sample + gaussian_noise)
                    loss_DA_all_fake = criterionGAN(DA_fake_sample_all, False)

                    loss_DA_all = (loss_DA_all_real + loss_DA_all_fake) * 0.5
                    loss_DA_all.backward()
                    self.DA_all_optimizer.step()

                    ######################
                    # netD_B & netD_B_all
                    ######################

                    # loss_real
                    DB_real = self.discriminator_B(real_B + gaussian_noise)
                    loss_DB_real = criterionGAN(DB_real, True)
                    DB_real_all = self.discriminator_B_all(real_mixed +
                                                           gaussian_noise)
                    loss_DB_all_real = criterionGAN(DB_real_all, True)

                    # loss_fake
                    if self.use_image_poll:
                        DB_fake_sample = self.discriminator_B(fake_B_sample +
                                                              gaussian_noise)
                        loss_DB_fake = criterionGAN(DB_fake_sample, False)
                        DB_fake_sample_all = self.discriminator_B_all(
                            fake_B_sample + gaussian_noise)
                        loss_DB_all_fake = criterionGAN(
                            DB_fake_sample_all, False)
                    else:
                        loss_DB_fake = criterionGAN(DB_fake, False)
                        DB_fake_all = self.discriminator_B_all(fake_B_copy +
                                                               gaussian_noise)
                        loss_DB_all_fake = criterionGAN(DB_fake_all, False)

                    # loss and backward
                    self.DB_optimizer.zero_grad()
                    loss_DB = (loss_DB_real + loss_DB_fake) * 0.5 + (
                        loss_DB_all_real + loss_DB_all_fake) * 0.5
                    loss_DB.backward()
                    self.DB_optimizer.step()

                    self.DB_all_optimizer.zero_grad()
                    loss_DB_all = (loss_DB_all_real + loss_DB_all_fake) * 0.5
                    loss_DB_all.backward()
                    self.DB_all_optimizer.step()

                    ######################
                    # netD_all
                    ######################

                    # loss_D_all = loss_DB_all + loss_DA_all
                    # loss_D_all.backward(retain_graph=True)
                    '''
                    ######################
                    # netD_B_all
                    ######################

                    self.DB_all_optimizer.zero_grad()

                    # loss_real
                    DB_real_all = self.discriminator_B_all(real_mixed + gaussian_noise)
                    loss_DB_all_real = criterionGAN(DB_real_all, True)

                    # loss fake
                    DB_fake_sample_all = self.discriminator_B_all(fake_B_sample + gaussian_noise)
                    loss_DB_all_fake = criterionGAN(DB_fake_sample_all, False)

                    loss_DB_all = (loss_DB_all_real + loss_DB_all_fake) * 0.5
                    loss_DB_all.backward()
                    self.DB_all_optimizer.step()
                    '''

                # save snapshot
                if i % self.plot_every == 0:
                    file_name = self.name + '_snap_%03d_%05d.png' % (
                        epoch,
                        i,
                    )
                    test_path = os.path.join(self.checkpoint_path, file_name)
                    tv.utils.save_image(fake_B, test_path, normalize=True)
                    print(f'{file_name} saved.')
                    losses['loss_A'] = loss_A_meter.value()[0]
                    losses['loss_B'] = loss_B_meter.value()[0]
                    scores['score_DA_real_B'] = score_DA_real_B.value()[0]
                    scores['score_DA_fake_B'] = score_DA_fake_B.value()[0]
                    print(losses)
                    print(scores)
                    print('Epoch {} progress: {:.2%}\n'.format(
                        epoch, i / iter_num))

            # save model
            if epoch % self.save_every == 0 or epoch == self.max_epoch - 1:
                self.save_model(epoch)
                print(f'model saved')

            self.GA2B_scheduler.step(epoch)
            self.GB2A_scheduler.step(epoch)
            self.DA_scheduler.step(epoch)
            self.DB_scheduler.step(epoch)

            if self.model != 'base':
                self.DA_all_scheduler.step(epoch)
                self.DB_all_scheduler.step(epoch)

            epoch_time = int(time.time() - epoch_start_time)

            print_options(self.opt,
                          epoch_log=True,
                          epoch=epoch,
                          time=epoch_time,
                          losses=losses,
                          scores=scores)
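
For each direction the generator pass above computes an adversarial term, a cycle-consistency term (A -> B' -> A^ compared with the original A), and an identity term; the loop only backpropagates the adversarial and cycle terms. The sketch below folds all three into one objective for the A-to-B direction, assuming a GANLoss-style criterion that takes a target flag; the function name and default weights are placeholders.

import torch.nn as nn


def generator_A2B_objective(G_A2B, G_B2A, D_B, criterionGAN, real_A, real_B,
                            lambda_cycle=10.0, lambda_identity=0.5):
    l1 = nn.L1Loss()
    fake_B = G_A2B(real_A)
    loss_gan = criterionGAN(D_B(fake_B), True)              # fool D_B
    loss_cycle = l1(G_B2A(fake_B), real_A) * lambda_cycle   # A -> B' -> A^
    loss_idt = l1(G_A2B(real_B), real_B) * lambda_cycle * lambda_identity
    return loss_gan + loss_cycle + loss_idt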
Code example #14
0
    def __init__(self, hyperparameters, log_dir, gpu_id):
        super(Generator, self).__init__()
        self.hyp = hyperparameters
        print(hyperparameters)
        self.gpu_id = gpu_id
        self.noise_dim = self.hyp['noise_dim']
        self.vis_noise = torch.randn(1, self.hyp['noise_dim']).cuda(
            self.gpu_id).requires_grad_(False)
        self.g_loss_meter = MovingAverageValueMeter(5)
        self.log_dir = log_dir

        # Architecture:
        self.lab0 = nn.Linear(1, self.hyp['p1'], bias=False)
        self.fc0 = nn.Linear(self.noise_dim, self.hyp['p2'], bias=False)
        self.nonlin0 = nn.Sequential(*[
            nn.BatchNorm2d(self.hyp['p1'] + self.hyp['p2']),
            nn.LeakyReLU(self.hyp['lrelu_g'])
        ] if self.hyp['bg0'] else [
            nn.LeakyReLU(self.hyp['lrelu_g']),
        ])

        self.conv1 = nn.ConvTranspose2d(self.hyp['p1'] + self.hyp['p2'],
                                        self.hyp['p3'], (1, 55),
                                        bias=True)
        self.nonlin1 = nn.Sequential(*[
            nn.BatchNorm2d(self.hyp['p3']),
            nn.LeakyReLU(self.hyp['lrelu_g'])
        ] if self.hyp['bg1'] else [
            nn.LeakyReLU(self.hyp['lrelu_g']),
        ])

        self.conv2 = nn.ConvTranspose2d(self.hyp['p3'], 1, (55, 1), bias=True)
        self.sigmoid = nn.Tanh()

        self.cuda(self.gpu_id)
        opt_param_list = [{
            'params': [
                param for name, param in self.named_parameters()
                if 'lab0' not in name
            ]
        }, {
            'params': self.lab0.parameters(),
            'lr': 1 * self.hyp['lr_g']
        }]

        self.optimizer = torch.optim.Adam(opt_param_list,
                                          lr=self.hyp['lr_g'],
                                          betas=(self.hyp['b1_g'],
                                                 self.hyp['b2_g']),
                                          weight_decay=self.hyp['wd_g'])
        # rand init
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                torch.nn.init.kaiming_normal_(m.weight,
                                              a=self.hyp['lrelu_g'],
                                              nonlinearity='leaky_relu')
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                torch.nn.init.kaiming_normal_(m.weight)
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                torch.nn.init.constant_(m.weight, 1)
                torch.nn.init.constant_(m.bias, 0)
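
The optimizer above is built from two parameter groups so the lab0 label embedding can get its own learning rate while every other parameter falls back to the default. A minimal sketch of that pattern; the helper name and hyperparameter names are placeholders.

import torch


def build_two_group_adam(module, base_lr, lab_lr, betas=(0.9, 0.999), wd=0.0):
    lab_params = list(module.lab0.parameters())
    lab_ids = {id(p) for p in lab_params}
    other_params = [p for p in module.parameters() if id(p) not in lab_ids]
    return torch.optim.Adam(
        [{'params': other_params},               # uses base_lr
         {'params': lab_params, 'lr': lab_lr}],  # per-group override
        lr=base_lr, betas=betas, weight_decay=wd)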
Code example #15
0
File: train.py Project: stashvala/bf-vos
def main():
    global global_iter_idx
    args = parse_args()
    if args.verbose:
        default_handler = logging.StreamHandler(sys.stdout)
        logger.addHandler(default_handler)
        logger.setLevel(logging.DEBUG)
    if args.log_file is not None:
        logfile_handler = logging.FileHandler(args.log_file)
        logger_formatter = logging.Formatter(
            '%(name)s - %(levelname)s - %(message)s')
        logfile_handler.setFormatter(logger_formatter)
        logger.addHandler(logfile_handler)
        logger.setLevel(logging.DEBUG)
    train_data_source = davis.DavisDataset(base_dir=os.path.join(
        root_dir, 'dataset', 'DAVIS'),
                                           image_size=args.image_dims,
                                           year=2016,
                                           phase='train',
                                           transform=davis.ToTensor())
    train_triplet_sampler = davis.TripletSampler(dataset=train_data_source,
                                                 num_triplets=args.batch_size,
                                                 randomize=True)
    train_data_loader = DataLoader(dataset=train_data_source,
                                   batch_sampler=train_triplet_sampler)

    val_data_source = davis.DavisDataset(base_dir=os.path.join(
        root_dir, 'dataset', 'DAVIS'),
                                         image_size=args.image_dims,
                                         year=2016,
                                         phase='val',
                                         transform=davis.ToTensor())
    val_triplet_sampler = davis.TripletSampler(
        dataset=val_data_source,
        num_triplets=args.num_val_batches,
        randomize=True)
    val_data_loader = DataLoader(dataset=val_data_source,
                                 batch_sampler=val_triplet_sampler)

    model = network.BFVOSNet(embedding_vector_dims=args.embedding_vector_dims)
    train_loss_fn = loss.MinTripletLoss(alpha=args.alpha)
    val_loss_fn = loss.validation_loss
    if has_cuda:
        model = model.cuda()
        train_loss_fn = train_loss_fn.cuda()
        train_loss_fn.to(device)
        logger.debug("Model and loss function moved to CUDA")

    start_epoch = 0
    if args.checkpoint_path is not None:
        epoch_substr = args.checkpoint_path.split('epoch_')[1]
        start_epoch = int(epoch_substr.split('_')[0])

        batch_substr = epoch_substr.split('_')[1].split('batch_')
        if len(batch_substr) > 1:
            global_iter_idx = int(batch_substr[1].split('_')[0])

    if has_cuda:
        if args.checkpoint_path is not None:
            # Load pre-trained weights for entire model
            model.load_state_dict(
                torch.load(
                    args.checkpoint_path,
                    map_location=lambda storage, loc: storage.cuda(gpu_id)))
            logger.info("Loaded checkpoint from {}".format(
                args.checkpoint_path))
        else:
            # Load pre-trained weights for feature extraction head
            model.load_state_dict(torch.load(
                deeplab_resnet_pre_trained_path,
                map_location=lambda storage, loc: storage.cuda(gpu_id)),
                                  strict=False)
            logger.info("Loaded DeepLab ResNet from {}".format(
                deeplab_resnet_pre_trained_path))
    else:
        if args.checkpoint_path is not None:
            model.load_state_dict(torch.load(args.checkpoint_path))
            logger.info("Loaded checkpoint from {}".format(
                args.checkpoint_path))
        else:
            model.load_state_dict(torch.load(deeplab_resnet_pre_trained_path),
                                  strict=False)
            logger.info("Loaded DeepLab ResNet from {}".format(
                deeplab_resnet_pre_trained_path))
    # Load to appropriate device and set to training mode but freeze feature extraction layer
    model.to(device).train()
    model.freeze_feature_extraction()

    # Initialize optimizer to train only the unfrozen layers
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=args.learning_rate,
                          momentum=args.momentum)

    # Initialize meter and writer
    train_loss_meter = MovingAverageValueMeter(20)
    val_loss_meter = AverageValueMeter()
    summary_writer = SummaryWriter(tensorboard_save_dir)

    # Train
    for epoch in tqdm(range(start_epoch, args.num_epochs)):
        logger.info("Epoch {}/{}".format(epoch + 1, args.num_epochs))
        train(epoch, train_data_loader, val_data_loader, model, train_loss_fn,
              val_loss_fn, optimizer, train_loss_meter, val_loss_meter,
              summary_writer, args.log_interval, args.checkpoint_interval,
              args.val_interval, args.num_val_batches)

    # Save final model after all epochs
    model.eval().cpu()
    save_model_filename = "epoch_{}_{}.model".format(
        args.num_epochs,
        str(time.time()).replace(" ", "_").replace(".", "_"))
    save_model_path = os.path.join(model_dir, save_model_filename)
    torch.save(model.state_dict(), save_model_path)
    logger.info("Model saved to {}".format(save_model_filename))

    training_config_save_path = os.path.join(
        config_save_dir, save_model_filename.replace('.model', '.json'))
    training_config = vars(args)
    training_config['device'] = str(device)
    with open(training_config_save_path, 'w') as f:
        json.dump(training_config, f)
    logger.info(
        "Training config saved to {}".format(training_config_save_path))
Code example #16
0
class Discriminator(nn.Module):
    def __init__(self, hyperparameters, gpu_id):
        super(Discriminator, self).__init__()

        self.hyp = hyperparameters
        self.gpu_id = gpu_id
        self.w_loss_meter = MovingAverageValueMeter(5)
        self.d_loss_meter = MovingAverageValueMeter(5)
        self.r_loss_meter = MovingAverageValueMeter(5)
        self.f_loss_meter = MovingAverageValueMeter(5)
        self.gp_loss_meter = MovingAverageValueMeter(5)

        # Architecture
        self.lab0 = nn.ConvTranspose2d(1, self.hyp['q1'], (1, 55), bias=False)
        self.conv0 = nn.Conv2d(1, self.hyp['q2'], (55, 1), bias=False)
        self.nonlin0 = nn.Sequential(*[
            nn.BatchNorm2d(self.hyp['q1'] + self.hyp['q2']),
            nn.LeakyReLU(self.hyp['lrelu_d'])
        ] if self.hyp['bd0'] else [
            nn.LeakyReLU(self.hyp['lrelu_d']),
        ])

        self.conv1 = nn.Conv2d(self.hyp['q1'] + self.hyp['q2'],
                               self.hyp['q3'], (1, 55),
                               bias=False)
        self.nonlin1 = nn.Sequential(*[
            nn.BatchNorm2d(self.hyp['q3']),
            nn.LeakyReLU(self.hyp['lrelu_d'])
        ] if self.hyp['bd1'] else [
            nn.LeakyReLU(self.hyp['lrelu_d']),
        ])

        self.fc = nn.Linear(self.hyp['q3'], 1, bias=False)

        self.cuda(self.gpu_id)
        opt_param_list = [{
            'params': [
                param for name, param in self.named_parameters()
                if 'lab0' not in name
            ]
        }, {
            'params': self.lab0.parameters(),
            'lr': 1 * self.hyp['lr_d']
        }]

        self.optimizer = torch.optim.Adam(opt_param_list,
                                          lr=self.hyp['lr_d'],
                                          betas=(self.hyp['b1_d'],
                                                 self.hyp['b2_d']),
                                          weight_decay=self.hyp['wd_d'])
        # rand init
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                torch.nn.init.kaiming_normal_(m.weight,
                                              a=self.hyp['lrelu_d'],
                                              nonlinearity='leaky_relu')
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight,
                                              a=self.hyp['lrelu_d'],
                                              nonlinearity='leaky_relu')
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                torch.nn.init.constant_(m.weight, 1)
                torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                torch.nn.init.kaiming_normal_(m.weight)
                if not m.bias is None:
                    torch.nn.init.constant_(m.bias, 0)

    def forward(self, x, l):
        x = self.conv0(x)
        l = self.lab0(l.float().view(-1, 1, 1, 1)) * 2 - 1
        x = torch.cat([x, l], 1)
        x = self.nonlin0(x)
        x = self.conv1(x)
        x = self.nonlin1(x)
        x = x.view(-1, self.hyp['q3'])
        x = self.fc(x)
        return x

    def train_step(self, inputs, netg):
        """
        One training step.
        :param inputs:
        :param netg:
        :return:
        """
        real_data, real_labels = inputs
        real_data = real_data.cuda(self.gpu_id)
        real_labels = real_labels.cuda(self.gpu_id)

        self.zero_grad()

        self.d_real = self(real_data, real_labels).mean()

        # train with fake
        noise = torch.randn(real_data.shape[0],
                            self.hyp['noise_dim']).cuda(self.gpu_id)

        fake = netg(noise, real_labels).data

        self.d_fake = self(fake, real_labels).mean()

        self.d_cost = self.d_fake - self.d_real

        # train with gradient penalty
        if not self.hyp['lambda_gp'] == 0:
            self.gradient_penalty = self.calc_gradient_penalty_cond(
                real_data.data, real_labels, fake.data)
            self.d_cost += self.gradient_penalty * self.hyp['lambda_gp']

        self.wasserstein_d = self.d_real - self.d_fake
        self.d_cost.backward()
        self.optimizer.step()

        self.w_loss_meter.add(self.wasserstein_d.detach().cpu())
        self.d_loss_meter.add(self.d_cost.detach().cpu())
        self.r_loss_meter.add(self.d_real.detach().cpu())
        self.f_loss_meter.add(self.d_fake.detach().cpu())

        if not self.hyp['lambda_gp'] == 0:
            self.gp_loss_meter.add(self.gradient_penalty.detach().cpu())

    def calc_gradient_penalty_cond(self, real_data, real_labels, fake_data):
        """
        Calculates Gradient Penalty.
        :param real_data:
        :param real_labels:
        :param fake_data:
        :return:
        """
        alpha = torch.rand(real_data.size()[0], 1, 1,
                           1).expand(real_data.size()).cuda(self.gpu_id)
        interpolates = alpha * real_data + ((1 - alpha) * fake_data)
        interpolates = interpolates.cuda(self.gpu_id).requires_grad_(True)

        real_labels.requires_grad_(True)
        disc_interpolates = self(interpolates, real_labels)

        gradients = torch.autograd.grad(outputs=disc_interpolates,
                                        inputs=[interpolates, real_labels],
                                        grad_outputs=torch.ones(
                                            disc_interpolates.size()).cuda(
                                                self.gpu_id),
                                        create_graph=True,
                                        retain_graph=True,
                                        only_inputs=True,
                                        allow_unused=True)[0]

        gradient_penalty = ((gradients.norm(2, dim=1) - 1)**2).mean()
        return gradient_penalty
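
calc_gradient_penalty_cond above implements the WGAN-GP penalty E[(||grad D(x_interp)||_2 - 1)^2] on points interpolated between real and fake samples, conditioned on the labels. A simpler, unconditional reference sketch that flattens the per-sample gradient before taking its norm, assuming 4-D (N, C, H, W) inputs; this is a generic variant, not the author's exact computation.

import torch


def gradient_penalty(discriminator, real, fake, device):
    alpha = torch.rand(real.size(0), 1, 1, 1, device=device).expand_as(real)
    interp = (alpha * real.detach() +
              (1 - alpha) * fake.detach()).requires_grad_(True)
    scores = discriminator(interp)
    grads = torch.autograd.grad(outputs=scores, inputs=interp,
                                grad_outputs=torch.ones_like(scores),
                                create_graph=True, retain_graph=True,
                                only_inputs=True)[0]
    # Norm of the flattened gradient per sample, pushed toward 1
    grad_norm = grads.view(grads.size(0), -1).norm(2, dim=1)
    return ((grad_norm - 1) ** 2).mean()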
Code example #17
0
    def train(self):
        torch.cuda.empty_cache()

        ######################
        # Save / Load model
        ######################

        if self.opt.continue_train:
            try:
                self.continue_from_latest_checkpoint()
            except Exception as e:
                self.logger.error(e)
                return

        else:
            self.reset_save()

        self.logger.add_file_logger(self.opt.log_path)

        ######################
        # Dataset
        ######################

        dataset = ClassifierDataset(self.opt.genreA, self.opt.genreB, 'train')

        test_dataset = ClassifierDataset(self.opt.genreA, self.opt.genreB,
                                         'test')

        dataset_size = len(dataset)
        iter_num = int(dataset_size / self.opt.batch_size)

        plot_every = iter_num // 10

        self.logger.info(
            f'Dataset loaded, genreA: {self.opt.genreA}, genreB: {self.opt.genreB}, total size: {dataset_size}.'
        )

        ######################
        # Initiate
        ######################

        softmax_criterion = nn.BCELoss()

        Loss_meter = MovingAverageValueMeter(self.opt.plot_every)

        losses = {}

        ######################
        # Start Training
        ######################

        test_data = torch.from_numpy(test_dataset.get_data()).to(
            self.device, dtype=torch.float)

        gaussian_noise = torch.normal(mean=torch.zeros(test_data.shape),
                                      std=self.opt.gaussian_std).to(
                                          self.device, dtype=torch.float)
        # test_data += gaussian_noise

        real_test_label = torch.from_numpy(test_dataset.get_labels()).view(
            -1, 2).to(self.device, dtype=torch.float)

        for epoch in range(self.opt.start_epoch, self.opt.max_epoch):
            loader = DataLoader(dataset,
                                batch_size=self.opt.batch_size,
                                shuffle=True,
                                num_workers=self.opt.num_threads,
                                drop_last=True)
            epoch_start_time = time.time()

            for i, batch in enumerate(loader):
                data = batch[0].to(self.device, dtype=torch.float)

                real_label = batch[1].view(self.opt.batch_size,
                                           2).to(self.device,
                                                 dtype=torch.float)

                self.classifier_optimizer.zero_grad()

                estimate_train = self.classifier(data)

                loss = softmax_criterion(estimate_train, real_label)

                loss.backward()

                self.classifier_optimizer.step()

                Loss_meter.add(loss.item())

                # test
                if i % plot_every == 0:
                    with torch.no_grad():
                        estimate_test = self.classifier(test_data)
                    estimate_test = nn.functional.softmax(estimate_test, dim=1)
                    test_prediction = torch.argmax(estimate_test, 1).eq(
                        torch.argmax(real_test_label, 1))
                    test_accuracy = torch.mean(
                        test_prediction.type(torch.float32)).cpu()

                    self.logger.info(
                        'Epoch {} progress {:.2%}: Loss: {}, Accuracy: {}\n'.
                        format(epoch, i / iter_num,
                               Loss_meter.value()[0], test_accuracy))

            if epoch % self.opt.save_every == 0 or epoch == self.opt.max_epoch - 1:
                self.save_model(epoch)

            self.classifier_scheduler.step(epoch)

            epoch_time = int(time.time() - epoch_start_time)
            self.logger.info(
                f'Epoch {epoch} finished, cost time {epoch_time}\n')
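
The test accuracy above is obtained by softmax-ing the classifier outputs, arg-maxing both predictions and one-hot targets, and averaging the element-wise matches. The same computation as a small standalone helper; the function and argument names are illustrative.

import torch


def one_hot_accuracy(outputs, one_hot_targets):
    probs = torch.nn.functional.softmax(outputs, dim=1)
    correct = torch.argmax(probs, 1).eq(torch.argmax(one_hot_targets, 1))
    return correct.to(torch.float32).mean().item()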
Code example #18
0
def adjust_learning_rate(optimizer, epoch, init_lr,
                         lr_decay_factor=0.1,  # default value assumed
                         lr_decay_epoch=10):
    """Sets the learning rate to the initial LR decayed by lr_decay_factor every lr_decay_epoch epochs"""
    if epoch % lr_decay_epoch == 0:
        lr = init_lr * (lr_decay_factor**(epoch // lr_decay_epoch))
        print('LR is set to {}'.format(lr))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr


model = faster_rcnn(20, backbone='vgg16')
if torch.cuda.is_available():
    model = model.cuda()

optimizer = model.get_optimizer(is_adam=False)
avg_loss = AverageValueMeter()
ma20_loss = MovingAverageValueMeter(windowsize=20)
model.train()

for epoch in range(15):
    adjust_learning_rate(optimizer, epoch, 0.001, lr_decay_epoch=10)
    for i in range(len(trainval_dataset)):
        img, bbox, label = trainval_dataset[
            i]  # retrieve one sample (image, boxes, labels) directly from the dataset
        img = img / 255

        loss = model.loss(img, bbox, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_value = loss.cpu().data.numpy()
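
The snippet above declares an AverageValueMeter and a MovingAverageValueMeter but is cut off before they are fed. A small sketch of how these torchnet meters are typically updated and read back (add() takes a scalar, value() returns a (mean, std) pair); the dummy loss values are for illustration only.

from torchnet.meter import AverageValueMeter, MovingAverageValueMeter

avg_loss = AverageValueMeter()
ma20_loss = MovingAverageValueMeter(windowsize=20)

for step, loss_value in enumerate([0.90, 0.72, 0.65, 0.51]):  # dummy losses
    avg_loss.add(float(loss_value))
    ma20_loss.add(float(loss_value))
    print('step {}: running mean {:.3f}, moving average {:.3f}'.format(
        step, avg_loss.value()[0], ma20_loss.value()[0]))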
Code example #19
0
def main():
    parser = argparse.ArgumentParser(description="Train the SharpNet network")
    parser.add_argument('--dataset',
                        '-d',
                        dest='dataset',
                        help='Name of the dataset (MLT, NYUv2 or pix3d)')
    parser.add_argument('--exp_name',
                        dest='experiment_name',
                        help='Custom name of the experiment',
                        type=str,
                        default=None)
    parser.add_argument('--batch-size',
                        '-b',
                        dest='batch_size',
                        type=int,
                        default=3,
                        help='Batch size')
    parser.add_argument('--iter-size',
                        dest='iter_size',
                        type=int,
                        default=3,
                        help='Iteration size (for accumulated gradients)')
    parser.add_argument('--boundary',
                        action='store_true',
                        help='Use boundary decoder')
    parser.add_argument('--normals',
                        action='store_true',
                        help='Use normals decoder')
    parser.add_argument('--depth',
                        action='store_true',
                        help='Use depth decoder')
    parser.add_argument('--consensus',
                        dest='geo_consensus',
                        action='store_true')
    parser.add_argument('--freeze',
                        dest='decoder_freeze',
                        default='',
                        type=str,
                        help='Decoders to freeze (comma separated)')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='Activate to display loss components terms')
    parser.add_argument('--rootdir',
                        '-r',
                        dest='root_dir',
                        default='',
                        help='Root Directory of the dataset')
    parser.add_argument(
        '--nocuda',
        action="store_true",
        help='Set this flag to run on CPU only (currently not supported)')
    parser.add_argument('--lr',
                        dest='learning_rate',
                        type=float,
                        default=1e-5,
                        help='Initial learning rate')
    parser.add_argument('--lr-mode',
                        dest='lr_mode',
                        default='poly',
                        help='Learning rate decay mode')
    parser.add_argument('--max-epoch',
                        dest='max_epoch',
                        type=int,
                        default=1000,
                        help='MAXITER')
    parser.add_argument('--step',
                        '-s',
                        dest='gradient_step',
                        default=5e-2,
                        help='gradient step')
    parser.add_argument('--cuda',
                        dest='cuda_device',
                        default="0",
                        help='CUDA device ID')
    parser.add_argument('--cpu', dest='num_workers', default=4)
    parser.add_argument('--pretrained-model',
                        dest='pretrained_model',
                        default=None,
                        help="Choose a model to fine tune")
    parser.add_argument('--start_epoch',
                        dest='start_epoch',
                        default=0,
                        type=int,
                        help="Starting epoch")
    parser.add_argument('--bias',
                        action="store_true",
                        help="Flag to learn bias in decoder convnet")
    parser.add_argument('--optimizer',
                        dest='optimizer',
                        default='SGD',
                        type=str,
                        help="Optimizer type: SGD  /  Adam")
    parser.add_argument('--decay',
                        dest='decay',
                        default=5e-5,
                        type=float,
                        help="Weight decay rate")

    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
    cuda = False if args.nocuda else True

    resnet50_url = 'https://download.pytorch.org/models/resnet50-19c8e357.pth'

    cuda = cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")
    if cuda:
        current_device = torch.cuda.current_device()
        print("Running on " + torch.cuda.get_device_name(current_device))
    else:
        print("Running on CPU")

    now = datetime.datetime.now()
    date_str = now.strftime("%d-%m-%Y_%H-%M")

    t = []
    torch.manual_seed(329)

    bias = True if args.bias else False

    # build model
    model = SharpNet(ResBlock, [3, 4, 6, 3], [2, 2, 2, 2, 2],
                     use_normals=True if args.normals else False,
                     use_depth=True if args.depth else False,
                     use_boundary=True if args.boundary else False,
                     bias_decoder=bias)

    model_dict = model.state_dict()

    # Load pretrained weights

    resnet_path = 'models/resnet50-19c8e357.pth'

    if not os.path.exists(resnet_path):
        command = 'wget ' + resnet50_url + ' && mkdir -p models/ && mv resnet50-19c8e357.pth models/'
        os.system(command)

    resnet50_dict = torch.load(resnet_path)

    resnet_dict = {
        k.replace('.', '_img.', 1): v
        for k, v in resnet50_dict.items()
        if k.replace('.', '_img.', 1) in model_dict
    }  # load weights up to pool

    if args.pretrained_model is not None:
        model_path = args.pretrained_model
        tmp_dict = torch.load(model_path)
        if args.depth:
            pretrained_dict = {
                k: v
                for k, v in tmp_dict.items() if k in model_dict
            }
        else:
            pretrained_dict = {
                k: v
                for k, v in tmp_dict.items()
                if (k in model_dict and not k.startswith('depth_decoder'))
            }

    else:
        pretrained_dict = resnet_dict

    try:
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        print('Successfully loaded pretrained weights')
    except Exception as e:
        print('Could not load the pretrained model weights:', e)
        sys.exit(1)

    model.to(device)
    model.zero_grad()
    model.train()

    freeze_decoders = args.decoder_freeze.split(',')
    freeze_model_decoders(model, freeze_decoders)

    if args.dataset != 'NYU':
        sharpnet_loss = SharpNetLoss(
            lamb=0.5,
            mu=1.0,
            use_depth=bool(args.depth),
            use_boundary=bool(args.boundary),
            use_normals=bool(args.normals),
            use_geo_consensus=bool(args.geo_consensus))
    else:
        sharpnet_loss = SharpNetLoss(
            lamb=0.5,
            mu=1.0,
            use_depth=bool(args.depth),
            use_boundary=False,
            use_normals=False,
            use_geo_consensus=bool(args.geo_consensus))

    if args.optimizer == 'SGD':
        optimizer = SGD(params=get_params(model),
                        lr=args.learning_rate,
                        weight_decay=args.decay,
                        momentum=0.9)
    elif args.optimizer == 'Adam':
        optimizer = Adam(params=get_params(model),
                         lr=args.learning_rate,
                         weight_decay=args.decay)
    else:
        print(
            'Could not configure the optimizer, please select --optimizer Adam or SGD'
        )
        sys.exit(1)

    # TensorBoard Logger
    train_loss_meter = MovingAverageValueMeter(20)
    val_loss_meter = MovingAverageValueMeter(3)
    depth_loss_meter = MovingAverageValueMeter(3) if args.depth else None
    normals_loss_meter = MovingAverageValueMeter(
        3) if args.normals and args.dataset != 'NYU' else None
    grad_loss_meter = MovingAverageValueMeter(3) if args.depth else None
    boundary_loss_meter = MovingAverageValueMeter(
        3) if args.boundary and args.dataset != 'NYU' else None
    consensus_loss_meter = MovingAverageValueMeter(
        3) if args.geo_consensus else None

    exp_name = args.experiment_name if args.experiment_name is not None else ''
    print('Experiment Name: {}'.format(exp_name))

    log_dir = os.path.join('logs', 'Joint', str(exp_name) + '_' + date_str)
    cp_dir = os.path.join('checkpoints', 'Joint',
                          str(exp_name) + '_' + date_str)
    print('Checkpoint Directory: {}'.format(cp_dir))

    train_writer = SummaryWriter(os.path.join(log_dir, 'train'))
    val_writer = SummaryWriter(os.path.join(log_dir, 'val'))

    if not os.path.exists(cp_dir):
        os.makedirs(cp_dir)
    if not os.path.exists(log_dir):
        os.makedirs(os.path.join(log_dir, 'train'))
        os.makedirs(os.path.join(log_dir, 'val'))

    train_dataloader, val_dataloader = get_trainval_splits(args)

    for epoch in range(args.max_epoch):
        if args.optimizer == 'SGD':
            adjust_learning_rate(args.learning_rate, args.lr_mode,
                                 args.gradient_step, args.max_epoch, optimizer,
                                 epoch)

        train_epoch(train_dataloader,
                    val_dataloader,
                    model,
                    sharpnet_loss,
                    optimizer,
                    args.start_epoch + epoch,
                    train_writer,
                    val_writer,
                    train_loss_meter,
                    val_loss_meter,
                    depth_loss_meter,
                    grad_loss_meter,
                    normals_loss_meter,
                    date_str=date_str,
                    model_save_path=cp_dir,
                    args=args,
                    boundary_loss_meter=boundary_loss_meter,
                    consensus_loss_meter=consensus_loss_meter)

        # Save a model
        if (epoch % 2 == 0 and epoch > int(0.9 * args.max_epoch)) \
                or epoch % 10 == 0:
            torch.save(
                model.state_dict(),
                os.path.join(
                    cp_dir,
                    'checkpoint_{}_final.pth'.format(args.start_epoch + epoch)),
            )
    torch.save(
        model.state_dict(),
        os.path.join(
            cp_dir, 'checkpoint_{}_final.pth'.format(args.start_epoch +
                                                     args.max_epoch)),
    )

    return None
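The adjust_learning_rate helper called in the epoch loop above is not shown in this snippet. A minimal sketch of such an epoch-based schedule, assuming lr_mode selects between 'poly' and 'step' decay and that every parameter group receives the same base rate (both assumptions, not taken from the SharpNet code):

def adjust_learning_rate(base_lr, lr_mode, step, max_epoch, optimizer, epoch):
    # Hypothetical helper: polynomial decay over max_epoch, or step decay
    # every `step` epochs. The power/gamma values are illustrative only.
    if lr_mode == 'poly':
        lr = base_lr * (1.0 - epoch / float(max_epoch)) ** 0.9
    elif lr_mode == 'step':
        lr = base_lr * (0.1 ** (epoch // step))
    else:
        raise ValueError('Unknown lr_mode: {}'.format(lr_mode))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr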
Code Example #20
def train(config, cuda):
    # Auto-tune cuDNN
    torch.backends.cudnn.benchmark = True

    # Configuration
    device = get_device(cuda)
    CONFIG = Dict(yaml.load(open(config)))

    # Dataset 10k or 164k
    dataset = get_dataset(CONFIG.DATASET.NAME)(
        root=CONFIG.DATASET.ROOT,
        split=CONFIG.DATASET.SPLIT.TRAIN,
        base_size=CONFIG.IMAGE.SIZE.TRAIN.BASE,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN.CROP,
        mean=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R),
        warp=CONFIG.DATASET.WARP_IMAGE,
        scale=CONFIG.DATASET.SCALES,
        flip=True,
    )

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.SOLVER.BATCH_SIZE.TRAIN,
        num_workers=CONFIG.DATALOADER.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model
    model = setup_model(CONFIG.MODEL.INIT_MODEL,
                        CONFIG.DATASET.N_CLASSES,
                        train=True)
    model.to(device)

    # Optimizer
    optimizer = torch.optim.SGD(
        # cf lr_mult and decay_mult in train.prototxt
        params=[
            {
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.SOLVER.LR,
                "weight_decay": 0.0,
            },
        ],
        momentum=CONFIG.SOLVER.MOMENTUM,
    )

    # Learning rate scheduler
    scheduler = PolynomialLR(
        optimizer=optimizer,
        step_size=CONFIG.SOLVER.LR_DECAY,
        iter_max=CONFIG.SOLVER.ITER_MAX,
        power=CONFIG.SOLVER.POLY_POWER,
    )

    # Loss definition
    criterion = nn.CrossEntropyLoss(ignore_index=CONFIG.DATASET.IGNORE_LABEL)
    criterion.to(device)

    # TensorBoard logger
    writer = SummaryWriter(CONFIG.SOLVER.LOG_DIR)
    average_loss = MovingAverageValueMeter(CONFIG.SOLVER.AVERAGE_LOSS)

    # Freeze the batch norm pre-trained on COCO
    model.train()
    model.module.base.freeze_bn()

    for iteration in tqdm(
            range(1, CONFIG.SOLVER.ITER_MAX + 1),
            total=CONFIG.SOLVER.ITER_MAX,
            leave=False,
            dynamic_ncols=True,
    ):

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        loss = 0
        for _ in range(CONFIG.SOLVER.ITER_SIZE):
            try:
                images, labels = next(loader_iter)
            except StopIteration:
                loader_iter = iter(loader)
                images, labels = next(loader_iter)

            images = images.to(device)
            labels = labels.to(device)

            # Propagate forward
            logits = model(images)

            # Loss
            iter_loss = 0
            for logit in logits:
                # Resize labels for {100%, 75%, 50%, Max} logits
                _, _, H, W = logit.shape
                labels_ = resize_labels(labels, shape=(H, W))
                iter_loss += criterion(logit, labels_)

            # Backpropagate (just compute gradients wrt the loss)
            iter_loss /= CONFIG.SOLVER.ITER_SIZE
            iter_loss.backward()

            loss += float(iter_loss)

        average_loss.add(loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # Update learning rate
        scheduler.step(epoch=iteration)

        # TensorBoard
        if iteration % CONFIG.SOLVER.ITER_TB == 0:
            writer.add_scalar("loss/train", average_loss.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("lr/group{}".format(i), o["lr"], iteration)
            if False:  # This produces a large log file
                for name, param in model.named_parameters():
                    name = name.replace(".", "/")
                    # Weight/gradient distribution
                    writer.add_histogram(name, param, iteration, bins="auto")
                    if param.requires_grad:
                        writer.add_histogram(name + "/grad",
                                             param.grad,
                                             iteration,
                                             bins="auto")

        # Save a model
        if iteration % CONFIG.SOLVER.ITER_SAVE == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.MODEL.SAVE_DIR,
                         "checkpoint_{}.pth".format(iteration)),
            )

        # To verify progress separately
        torch.save(
            model.module.state_dict(),
            osp.join(CONFIG.MODEL.SAVE_DIR, "checkpoint_current.pth"),
        )

    torch.save(
        model.module.state_dict(),
        osp.join(CONFIG.MODEL.SAVE_DIR, "checkpoint_final.pth"),
    )
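The ITER_SIZE inner loop above implements gradient accumulation: several mini-batches contribute gradients before a single optimizer.step(), which emulates a larger effective batch size on limited GPU memory. The pattern in isolation, as a minimal sketch (loader_iter, model, criterion, optimizer and device stand in for the configured objects):

def accumulated_step(loader_iter, model, criterion, optimizer, iter_size, device):
    # One optimizer update built from `iter_size` forward/backward passes.
    optimizer.zero_grad()
    total_loss = 0.0
    for _ in range(iter_size):
        images, labels = next(loader_iter)
        loss = criterion(model(images.to(device)), labels.to(device))
        (loss / iter_size).backward()  # scale so the summed gradients average
        total_loss += float(loss) / iter_size
    optimizer.step()
    return total_loss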
Code Example #21
File: main.py Project: bityangke/WSSS2020
def train():
    """Create the model and start the training."""
    # === 1.Configuration
    print(CONFIG_PATH)
    # === select which GPUs you want to use
    # === e.g. with 8 GPUs the valid indices are 0,1,2,3,...,7
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, CONFIG.EXP.GPU_IDX))

    device = get_device(torch.cuda.is_available())
    cudnn.benchmark = True
    comment_init = ""
    writer = SummaryWriter(comment=comment_init)  # Setup loss logger
    # === MovingAverageValueMeter(self, windowsize)
    # === - add(value): record a value
    # === - reset()
    # === - value(): return the moving average and its standard deviation
    average_loss = MovingAverageValueMeter(CONFIG.SOLVER.AVERAGE_LOSS)
    if not os.path.exists(CONFIG.MODEL.SAVE_PATH):
        os.makedirs(CONFIG.MODEL.SAVE_PATH)
    # Path to save models
    checkpoint_dir = os.path.join(
        CONFIG.EXP.OUTPUT_DIR,  # ./data
        "models",
        CONFIG.MODEL.NAME.lower(),  # DeepLabV2_ResNet101_MSC
        CONFIG.DATASET.SPLIT.TRAIN,  # train_aug
    )
    # === checkpoint_dir: ./data/DeepLabV2_ResNet101_MSC/train_aug
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    print("Checkpoint dst:", checkpoint_dir)

    # === 2.Dataloader ===
    trainloader = data.DataLoader(
        VOCDataSet(
            CONFIG.DATASET.DIRECTORY,
            CONFIG.DATASET.LIST_PATH,
            max_iters=CONFIG.SOLVER.ITER_MAX * CONFIG.SOLVER.BATCH_SIZE.TRAIN,
            crop_size=(CONFIG.IMAGE.SIZE.TRAIN, CONFIG.IMAGE.SIZE.TRAIN),
            scale=CONFIG.DATASET.RANDOM.SCALE,
            mirror=CONFIG.DATASET.RANDOM.MIRROR,
            mean=IMG_MEAN,
            label_path=CONFIG.DATASET.SEG_LABEL),  # for training 
        batch_size=CONFIG.SOLVER.BATCH_SIZE.TRAIN,
        shuffle=True,
        num_workers=CONFIG.DATALOADER.NUM_WORKERS,
        pin_memory=True)

    # iter(dataloader) returns an iterator that can be advanced with next()
    # loader_iter = iter(trainloader)

    # === 3.Create network & weights ===
    print("Model:", CONFIG.MODEL.NAME)

    # model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG.DATASET.N_CLASSES)
    model = DeepLabV2_DRN105_MSC(n_classes=CONFIG.DATASET.N_CLASSES)
    state_dict = torch.load(CONFIG.MODEL.INIT_MODEL)
    # model.base.load_state_dict(state_dict, strict=False)  # to skip ASPP
    print("    Init:", CONFIG.MODEL.INIT_MODEL)
    # === show the skip weight
    for m in model.base.state_dict().keys():
        if m not in state_dict.keys():
            print("    Skip init:", m)

    # === DeepLabv2 = Res101+ASPP
    # === model.base = DeepLabv2
    # === model = MSC(DeepLabv2)
    # model.base.load_state_dict(state_dict,
    #                            strict=False)  # strict=False to skip ASPP
    model = nn.DataParallel(model)  # multi-GPU
    model.to(device)  # move to GPU if available
    # === 4.Loss definition
    criterion = nn.CrossEntropyLoss(ignore_index=CONFIG.DATASET.IGNORE_LABEL)
    criterion.to(device)  # move to GPU if available

    # === 5.optimizer ===
    optimizer = torch.optim.SGD(
        # cf lr_mult and decay_mult in train.prototxt
        params=[
            {
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.SOLVER.LR,
                "weight_decay": 0.0,
            },
        ],
        momentum=CONFIG.SOLVER.MOMENTUM,
    )
    # Learning rate scheduler
    scheduler = PolynomialLR(
        optimizer=optimizer,
        step_size=CONFIG.SOLVER.LR_DECAY,
        iter_max=CONFIG.SOLVER.ITER_MAX,
        power=CONFIG.SOLVER.POLY_POWER,
    )

    time_start = time.time()  # set start time
    # === training iteration ===
    for i_iter, batch in enumerate(trainloader, start=1):
        torch.set_grad_enabled(True)
        model.train()
        model.module.base.freeze_bn()
        optimizer.zero_grad()
        images, labels, _, _ = batch

        logits = model(images.to(device))
        # <<<<<<<<<<<<<<<<<<<<
        # === Loss
        # === logits = [logits] + logits_pyramid + [logits_max]
        iter_loss = 0
        loss = 0
        for logit in logits:
            # Resize labels for {100%, 75%, 50%, Max} logits
            _, _, H, W = logit.shape
            labels_ = resize_labels(labels, size=(H, W))
            iter_loss += criterion(logit, labels_.to(device))
        # iter_loss /= CONFIG.SOLVER.ITER_SIZE
        iter_loss /= 4
        iter_loss.backward()
        loss += float(iter_loss)

        average_loss.add(loss)
        # Update weights with accumulated gradients
        optimizer.step()

        # Update learning rate
        scheduler.step(epoch=i_iter)

        # TensorBoard
        writer.add_scalar("loss", average_loss.value()[0], global_step=i_iter)
        print(
            'iter/max_iter = [{}/{}]  completed, loss = {:4.3} time:{}'.format(
                i_iter, CONFIG.SOLVER.ITER_MAX,
                average_loss.value()[0], show_timing(time_start, time.time())))
        # print('iter = ', i_iter, 'of', args.num_steps, '',
        #       loss.data.cpu().numpy())

        # === save final model
        if i_iter >= CONFIG.SOLVER.ITER_MAX:
            print('save final model as...{}'.format(
                osp.join(CONFIG.MODEL.SAVE_PATH,
                         'VOC12_' + str(CONFIG.SOLVER.ITER_MAX) + '.pth')))
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.MODEL.SAVE_PATH,
                         'VOC12_' + str(CONFIG.SOLVER.ITER_MAX) + '.pth'))
            break
        if i_iter % CONFIG.EXP.EVALUATE_ITER == 0:
            print("Evaluation....")
            evaluate_gpu(model, writer, i_iter)

        # === Save the model every SAVE_EVERY_ITER iterations ===============
        # DataParallel prefixes every layer name with 'module.', so the
        # checkpoint is saved from model.module.state_dict().
        # ====================================================================
        if i_iter % CONFIG.MODEL.SAVE_EVERY_ITER == 0:
            print('saving model ...')
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.MODEL.SAVE_PATH,
                         'VOC12_{}.pth'.format(i_iter)))
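The comment at the end of this example points out that nn.DataParallel registers the wrapped network under the attribute 'module', so every key in the wrapper's state_dict carries a 'module.' prefix. A small illustration of the difference, and of stripping the prefix when a checkpoint was saved from the wrapper instead (illustrative sketch only):

import torch.nn as nn

net = nn.Linear(4, 2)
wrapped = nn.DataParallel(net)
print(list(net.state_dict().keys()))      # ['weight', 'bias']
print(list(wrapped.state_dict().keys()))  # ['module.weight', 'module.bias']

# Saving model.module.state_dict() keeps the plain names. If a checkpoint was
# saved from the wrapped model instead, strip the prefix before loading:
state = {k.replace('module.', '', 1): v for k, v in wrapped.state_dict().items()}
net.load_state_dict(state)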
Code Example #22
def train(**kwargs):

    # first free all GPU memory
    t.cuda.empty_cache()
    """ Get options """

    opt = Config()
    print_options(opt)

    # overwrite options from commandline
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    device = t.device('cuda') if opt.gpu else t.device('cpu')

    # TODO: visualization
    """ Dataset """

    dataset = create_dataset(opt)
    dataset_size = len(dataset)
    iter_per_epoch = int(dataset_size / opt.batch_size)
    print(f'loaded {dataset_size} images for training')
    """ Create Network Instances """

    model_names = ['netG_x', 'netG_y', 'netD_x', 'netD_y']

    netG_x = ResnetGenerator(opt)
    netG_y = ResnetGenerator(opt)
    # print(netG_x)

    netD_x = NLayerDiscriminator(opt)
    netD_y = NLayerDiscriminator(opt)
    # print(netD_x)

    if opt.gpu:
        netG_x.to(device)
        summary(netG_x, input_size=(3, opt.crop_size, opt.crop_size))
        netG_y.to(device)

        netD_x.to(device)
        summary(netD_x, input_size=(3, opt.crop_size, opt.crop_size))
        netD_y.to(device)
    """ Define optimizer and Loss """
    optimizer_g = t.optim.Adam(itertools.chain(netG_x.parameters(),
                                               netG_y.parameters()),
                               lr=opt.g_lr,
                               betas=(opt.beta1, 0.999))
    optimizer_d = t.optim.Adam(itertools.chain(netD_x.parameters(),
                                               netD_y.parameters()),
                               lr=opt.d_lr,
                               betas=(opt.beta1, 0.999))
    optimizers = [optimizer_g, optimizer_d]
    """
    Forward cycle loss:  lambda_A * ||G_B(G_A(A)) - A|| (Eqn. (2) in the paper)
    Backward cycle loss: lambda_B * ||G_A(G_B(B)) - B|| (Eqn. (2) in the paper)
    Identity loss (optional):
    lambda_identity * (||G_A(B) - B|| * lambda_B + ||G_B(A) - A|| * lambda_A)
    (Sec 5.2 "Photo generation from paintings" in the paper)
    """

    lambda_X = 10.0  # weight for cycle loss (A -> B -> A^)
    lambda_Y = 10.0  # weight for cycle loss (B -> A -> B^)
    lambda_identity = 0.5

    # Define the GAN loss.
    # With gan_mode='lsgan' it wraps an MSELoss(); it is only evaluated
    # later inside the training loop.
    # criterionGAN = nn.MSELoss().to(device)
    criterionGAN = GANLoss(gan_mode='lsgan')

    # cycle loss
    criterionCycle = nn.L1Loss()

    # identity loss
    criterionIdt = nn.L1Loss()

    # loss meters
    loss_X_meter = MovingAverageValueMeter(opt.plot_every)
    loss_Y_meter = MovingAverageValueMeter(opt.plot_every)
    score_Dx_real_y = MovingAverageValueMeter(opt.plot_every)
    score_Dx_fake_y = MovingAverageValueMeter(opt.plot_every)

    losses = {}
    scores = {}
    """ use identity mapping. Setting lambda_identity other than 0 has an effect of scaling the weight of the identity mapping loss. For example, if the weight of the identity loss should be 10 times smaller than the weight of the reconstruction loss, please set lambda_identity = 0.1 """

    for epoch in range(opt.max_epochs):
        epoch_start_time = time.time()
        """ calculate losses, gradients, and update network weights;
        called in every iteration
        """

        for i, data in enumerate(dataset):

            real_x = data['A'].to(device)
            real_y = data['B'].to(device)

            ######################
            # X -> Y' -> X^ cycle
            ######################

            optimizer_g.zero_grad()  # set g_x and g_y gradients to zero

            fake_y = netG_x(real_x)  # X -> Y'
            prediction = netD_x(fake_y)  # netD_x provides feedback to netG_x
            loss_G_X = criterionGAN(prediction, True)

            # cycle_consistance
            x_hat = netG_y(fake_y)  # Y' -> X^
            # Forward cycle loss: || G_y(G_x(real_x)) - real_x ||
            loss_cycle_X = criterionCycle(x_hat, real_x) * lambda_X

            # identity loss
            if lambda_identity > 0:
                # netG_x should be identity if real_y is fed: ||netG_x(real_y) - real_y||
                idt_x = netG_x(real_y)
                loss_idt_x = criterionIdt(idt_x,
                                          real_y) * lambda_Y * lambda_identity
            else:
                loss_idt_x = 0.

            loss_X = loss_G_X + loss_cycle_X + loss_idt_x
            loss_X.backward(retain_graph=True)
            optimizer_g.step()

            loss_X_meter.add(loss_X.item())

            ######################
            # Y -> X' -> Y^ cycle
            ######################

            optimizer_g.zero_grad()  # set g_x and g_y gradients to zero

            fake_x = netG_y(real_y)  # Y -> X'
            prediction = netD_y(fake_x)
            loss_G_Y = criterionGAN(prediction, True)
            # print(f'loss_G_Y = {round(float(loss_G_Y), 3)}')

            y_hat = netG_x(fake_x)  # Y -> X' -> Y^
            # Backward cycle loss: || G_x(G_y(real_y)) - real_y ||
            loss_cycle_Y = criterionCycle(y_hat, real_y) * lambda_Y

            # identity loss
            if lambda_identity > 0:
                # netG_y should be identity if real_x is fed: ||netG_y(real_x) - real_x||
                idt_y = netG_y(real_x)
                loss_idt_y = criterionIdt(idt_y,
                                          real_x) * lambda_X * lambda_identity
            else:
                loss_idt_y = 0.

            loss_Y = loss_G_Y + loss_cycle_Y + loss_idt_y
            loss_Y.backward(retain_graph=True)
            optimizer_g.step()

            loss_Y_meter.add(loss_Y.item())

            ######################
            # netD_x
            ######################

            optimizer_d.zero_grad()

            # loss_real
            pred_real = netD_x(real_y)
            loss_D_x_real = criterionGAN(pred_real, True)
            score_Dx_real_y.add(float(pred_real.data.mean()))

            # loss_fake
            pred_fake = netD_x(fake_y)
            loss_D_x_fake = criterionGAN(pred_fake, False)
            score_Dx_fake_y.add(float(pred_fake.data.mean()))

            # loss and backward
            loss_D_x = (loss_D_x_real + loss_D_x_fake) * 0.5

            loss_D_x.backward()
            optimizer_d.step()

            ######################
            # netD_y
            ######################

            optimizer_d.zero_grad()

            # loss_real
            pred_real = netD_y(real_x)
            loss_D_y_real = criterionGAN(pred_real, True)

            # loss_fake
            pred_fake = netD_y(fake_x)
            loss_D_y_fake = criterionGAN(pred_fake, False)

            # loss and backward
            loss_D_y = (loss_D_y_real + loss_D_y_fake) * 0.5

            loss_D_y.backward()
            optimizer_d.step()

            # save snapshot
            if i % opt.plot_every == 0:
                filename = opt.name + '_snap_%03d_%05d.png' % (
                    epoch,
                    i,
                )
                test_path = os.path.join(opt.checkpoint_path, filename)
                tv.utils.save_image(fake_y, test_path, normalize=True)
                print(f'{filename} saved.')

                losses['loss_X'] = loss_X_meter.value()[0]
                losses['loss_Y'] = loss_Y_meter.value()[0]
                scores['score_Dx_real_y'] = score_Dx_real_y.value()[0]
                scores['score_Dx_fake_y'] = score_Dx_fake_y.value()[0]
                print(losses)
                print(scores)

            # print(f'iteration {i} finished')

        # save model
        if epoch % opt.save_every == 0 or epoch == opt.max_epochs - 1:
            save_filename = f'{opt.name}_netG_{epoch}.pth'
            save_filepath = os.path.join(opt.model_path, save_filename)
            t.save(netG_x.state_dict(), save_filepath)
            print(f'model saved as {save_filename}')

        # epoch end logs
        epoch_time = int(time.time() - epoch_start_time)

        print_options(opt,
                      epoch_log=True,
                      epoch=epoch,
                      time=epoch_time,
                      losses=losses,
                      scores=scores)
        print()
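The docstring near the top of this example lists the loss terms. For one direction of the cycle, the generator objective assembled in the loop boils down to the following sketch (GANLoss is assumed to take a prediction and a target_is_real flag, as it is called above; the lambda weights mirror the values defined in the example):

import torch.nn as nn

criterion_cycle = nn.L1Loss()
criterion_idt = nn.L1Loss()
lambda_X, lambda_Y, lambda_identity = 10.0, 10.0, 0.5

def generator_loss_x(netG_x, netG_y, netD_x, criterion_gan, real_x, real_y):
    # Adversarial term: netG_x should fool netD_x on X -> Y'.
    fake_y = netG_x(real_x)
    loss_gan = criterion_gan(netD_x(fake_y), True)
    # Forward cycle term: X -> Y' -> X^ should reconstruct real_x.
    loss_cycle = criterion_cycle(netG_y(fake_y), real_x) * lambda_X
    # Identity term: feeding a real Y through netG_x should change it little.
    loss_idt = criterion_idt(netG_x(real_y), real_y) * lambda_Y * lambda_identity
    return loss_gan + loss_cycle + loss_idt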
Code Example #23
def main(config, cuda):
    # Configuration
    with open(config) as f:
        CONFIG = yaml.load(f)

    cuda = cuda and torch.cuda.is_available()

    # Dataset
    dataset = get_dataset(CONFIG['DATASET'])(
        root=CONFIG['ROOT'],
        split='train',
        image_size=(CONFIG['IMAGE']['SIZE']['TRAIN'],
                    CONFIG['IMAGE']['SIZE']['TRAIN']),
        scale=True,
        flip=True,
        # preload=True
    )

    # DataLoader
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=CONFIG['BATCH_SIZE'],
                                         num_workers=CONFIG['NUM_WORKERS'],
                                         shuffle=True)
    loader_iter = iter(loader)

    # Model
    model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG['N_CLASSES'])
    state_dict = torch.load(CONFIG['INIT_MODEL'])
    model.load_state_dict(state_dict, strict=False)  # Skip "aspp" layer
    if cuda:
        model.cuda()

    # Optimizer
    optimizer = {
        'sgd':
        torch.optim.SGD(
            params=[
                {
                    'params': get_1x_lr_params(model),
                    'lr': float(CONFIG['LR'])
                },
                {
                    'params': get_10x_lr_params(model),
                    'lr': 10 * float(CONFIG['LR'])
                }  # NOQA
            ],
            lr=float(CONFIG['LR']),
            momentum=float(CONFIG['MOMENTUM']),
            weight_decay=float(CONFIG['WEIGHT_DECAY'])),
    }.get(CONFIG['OPTIMIZER'])

    # Loss definition
    criterion = CrossEntropyLoss2d(ignore_index=CONFIG['IGNORE_LABEL'])
    if cuda:
        criterion.cuda()

    # TensorBoard Logger
    writer = SummaryWriter(CONFIG['LOG_DIR'])
    loss_meter = MovingAverageValueMeter(20)

    model.train()
    for iteration in tqdm(range(1, CONFIG['ITER_MAX'] + 1),
                          total=CONFIG['ITER_MAX'],
                          leave=False,
                          dynamic_ncols=True):

        # Polynomial lr decay
        poly_lr_scheduler(optimizer=optimizer,
                          init_lr=float(CONFIG['LR']),
                          iter=iteration - 1,
                          lr_decay_iter=CONFIG['LR_DECAY'],
                          max_iter=CONFIG['ITER_MAX'],
                          power=CONFIG['POLY_POWER'])

        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG['ITER_SIZE'] + 1):
            data, target = next(loader_iter)

            # Image
            data = data.cuda() if cuda else data
            data = Variable(data)

            # Forward propagation
            outputs = model(data)

            # Label
            target = resize_target(target, outputs[0].size(2))
            target = target.cuda() if cuda else target
            target = Variable(target)

            # Aggregate losses for [100%, 75%, 50%, Max]
            loss = 0
            for output in outputs:
                loss += criterion(output, target)

            loss /= CONFIG['ITER_SIZE']
            iter_loss += loss.data[0]
            loss.backward()

            # Reload dataloader
            if ((iteration - 1) * CONFIG['ITER_SIZE'] + i) % len(loader) == 0:
                loader_iter = iter(loader)

        loss_meter.add(iter_loss)

        # Back propagation
        optimizer.step()

        # TensorBoard
        if iteration % CONFIG['ITER_TF'] == 0:
            writer.add_scalar('train_loss', loss_meter.value()[0], iteration)

        # Save a model
        if iteration % CONFIG['ITER_SNAP'] == 0:
            torch.save(
                model.state_dict(),
                osp.join(CONFIG['SAVE_DIR'],
                         'checkpoint_{}.pth.tar'.format(iteration)))  # NOQA
            writer.add_text('log', 'Saved a model', iteration)

    torch.save(model.state_dict(),
               osp.join(CONFIG['SAVE_DIR'], 'checkpoint_final.pth.tar'))
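resize_target here (and resize_labels in the other DeepLab examples) shrinks the ground-truth map to the spatial size of each logit so the loss can be computed per scale. The helper is defined in the project's libs and not shown; a minimal sketch using nearest-neighbour interpolation, which avoids mixing class ids (an assumption about its implementation, and note the keyword argument differs between snippets: size vs. shape):

import torch.nn.functional as F

def resize_labels(labels, size):
    # labels: (N, H, W) LongTensor of class ids; size: (new_H, new_W).
    labels = labels.unsqueeze(1).float()                 # (N, 1, H, W)
    labels = F.interpolate(labels, size=size, mode="nearest")
    return labels.squeeze(1).long()                      # (N, new_H, new_W)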
Code Example #24
def train(config_path, cuda):
    """
    Training DeepLab by v2 protocol
    """

    # Configuration
    CONFIG = Dict(yaml.load(config_path))
    device = get_device(cuda)
    torch.backends.cudnn.benchmark = True

    # Dataset
    dataset = get_dataset(CONFIG.DATASET.NAME)(
        root=CONFIG.DATASET.ROOT,
        split=CONFIG.DATASET.SPLIT.TRAIN,
        ignore_label=CONFIG.DATASET.IGNORE_LABEL,
        mean_bgr=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R),
        augment=True,
        base_size=CONFIG.IMAGE.SIZE.BASE,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN,
        scales=CONFIG.DATASET.SCALES,
        flip=True,
    )
    print(dataset)

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.SOLVER.BATCH_SIZE.TRAIN,
        num_workers=CONFIG.DATALOADER.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model check
    print("Model:", CONFIG.MODEL.NAME)
    assert (
        CONFIG.MODEL.NAME == "DeepLabV2_ResNet101_MSC"
    ), 'Currently support only "DeepLabV2_ResNet101_MSC"'

    # Model setup
    model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES)
    state_dict = torch.load(CONFIG.MODEL.INIT_MODEL)
    print("    Init:", CONFIG.MODEL.INIT_MODEL)
    for m in model.base.state_dict().keys():
        if m not in state_dict.keys():
            print("    Skip init:", m)
    model.base.load_state_dict(state_dict, strict=False)  # to skip ASPP
    model = nn.DataParallel(model)
    model.to(device)

    # Loss definition
    criterion = nn.CrossEntropyLoss(ignore_index=CONFIG.DATASET.IGNORE_LABEL)
    criterion.to(device)

    # Optimizer
    optimizer = torch.optim.SGD(
        # cf lr_mult and decay_mult in train.prototxt
        params=[
            {
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.SOLVER.LR,
                "weight_decay": CONFIG.SOLVER.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.SOLVER.LR,
                "weight_decay": 0.0,
            },
        ],
        momentum=CONFIG.SOLVER.MOMENTUM,
    )

    # Learning rate scheduler
    scheduler = PolynomialLR(
        optimizer=optimizer,
        step_size=CONFIG.SOLVER.LR_DECAY,
        iter_max=CONFIG.SOLVER.ITER_MAX,
        power=CONFIG.SOLVER.POLY_POWER,
    )

    # Setup loss logger
    writer = SummaryWriter(os.path.join(CONFIG.EXP.OUTPUT_DIR, "logs", CONFIG.EXP.ID))
    average_loss = MovingAverageValueMeter(CONFIG.SOLVER.AVERAGE_LOSS)

    # Path to save models
    checkpoint_dir = os.path.join(
        CONFIG.EXP.OUTPUT_DIR,
        "models",
        CONFIG.EXP.ID,
        CONFIG.MODEL.NAME.lower(),
        CONFIG.DATASET.SPLIT.TRAIN,
    )
    makedirs(checkpoint_dir)
    print("Checkpoint dst:", checkpoint_dir)

    # Freeze the batch norm pre-trained on COCO
    model.train()
    model.module.base.freeze_bn()

    for iteration in tqdm(
        range(1, CONFIG.SOLVER.ITER_MAX + 1),
        total=CONFIG.SOLVER.ITER_MAX,
        dynamic_ncols=True,
    ):

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        loss = 0
        for _ in range(CONFIG.SOLVER.ITER_SIZE):
            try:
                _, images, labels = next(loader_iter)
            except StopIteration:
                loader_iter = iter(loader)
                _, images, labels = next(loader_iter)

            # Propagate forward
            logits = model(images.to(device))

            # Loss
            iter_loss = 0
            for logit in logits:
                # Resize labels for {100%, 75%, 50%, Max} logits
                _, _, H, W = logit.shape
                labels_ = resize_labels(labels, size=(H, W))
                iter_loss += criterion(logit, labels_.to(device))

            # Propagate backward (just compute gradients wrt the loss)
            iter_loss /= CONFIG.SOLVER.ITER_SIZE
            iter_loss.backward()

            loss += float(iter_loss)

        #print(loss)
        average_loss.add(loss)

        # Update weights with accumulated gradients
        optimizer.step()

        # Update learning rate
        scheduler.step(epoch=iteration)

        # TensorBoard
        if iteration % CONFIG.SOLVER.ITER_TB == 0:
            writer.add_scalar("loss/train", average_loss.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("lr/group_{}".format(i), o["lr"], iteration)
            for i in range(torch.cuda.device_count()):
                writer.add_scalar(
                    "gpu/device_{}/memory_cached".format(i),
                    torch.cuda.memory_cached(i) / 1024 ** 3,
                    iteration,
                )

            if False:
                for name, param in model.module.base.named_parameters():
                    name = name.replace(".", "/")
                    # Weight/gradient distribution
                    writer.add_histogram(name, param, iteration, bins="auto")
                    if param.requires_grad:
                        writer.add_histogram(
                            name + "/grad", param.grad, iteration, bins="auto"
                        )

        # Save a model
        if iteration % CONFIG.SOLVER.ITER_SAVE == 0:
            torch.save(
                model.module.state_dict(),
                os.path.join(checkpoint_dir, "checkpoint_{}.pth".format(iteration)),
            )

    torch.save(
        model.module.state_dict(), os.path.join(checkpoint_dir, "checkpoint_final.pth")
    )
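PolynomialLR used above comes from the project's libs and applies the same poly decay as poly_lr_scheduler in example #23, but through the torch.optim scheduler interface. A minimal sketch of how such a scheduler can be written, assuming it only updates the rates every step_size iterations (an assumption, not the project's exact code):

from torch.optim.lr_scheduler import _LRScheduler

class PolynomialLR(_LRScheduler):
    def __init__(self, optimizer, step_size, iter_max, power, last_epoch=-1):
        self.step_size = step_size
        self.iter_max = iter_max
        self.power = power
        super(PolynomialLR, self).__init__(optimizer, last_epoch)

    def polynomial_decay(self, lr):
        return lr * (1 - float(self.last_epoch) / self.iter_max) ** self.power

    def get_lr(self):
        # Keep the current rates except at every `step_size`-th iteration,
        # where the base rates are decayed polynomially.
        if (self.last_epoch == 0 or self.last_epoch % self.step_size != 0
                or self.last_epoch > self.iter_max):
            return [group["lr"] for group in self.optimizer.param_groups]
        return [self.polynomial_decay(lr) for lr in self.base_lrs]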
Code Example #25
def main(config, cuda):
    cuda = cuda and torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")

    if cuda:
        current_device = torch.cuda.current_device()
        print("Running on", torch.cuda.get_device_name(current_device))
    else:
        print("Running on CPU")

    # Configuration
    CONFIG = Dict(yaml.load(open(config)))

    dataset = get_dataset(CONFIG.DATASET)(
        data_path=CONFIG.ROOT,
        crop_size=256,
        scale=(0.6, 0.8, 1., 1.2, 1.4),
        rotation=15,
        flip=True,
        mean=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R),
    )
    """
    # Dataset 10k or 164k
    dataset = get_dataset(CONFIG.DATASET)(
        root=CONFIG.ROOT,
        split=CONFIG.SPLIT.TRAIN,
        base_size=513,
        crop_size=CONFIG.IMAGE.SIZE.TRAIN,
        mean=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R),
        warp=CONFIG.WARP_IMAGE,
        scale=(0.5, 0.75, 1.0, 1.25, 1.5),
        flip=True,
    )
    """

    # DataLoader
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=CONFIG.BATCH_SIZE.TRAIN,
        num_workers=CONFIG.NUM_WORKERS,
        shuffle=True,
    )
    loader_iter = iter(loader)

    # Model
    model = DeepLabV3Plus_ResNet101_MSC(n_classes=CONFIG.N_CLASSES)
    state_dict = torch.load(CONFIG.INIT_MODEL)
    model.load_state_dict(state_dict, strict=False)  # Skip "aspp" layer
    model = nn.DataParallel(model)
    model.to(device)

    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)

    # Optimizer
    optimizer = torch.optim.Adam(
        params=get_params(model.module),
        lr=CONFIG.LR,
        weight_decay=CONFIG.WEIGHT_DECAY,
    )
    """
    # Optimizer
    optimizer = torch.optim.SGD(
        # cf lr_mult and decay_mult in train.prototxt
        params=[
            {
                "params": get_params(model.module, key="1x"),
                "lr": CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="10x"),
                "lr": 10 * CONFIG.LR,
                "weight_decay": CONFIG.WEIGHT_DECAY,
            },
            {
                "params": get_params(model.module, key="20x"),
                "lr": 20 * CONFIG.LR,
                "weight_decay": 0.0,
            },
        ],
        momentum=CONFIG.MOMENTUM,
    )
    """
    # Loss definition
    criterion = CrossEntropyLoss2d(ignore_index=CONFIG.IGNORE_LABEL)
    criterion.to(device)
    max_pooling_loss = MaxPoolingLoss(ratio=0.3, p=1.7, reduce=True)

    # TensorBoard Logger
    writer = SummaryWriter(CONFIG.LOG_DIR)
    loss_meter = MovingAverageValueMeter(20)

    model.train()
    model.module.scale.freeze_bn()

    for iteration in tqdm(
            range(1, CONFIG.ITER_MAX + 1),
            total=CONFIG.ITER_MAX,
            leave=False,
            dynamic_ncols=True,
    ):
        """
        # Set a learning rate
        poly_lr_scheduler(
            optimizer=optimizer,
            init_lr=CONFIG.LR,
            iter=iteration - 1,
            lr_decay_iter=CONFIG.LR_DECAY,
            max_iter=CONFIG.ITER_MAX,
            power=CONFIG.POLY_POWER,
        )
        """

        # Clear gradients (ready to accumulate)
        optimizer.zero_grad()

        iter_loss = 0
        for i in range(1, CONFIG.ITER_SIZE + 1):
            try:
                images, labels = next(loader_iter)
            except StopIteration:
                loader_iter = iter(loader)
                images, labels = next(loader_iter)

            images = images.to(device)
            labels = labels.to(device).unsqueeze(1).float()

            # Propagate forward
            logits = model(images)

            # Loss
            loss = 0
            for logit in logits:
                # Resize labels for {100%, 75%, 50%, Max} logits
                labels_ = F.interpolate(labels,
                                        logit.shape[2:],
                                        mode="nearest")
                labels_ = labels_.squeeze(1).long()
                # Compute NLL and MPL
                nll_loss = criterion(logit, labels_)
                # loss += nll_loss
                loss += max_pooling_loss(nll_loss)

            # Backpropagate (just compute gradients wrt the loss)
            loss /= float(CONFIG.ITER_SIZE)
            loss.backward()

            iter_loss += float(loss)

        loss_meter.add(iter_loss)

        # Update weights with accumulated gradients
        optimizer.step()

        if iteration % CONFIG.ITER_TB == 0:
            writer.add_scalar("train_loss", loss_meter.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                writer.add_scalar("train_lr_group{}".format(i), o["lr"],
                                  iteration)

            gt_viz, images_viz, predicts_viz = make_vizs(
                images, labels_, logits,
                (CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G,
                 CONFIG.IMAGE.MEAN.R))
            writer.add_image("gt/images", torch.from_numpy(images_viz[0]),
                             iteration)
            writer.add_image("gt/labels", torch.from_numpy(gt_viz[0]),
                             iteration)
            for i, predict_viz in enumerate(predicts_viz):
                writer.add_image("predict/" + str(i),
                                 torch.from_numpy(predict_viz[0]), iteration)

            if False:  # This produces a large log file
                for name, param in model.named_parameters():
                    name = name.replace(".", "/")
                    writer.add_histogram(name, param, iteration, bins="auto")
                    if param.requires_grad:
                        writer.add_histogram(name + "/grad",
                                             param.grad,
                                             iteration,
                                             bins="auto")

        # Save a model
        if iteration % CONFIG.ITER_SAVE == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR,
                         "checkpoint_{}.pth".format(iteration)),
            )

        # Save a model (short term)
        if iteration % 100 == 0:
            torch.save(
                model.module.state_dict(),
                osp.join(CONFIG.SAVE_DIR, "checkpoint_current.pth"),
            )

    torch.save(model.module.state_dict(),
               osp.join(CONFIG.SAVE_DIR, "checkpoint_final.pth"))
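model.module.scale.freeze_bn() (and model.module.base.freeze_bn() in the other examples) keeps the COCO-pretrained BatchNorm statistics fixed while fine-tuning with small batches. The method is defined on the project's backbone classes; a minimal sketch of what such a freeze typically does (the requires_grad part is a common extra step; both details are assumptions):

import torch.nn as nn

def freeze_bn(module):
    # Put every BatchNorm layer into eval mode so its running mean/var are
    # not updated, and stop gradients for its affine parameters.
    for m in module.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()
            for p in m.parameters():
                p.requires_grad = False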
Code Example #26
class DataManager:
    def __init__(self, imagedataset, datadir, inputmix, embedding, device):
        self.imagedataset = imagedataset
        self.datadir = datadir
        self.inputmix = inputmix

        self.embedding = embedding
        self.device = device

    def generateSavepath(self, experimentid):
        # name the savedir, might add logs/ before the datetime for clarity
        if experimentid is None:
            savedir = time.strftime('%Y%m%d%H%M%S')
        else:
            savedir = experimentid

        self.savepath = os.path.join('logs', self.imagedataset, savedir)
        return self.savepath

    # getter method
    def get_savepath(self):
        return self.savepath

    def generateTB(self, period):
        self.writer = SummaryWriter(self.savepath + '/runs')
        self.loss_meter = MovingAverageValueMeter(20)
        self.tb = period

    def get_writer(self):
        return self.writer

    def createDirectory(self, values, config, args):
        os.makedirs(self.savepath, exist_ok=True)
        # print("Log dir:", self.savepath)

        # now join the path in save_screenshot:
        if os.path.exists(self.savepath + '/libs'):
            shutil.rmtree(self.savepath + '/libs')
        shutil.copytree('./libs/', self.savepath + '/libs')
        shutil.copy2(osp.abspath(inspect.stack()[0][1]), self.savepath)
        shutil.copy2(config, self.savepath)
        args_dict = {}
        for a in args:
            args_dict[a] = values[a]
        with open(self.savepath + '/args.json', 'w') as fp:
            json.dump(args_dict, fp)

    def loadClasses(self, bkg):
        self.seen_classes = np.load(
            self.datadir + '/split/seen_cls.npy')  #only the seen classes

        if bkg:
            self.seen_classes = np.asarray(np.concatenate(
                [np.array([0]), self.seen_classes]),
                                           dtype=int)  #seen classes + bkg

        self.novel_classes = np.load(self.datadir + '/split/novel_cls.npy')
        self.all_labels = np.genfromtxt(self.datadir + '/labels_2.txt',
                                        delimiter='\t',
                                        usecols=1,
                                        dtype='str')

        self.seen_classes = np.asarray(np.concatenate(
            [self.seen_classes,
             np.load(self.datadir + '/split/val_cls.npy')]),
                                       dtype=int)
        self.seen_novel_classes = np.concatenate(
            [self.seen_classes, self.novel_classes])
        self.to_ignore_classes = self.novel_classes

        if self.inputmix == 'seen':
            self.visible_classes = self.seen_classes
        else:
            self.visible_classes = self.seen_novel_classes

        print("Seen classes: ")
        print(self.seen_classes)
        print("all labels: ")
        print(self.all_labels)

        return self.seen_classes, self.novel_classes, self.seen_novel_classes, self.to_ignore_classes, self.visible_classes, self.all_labels

    def get_Classes(self):
        return self.seen_classes, self.novel_classes, self.seen_novel_classes, self.to_ignore_classes, self.visible_classes, self.all_labels, self.visibility_mask

    def loadData(self):

        self.train = np.load(self.datadir + '/split/train_list.npy')

        self.novelset = []
        self.seenset = []

        if self.inputmix == 'seen':
            self.seenset = range(self.train.shape[0])
        else:
            print("inputmix is not seen")
            exit()

        return self.train, self.seenset, self.novelset

    def get_data(self):
        return self.train, self.seenset, self.novelset

    def loadDatasets(self, CONFIG, bs):
        # Sampler
        sampler = MyDistributedSampler(
            self.seenset,
            self.novelset,
            num_replicas=torch.distributed.get_world_size(),
            rank=torch.distributed.get_rank())

        self.dataset = get_dataset(CONFIG.DATASET)(
            train=self.train,
            test=None,
            root=CONFIG.ROOT,
            transform=None,
            split=CONFIG.SPLIT.TRAIN,
            base_size=513,
            crop_size=CONFIG.IMAGE.SIZE.TRAIN,
            mean=(CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G,
                  CONFIG.IMAGE.MEAN.R),
            warp=CONFIG.WARP_IMAGE,
            scale=(0.5, 1.5),
            flip=True,
            visibility_mask=self.visibility_mask,
        )
        random.seed(42)
        # DataLoader
        self.loader = torch.utils.data.DataLoader(
            dataset=self.dataset,
            batch_size=bs,
            num_workers=CONFIG.NUM_WORKERS,
            # num_workers = 1,
            sampler=sampler,
            pin_memory=True)
        return self.dataset, self.loader

    def get_datasets(self):

        return self.dataset, self.loader

    def loadClassEmbs(self):
        # Word embeddings
        if self.embedding == 'word2vec':
            self.class_emb = pickle.load(
                open(self.datadir + '/word_vectors/word2vec.pkl', "rb"))
        elif self.embedding == 'fasttext':
            self.class_emb = pickle.load(
                open(self.datadir + '/word_vectors/fasttext.pkl', "rb"))
        elif self.embedding == 'fastnvec':
            self.class_emb = np.concatenate([
                pickle.load(
                    open(self.datadir + '/word_vectors/fasttext.pkl', "rb")),
                pickle.load(
                    open(self.datadir + '/word_vectors/word2vec.pkl', "rb"))
            ],
                                            axis=1)
        else:
            print("invalid emb ", self.embedding)
            exit()
        self.class_emb = F.normalize(torch.tensor(self.class_emb), p=2,
                                     dim=1).to(self.device)
        self.seen_class_emb = self.class_emb[self.seen_classes]
        self.to_ignore_class_emb = self.class_emb[self.to_ignore_classes]

        return self.class_emb, self.to_ignore_class_emb, self.seen_class_emb

    def get_clsEmbs(self):
        return self.class_emb, self.to_ignore_class_emb, self.seen_class_emb

    def loadClsMaps(self, bkg):

        self.seen_map = np.array([-1] * 256)
        for i, n in enumerate(list(self.seen_classes)):
            self.seen_map[n] = i

        self.all_map = np.array([-1] * 256)
        for i, n in enumerate(list(self.seen_classes)):
            self.all_map[n] = i
        for i, n in enumerate(self.to_ignore_classes, len(self.seen_classes)):
            self.all_map[n] = i

        self.inverse_map = np.array([-1] * 256)
        for i, n in enumerate(self.all_map):
            self.inverse_map[n] = i

        if bkg:
            for i, n in enumerate(self.to_ignore_classes):
                self.seen_map[n] = 0

        # used to tell which predictions are of unseen classes when computing the percentage
        self.cls_map_seen = np.array([0] * 256)
        for i, n in enumerate(self.to_ignore_classes):
            self.cls_map_seen[n] = 1

        self.cls_map = None
        self.cls_map = np.array([255] * 256)
        for i, n in enumerate(self.seen_classes):
            self.cls_map[n] = i

        # VISIBILITY MASK
        self.visibility_mask = {}
        self.visibility_mask[0] = self.seen_map.copy()

        print(self.visibility_mask[0])
        return self.seen_map, self.cls_map_seen, self.cls_map

    def getClsMaps(self):
        return self.seen_map, self.cls_map_seen, self.cls_map, self.inverse_map

    def savePerIteration(self, iter_loss, optimizer, model, iteration, save):

        self.loss_meter.add(iter_loss)
        # TensorBoard
        if iteration % self.tb == 0:
            self.writer.add_scalar("train_loss",
                                   self.loss_meter.value()[0], iteration)
            for i, o in enumerate(optimizer.param_groups):
                self.writer.add_scalar("train_lr_group{}".format(i), o["lr"],
                                       iteration)

        # Save a model (short term)
        if iteration > 0 and iteration % save == 0:
            print(
                "\nIteration: {} \nSaving (short term) model (iteration,state_dict,optimizer) ...\n "
                .format(iteration))
            with open(self.savepath + '/iteration.json', 'w') as fp:
                json.dump({'iteration': iteration}, fp)
            name = "checkpoint_current.pth.tar"
            if "voc" in self.savepath or iteration % 5000 == 0:
                name = "checkpoint_{}.pth.tar".format(iteration)
            torch.save(
                {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, osp.join(self.savepath, name))

    def saveFinal(self, optimizer, model):

        torch.save(
            {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, osp.join(self.savepath, "checkpoint_final.pth.tar"))
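The 256-entry arrays built in loadClsMaps act as lookup tables: indexing them with a label image remaps every pixel's class id in one vectorized operation, with unmapped ids falling through to -1 (or 255). A small usage illustration with hypothetical class ids:

import numpy as np

seen_map = np.array([-1] * 256)
for i, n in enumerate([0, 1, 4, 7]):        # hypothetical seen class ids
    seen_map[n] = i

label = np.array([[0, 4], [7, 255]], dtype=np.uint8)   # toy ground-truth patch
remapped = seen_map[label]                  # unseen ids (e.g. 255) become -1
print(remapped)                             # [[ 0  2] [ 3 -1]]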
Code Example #27
File: train.py Project: HAL-42/DeepLabV2YQ
        ],
        momentum=CONFIG.SOLVER.MOMENTUM,
    )

    # Learning rate scheduler
    scheduler = PolynomialLR(
        optimizer=optimizer,
        step_size=CONFIG.SOLVER.LR_DECAY,
        iter_max=CONFIG.SOLVER.ITER_MAX,
        power=CONFIG.SOLVER.POLY_POWER,
    )

    # Setup loss logger
    writer = SummaryWriter(os.path.join("experiment", CONFIG.EXP_ID,
                                        "summary"))
    average_loss = MovingAverageValueMeter(CONFIG.SOLVER.AVERAGE_LOSS)

    # Path to save models
    checkpoint_dir = os.path.join("experiment", CONFIG.EXP_ID, "checkpoints")
    makedirs(checkpoint_dir)
    print("Checkpoint dst:", checkpoint_dir)

    # Random Dropout
    model.train()

    for iteration in tqdm(
            range(1, CONFIG.SOLVER.ITER_MAX + 1),
            total=CONFIG.SOLVER.ITER_MAX,
            dynamic_ncols=True,
    ):