Example #1
def train(cfg):
    train_ds = COCODataset(cfg.train_imgs_path,
                           cfg.train_anno_path,
                           resize_size=cfg.resize_size)
    train_dl = DataLoader(train_ds,
                          batch_size=cfg.batch_size,
                          shuffle=False,
                          num_workers=cfg.num_workers,
                          collate_fn=train_ds.collate_fn)

    if cfg.eval:
        eval_ds = COCODataset(cfg.eval_imgs_path,
                              cfg.eval_anno_path,
                              resize_size=cfg.resize_size)
        eval_dl = DataLoader(eval_ds,
                             batch_size=max(cfg.batch_size // 2, 1),
                             num_workers=cfg.num_workers,
                             collate_fn=eval_ds.collate_fn)
    else:
        eval_dl = None

    model = CenterNet(cfg)
    if cfg.gpu:
        model = model.cuda()

    loss_func = Loss(cfg)

    trainer = Trainer(cfg, model, loss_func, train_dl, eval_dl)
    trainer.train()
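
Note: the cfg object is not shown in this example. Below is a minimal sketch of how train(cfg) might be invoked, assuming cfg is a plain namespace carrying the fields the function reads; Trainer and Loss will typically need additional fields, and all paths and values here are placeholders, not from the example.

from types import SimpleNamespace

# hypothetical config; field names are taken from the snippet above,
# paths and values are placeholders
cfg = SimpleNamespace(
    train_imgs_path='data/coco/train2017',
    train_anno_path='data/coco/annotations/instances_train2017.json',
    eval_imgs_path='data/coco/val2017',
    eval_anno_path='data/coco/annotations/instances_val2017.json',
    resize_size=(512, 512),
    batch_size=16,
    num_workers=4,
    eval=True,
    gpu=True,
)

if __name__ == '__main__':
    train(cfg)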
Example #2
def main():
    model = Model()

    # load ImageNet-pretrained ResNet-152 weights (map to CPU first so this
    # also works on machines without a GPU)
    pretrained_dict = torch.load('weights/resnet152-b121ed2d.pth',
                                 map_location=lambda storage, loc: storage)
    model_dict = model.state_dict()
    # keep only the pretrained entries whose keys exist in the current model
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)

    data = VRIC_data.Data()
    loss = Loss()
    main = Main(model, loss, data)
    #main.evaluate_ai()
    if opt.mode == 'train':
        for epoch in range(1, opt.epoch + 1):
            print('\nepoch', epoch)
            outputs = main.train()
            vis.line(Y=np.column_stack(
                (outputs[0], outputs[1], outputs[2], outputs[3])),
                     X=np.column_stack((epoch, epoch, epoch, epoch)),
                     win='Learning curve',
                     update='append',
                     opts={
                         'title': 'Learning curve',
                     })
            vis.line(Y=np.column_stack((outputs[4], outputs[5], outputs[6],
                                        outputs[7], outputs[8], outputs[9])),
                     X=np.column_stack(
                         (epoch, epoch, epoch, epoch, epoch, epoch)),
                     win='accuracy curve',
                     update='append',
                     opts={
                         'title': 'accuracy curve',
                     })
            if epoch % 1 == 0:  # save a checkpoint every epoch
                print('\nsaving checkpoint')
                os.makedirs('weights/AI_mgn', exist_ok=True)
                torch.save(model.state_dict(),
                           'weights/AI_mgn/modelv5_{}.pth'.format(epoch))

    if opt.mode == 'evaluate':
        print('start evaluate')
        model.load_state_dict(torch.load(opt.weight))
        main.evaluate()

    if opt.mode == 'aicity':
        print('start output txt files')
        model.load_state_dict(torch.load(opt.weight))
        main.AICity_evaluate()
Example #3
def train(cfg):
    train_ds = VOCDataset(cfg.root, mode=cfg.split, resize_size=cfg.resize_size)
    train_dl = DataLoader(train_ds, batch_size=1, shuffle=True,
                          num_workers=cfg.num_workers, collate_fn=train_ds.collate_fn, pin_memory=True)

    model = CenterNet(cfg)
    if cfg.gpu:
        model = model.cuda()
    loss_func = Loss(cfg)

    epoch = 100
    cfg.max_iter = len(train_dl) * epoch
    cfg.steps = (int(cfg.max_iter * 0.6), int(cfg.max_iter * 0.8))

    trainer = Trainer(cfg, model, loss_func, train_dl, None)
    trainer.train()
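
Note: the schedule above is iteration-based; with the batch_size=1 loader used here, one epoch is len(train_dl) iterations and the LR milestones fall at 60% and 80% of training. A small worked example with a hypothetical dataset size (the 5,000 figure is illustrative, not from the example):

iters_per_epoch = 5000                                 # len(train_dl) with batch_size=1
epoch = 100
max_iter = iters_per_epoch * epoch                     # 500,000 iterations in total
steps = (int(max_iter * 0.6), int(max_iter * 0.8))     # (300000, 400000)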
Example #4
def train(args):
    start_t = time.time()
    params = get_train_options()
    params["exp_name"] = args.exp_name
    params["patch_num_point"] = 1024
    params["batch_size"] = args.batch_size
    params['use_gan'] = args.use_gan

    if args.debug:
        params["nepoch"] = 2
        params["model_save_interval"] = 3
        params['model_vis_interval'] = 3

    log_dir = os.path.join(params["model_save_dir"], args.exp_name)
    os.makedirs(log_dir, exist_ok=True)
    tb_logger = Logger(log_dir)

    train_dataset = PUNET_Dataset(h5_file_path=params["dataset_dir"])
    num_workers = 4
    train_data_loader = data.DataLoader(dataset=train_dataset,
                                        batch_size=params["batch_size"],
                                        shuffle=True,
                                        num_workers=num_workers,
                                        pin_memory=True,
                                        drop_last=True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    G_model = Generator_recon(params)
    G_model.apply(xavier_init)
    G_model = torch.nn.DataParallel(G_model).to(device)
    D_model = torch.nn.DataParallel(Discriminator(params,
                                                  in_channels=3)).to(device)

    G_model.train()
    D_model.train()

    optimizer_D = Adam(D_model.parameters(),
                       lr=params["lr_D"],
                       betas=(0.9, 0.999))
    optimizer_G = Adam(G_model.parameters(),
                       lr=params["lr_G"],
                       betas=(0.9, 0.999))

    D_scheduler = MultiStepLR(optimizer_D, [50, 80], gamma=0.2)
    G_scheduler = MultiStepLR(optimizer_G, [50, 80], gamma=0.2)

    Loss_fn = Loss()

    print("preparation time is %fs" % (time.time() - start_t))
    iter = 0
    for e in range(params["nepoch"]):
        D_scheduler.step()
        G_scheduler.step()
        for batch_id, (input_data, gt_data,
                       radius_data) in enumerate(train_data_loader):
            optimizer_G.zero_grad()
            optimizer_D.zero_grad()

            input_data = input_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            gt_data = gt_data[:, :, 0:3].permute(0, 2, 1).float().cuda()

            start_t_batch = time.time()
            output_point_cloud = G_model(input_data)

            emd_loss = Loss_fn.get_emd_loss(
                output_point_cloud.permute(0, 2, 1),
                input_data.permute(0, 2, 1))

            total_G_loss = emd_loss
            total_G_loss.backward()
            optimizer_G.step()

            current_lr_D = optimizer_D.state_dict()['param_groups'][0]['lr']
            current_lr_G = optimizer_G.state_dict()['param_groups'][0]['lr']

            tb_logger.scalar_summary('emd_loss', emd_loss.item(), iter)
            tb_logger.scalar_summary('lr_D', current_lr_D, iter)
            tb_logger.scalar_summary('lr_G', current_lr_G, iter)

            msg = "{:0>8},{}:{}, [{}/{}], {}: {},{}:{}".format(
                str(datetime.timedelta(seconds=round(time.time() - start_t))),
                "epoch", e, batch_id + 1, len(train_data_loader),
                "total_G_loss", total_G_loss.item(), "iter time",
                (time.time() - start_t_batch))
            print(msg)

            if iter % params['model_save_interval'] == 0 and iter > 0:
                model_save_dir = os.path.join(params['model_save_dir'],
                                              params['exp_name'])
                os.makedirs(model_save_dir, exist_ok=True)
                D_ckpt_model_filename = "D_iter_%d.pth" % (iter)
                G_ckpt_model_filename = "G_iter_%d.pth" % (iter)
                D_model_save_path = os.path.join(model_save_dir,
                                                 D_ckpt_model_filename)
                G_model_save_path = os.path.join(model_save_dir,
                                                 G_ckpt_model_filename)
                torch.save(D_model.module.state_dict(), D_model_save_path)
                torch.save(G_model.module.state_dict(), G_model_save_path)

            if iter % params['model_vis_interval'] == 0 and iter > 0:
                np_pcd = output_point_cloud.permute(
                    0, 2, 1)[0].detach().cpu().numpy()
                # print(np_pcd.shape)
                img = (np.array(visualize_point_cloud(np_pcd)) * 255).astype(
                    np.uint8)
                tb_logger.image_summary("images", img[np.newaxis, :], iter)

                gt_pcd = gt_data.permute(0, 2, 1)[0].detach().cpu().numpy()
                # print(gt_pcd.shape)
                gt_img = (np.array(visualize_point_cloud(gt_pcd)) *
                          255).astype(np.uint8)
                tb_logger.image_summary("gt", gt_img[np.newaxis, :], iter)

                input_pcd = input_data.permute(0, 2,
                                               1)[0].detach().cpu().numpy()
                input_img = (np.array(visualize_point_cloud(input_pcd)) *
                             255).astype(np.uint8)
                tb_logger.image_summary("input", input_img[np.newaxis, :],
                                        iter)
            iter += 1
Example #5
def train(args):
    start_t = time.time()
    params = get_train_options()
    params["exp_name"] = args.exp_name
    params["patch_num_point"] = 1024
    params["batch_size"] = args.batch_size
    params['use_gan'] = args.use_gan

    if args.debug:
        params["nepoch"] = 2
        params["model_save_interval"] = 3
        params['model_vis_interval'] = 3

    log_dir = os.path.join(params["model_save_dir"], args.exp_name)
    os.makedirs(log_dir, exist_ok=True)
    tb_logger = Logger(log_dir)

    train_dataset = PUNET_Dataset(h5_file_path=params["dataset_dir"],
                                  split_dir=params['train_split'])
    num_workers = 4
    train_data_loader = data.DataLoader(dataset=train_dataset,
                                        batch_size=params["batch_size"],
                                        shuffle=True,
                                        num_workers=num_workers,
                                        pin_memory=True,
                                        drop_last=True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    G_model = Generator(params)
    G_model.apply(xavier_init)
    G_model = torch.nn.DataParallel(G_model).to(device)
    D_model = Discriminator(params, in_channels=3)
    D_model.apply(xavier_init)
    D_model = torch.nn.DataParallel(D_model).to(device)

    G_model.train()
    D_model.train()

    optimizer_D = Adam(D_model.parameters(),
                       lr=params["lr_D"],
                       betas=(0.9, 0.999))
    optimizer_G = Adam(G_model.parameters(),
                       lr=params["lr_G"],
                       betas=(0.9, 0.999))

    D_scheduler = MultiStepLR(optimizer_D, [50, 80], gamma=0.2)
    G_scheduler = MultiStepLR(optimizer_G, [50, 80], gamma=0.2)

    Loss_fn = Loss()

    print("preparation time is %fs" % (time.time() - start_t))
    iter = 0
    for e in range(params["nepoch"]):
        D_scheduler.step()
        G_scheduler.step()
        for batch_id, (input_data, gt_data,
                       radius_data) in enumerate(train_data_loader):
            optimizer_G.zero_grad()
            optimizer_D.zero_grad()

            input_data = input_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            gt_data = gt_data[:, :, 0:3].permute(0, 2, 1).float().cuda()

            start_t_batch = time.time()
            output_point_cloud = G_model(input_data)

            repulsion_loss = Loss_fn.get_repulsion_loss(
                output_point_cloud.permute(0, 2, 1))
            uniform_loss = Loss_fn.get_uniform_loss(
                output_point_cloud.permute(0, 2, 1))
            #print(output_point_cloud.shape,gt_data.shape)
            emd_loss = Loss_fn.get_emd_loss(
                output_point_cloud.permute(0, 2, 1), gt_data.permute(0, 2, 1))

            if params['use_gan']:
                fake_pred = D_model(output_point_cloud.detach())
                d_loss_fake = Loss_fn.get_discriminator_loss_single(
                    fake_pred, label=False)
                d_loss_fake.backward()
                optimizer_D.step()

                real_pred = D_model(gt_data.detach())
                d_loss_real = Loss_fn.get_discriminator_loss_single(real_pred,
                                                                    label=True)
                d_loss_real.backward()
                optimizer_D.step()

                d_loss = d_loss_real + d_loss_fake

                fake_pred = D_model(output_point_cloud)
                g_loss = Loss_fn.get_generator_loss(fake_pred)

                total_G_loss = (params['uniform_w'] * uniform_loss
                                + params['emd_w'] * emd_loss
                                + params['repulsion_w'] * repulsion_loss
                                + params['gan_w'] * g_loss)
            else:
                #total_G_loss = params['uniform_w'] * uniform_loss + params['emd_w'] * emd_loss + \
                #               repulsion_loss * params['repulsion_w']
                total_G_loss = (params['emd_w'] * emd_loss
                                + params['repulsion_w'] * repulsion_loss)

            #total_G_loss=emd_loss
            total_G_loss.backward()
            optimizer_G.step()

            current_lr_D = optimizer_D.state_dict()['param_groups'][0]['lr']
            current_lr_G = optimizer_G.state_dict()['param_groups'][0]['lr']

            tb_logger.scalar_summary('repulsion_loss', repulsion_loss.item(),
                                     iter)
            tb_logger.scalar_summary('uniform_loss', uniform_loss.item(), iter)
            tb_logger.scalar_summary('emd_loss', emd_loss.item(), iter)
            if params['use_gan']:
                tb_logger.scalar_summary('d_loss', d_loss.item(), iter)
                tb_logger.scalar_summary('g_loss', g_loss.item(), iter)
            tb_logger.scalar_summary('lr_D', current_lr_D, iter)
            tb_logger.scalar_summary('lr_G', current_lr_G, iter)

            msg = "{:0>8},{}:{}, [{}/{}], {}: {},{}:{}".format(
                str(datetime.timedelta(seconds=round(time.time() - start_t))),
                "epoch", e, batch_id + 1, len(train_data_loader),
                "total_G_loss", total_G_loss.item(), "iter time",
                (time.time() - start_t_batch))
            print(msg)

            iter += 1
        if (e + 1) % params['model_save_interval'] == 0 and e > 0:
            model_save_dir = os.path.join(params['model_save_dir'],
                                          params['exp_name'])
            os.makedirs(model_save_dir, exist_ok=True)
            D_ckpt_model_filename = "D_iter_%d.pth" % (e)
            G_ckpt_model_filename = "G_iter_%d.pth" % (e)
            D_model_save_path = os.path.join(model_save_dir,
                                             D_ckpt_model_filename)
            G_model_save_path = os.path.join(model_save_dir,
                                             G_ckpt_model_filename)
            torch.save(D_model.module.state_dict(), D_model_save_path)
            torch.save(G_model.module.state_dict(), G_model_save_path)
Example #6
def train(args):
    start_t = time.time()
    params = get_train_options()
    params["exp_name"] = args.exp_name
    params["patch_num_point"] = 256
    params["batch_size"] = args.batch_size

    if args.debug:
        params["nepoch"] = 2
        params["model_save_interval"] = 3
        params['model_vis_interval'] = 3

    log_dir = os.path.join(params["model_save_dir"], args.exp_name)
    os.makedirs(log_dir, exist_ok=True)
    tb_logger = Logger(log_dir)

    #trainloader=PUNET_Dataset(h5_file_path=params["dataset_dir"],split_dir=params['train_split'])
    trainloader = PUGAN_Dataset(h5_file_path=params["dataset_dir"], npoint=256)
    num_workers = 4
    train_data_loader = data.DataLoader(dataset=trainloader,
                                        batch_size=params["batch_size"],
                                        shuffle=True,
                                        num_workers=num_workers,
                                        pin_memory=True,
                                        drop_last=True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ##########################################
    # Initialize generator and discriminator #
    ##########################################
    G_AB = Generator(params)
    G_AB.apply(xavier_init)
    G_AB = torch.nn.DataParallel(G_AB).to(device)

    G_BA = Downsampler(params)
    G_BA.apply(xavier_init)
    G_BA = torch.nn.DataParallel(G_BA).to(device)

    D_A = Discriminator(params, in_channels=3)
    D_A.apply(xavier_init)
    D_A = torch.nn.DataParallel(D_A).to(device)

    D_B = Discriminator(params, in_channels=3)
    D_B.apply(xavier_init)
    D_B = torch.nn.DataParallel(D_B).to(device)

    ########################################
    #Optimizers and Learning Rate scheduler#
    ########################################

    optimizer_D_A = Adam(D_A.parameters(),
                         lr=params["lr_D_A"],
                         betas=(0.9, 0.999))
    optimizer_D_B = Adam(D_B.parameters(),
                         lr=params["lr_D_B"],
                         betas=(0.9, 0.999))

    optimizer_G_AB = Adam(G_AB.parameters(),
                          lr=params["lr_G_AB"],
                          betas=(0.9, 0.999))
    optimizer_G_BA = Adam(G_BA.parameters(),
                          lr=params["lr_G_BA"],
                          betas=(0.9, 0.999))

    D_A_scheduler = MultiStepLR(optimizer_D_A, [50, 80], gamma=0.2)
    G_AB_scheduler = MultiStepLR(optimizer_G_AB, [50, 80], gamma=0.2)
    D_B_scheduler = MultiStepLR(optimizer_D_B, [50, 80], gamma=0.2)
    G_BA_scheduler = MultiStepLR(optimizer_G_BA, [50, 80], gamma=0.2)

    Loss_fn = Loss()

    print("preparation time is %fs" % (time.time() - start_t))
    iter = 0
    for e in range(params["nepoch"]):

        for batch_id, (input_data, gt_data,
                       radius_data) in enumerate(train_data_loader):

            G_AB.train()
            G_BA.train()
            D_A.train()
            D_B.train()

            optimizer_G_AB.zero_grad()
            optimizer_D_A.zero_grad()
            optimizer_G_BA.zero_grad()
            optimizer_D_B.zero_grad()

            input_data = input_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            gt_data = gt_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            start_t_batch = time.time()

            output_point_cloud_high = G_AB(input_data)
            output_point_cloud_low = G_BA(gt_data)

            #####################################
            #               Loss                #
            #####################################
            repulsion_loss_AB = Loss_fn.get_repulsion_loss(
                output_point_cloud_high.permute(0, 2, 1))
            uniform_loss_AB = Loss_fn.get_uniform_loss(
                output_point_cloud_high.permute(0, 2, 1))
            repulsion_loss_BA = Loss_fn.get_repulsion_loss(
                output_point_cloud_low.permute(0, 2, 1))
            uniform_loss_BA = Loss_fn.get_uniform_loss(
                output_point_cloud_low.permute(0, 2, 1))
            emd_loss_AB = Loss_fn.get_emd_loss(
                output_point_cloud_high.permute(0, 2, 1),
                gt_data.permute(0, 2, 1))
            #emd_loss_BA = Loss_fn.get_emd_loss(output_point_cloud_low.permute(0, 2, 1), input_data.permute(0, 2, 1))

            #Cycle Loss
            recov_A = G_BA(output_point_cloud_high)
            ABA_repul_loss = Loss_fn.get_repulsion_loss(
                recov_A.permute(0, 2, 1))
            ABA_uniform_loss = Loss_fn.get_uniform_loss(
                recov_A.permute(0, 2, 1))

            recov_B = G_AB(output_point_cloud_low)
            BAB_repul_loss = Loss_fn.get_repulsion_loss(
                recov_B.permute(0, 2, 1))
            BAB_uniform_loss = Loss_fn.get_uniform_loss(
                recov_B.permute(0, 2, 1))
            BAB_emd_loss = Loss_fn.get_emd_loss(recov_B.permute(0, 2, 1),
                                                gt_data.permute(0, 2, 1))

            # G_AB loss
            fake_pred_B = D_A(output_point_cloud_high.detach())
            g_AB_loss = Loss_fn.get_generator_loss(fake_pred_B)
            total_G_AB_loss = (g_AB_loss * params['gan_w_AB']
                               + BAB_repul_loss * params['repulsion_w_AB']
                               + BAB_uniform_loss * params['uniform_w_AB']
                               + BAB_emd_loss * params['emd_w_AB']
                               + uniform_loss_AB * params['uniform_w_AB']
                               + emd_loss_AB * params['emd_w_AB']
                               + repulsion_loss_AB * params['repulsion_w_AB'])

            # G_BA loss
            fake_pred_A = D_B(output_point_cloud_low.detach())
            g_BA_loss = Loss_fn.get_generator_loss(fake_pred_A)
            total_G_BA_loss = (g_BA_loss * params['gan_w_BA']
                               + ABA_repul_loss * params['repulsion_w_BA']
                               + repulsion_loss_BA * params['repulsion_w_BA'])
            # dropped terms: ABA_uniform_loss * params['uniform_w_BA'],
            #                uniform_loss_BA * params['uniform_w_BA']

            # the two generator losses share parts of the autograd graph
            # (e.g. recov_A depends on output_point_cloud_high), so run both
            # backward passes before stepping either optimizer
            total_G_AB_loss.backward(retain_graph=True)
            total_G_BA_loss.backward()
            optimizer_G_AB.step()
            optimizer_G_BA.step()

            #Discriminator A loss
            fake_B_ = fake_A_buffer.push_and_pop(output_point_cloud_high)
            fake_pred_B = D_A(fake_B_.detach())
            d_A_loss_fake = Loss_fn.get_discriminator_loss_single(fake_pred_B,
                                                                  label=False)

            real_pred_B = D_A(gt_data.detach())
            d_A_loss_real = Loss_fn.get_discriminator_loss_single(real_pred_B,
                                                                  label=True)

            d_A_loss = d_A_loss_real + d_A_loss_fake
            d_A_loss.backward()
            optimizer_D_A.step()

            #Discriminator B loss
            fake_A_ = fake_B_buffer.push_and_pop(output_point_cloud_low)
            fake_pred_A = D_B(fake_A_.detach())
            d_B_loss_fake = Loss_fn.get_discriminator_loss_single(fake_pred_A,
                                                                  label=False)

            real_pred_A = D_B(input_data.detach())
            d_B_loss_real = Loss_fn.get_discriminator_loss_single(real_pred_A,
                                                                  label=True)
            d_B_loss = d_B_loss_real + d_B_loss_fake
            d_B_loss.backward()
            optimizer_D_B.step()

            # current learning rates (for logging)
            current_lr_D_A = optimizer_D_A.param_groups[0]['lr']
            current_lr_G_AB = optimizer_G_AB.param_groups[0]['lr']
            current_lr_D_B = optimizer_D_B.param_groups[0]['lr']
            current_lr_G_BA = optimizer_G_BA.param_groups[0]['lr']

            # tb_logger.scalar_summary('repulsion_loss_AB', repulsion_loss_AB.item(), iter)
            # tb_logger.scalar_summary('uniform_loss_AB', uniform_loss_AB.item(), iter)
            # tb_logger.scalar_summary('repulsion_loss_BA', repulsion_loss_BA.item(), iter)
            # tb_logger.scalar_summary('uniform_loss_BA', uniform_loss_BA.item(), iter)
            # tb_logger.scalar_summary('emd_loss_AB', emd_loss_AB.item(), iter)

            tb_logger.scalar_summary('d_A_loss', d_A_loss.item(), iter)
            tb_logger.scalar_summary('g_AB_loss', g_AB_loss.item(), iter)
            tb_logger.scalar_summary('Total_G_AB_loss', total_G_AB_loss.item(),
                                     iter)
            tb_logger.scalar_summary('lr_D_A', current_lr_D_A, iter)
            tb_logger.scalar_summary('lr_G_AB', current_lr_G_AB, iter)
            tb_logger.scalar_summary('d_B_loss', d_B_loss.item(), iter)
            tb_logger.scalar_summary('g_BA_loss', g_BA_loss.item(), iter)
            tb_logger.scalar_summary('Total_G_BA_loss', total_G_BA_loss.item(),
                                     iter)
            tb_logger.scalar_summary('lr_D_B', current_lr_D_B, iter)
            tb_logger.scalar_summary('lr_G_BA', current_lr_G_BA, iter)

            msg = "{:0>8},{}:{}, [{}/{}], {}: {}, {}: {}, {}:{}, {}: {},{}: {}".format(
                str(datetime.timedelta(seconds=round(time.time() - start_t))),
                "epoch", e + 1, batch_id + 1, len(train_data_loader),
                "total_G_AB_loss", total_G_AB_loss.item(), "total_G_BA_loss",
                total_G_BA_loss.item(),
                "iter time", (time.time() - start_t_batch), "d_A_loss",
                d_A_loss.item(), "d_B_loss", d_B_loss.item())
            print(msg)

            iter += 1

        D_A_scheduler.step()
        G_AB_scheduler.step()
        D_B_scheduler.step()
        G_BA_scheduler.step()

        if (e + 1) % params['model_save_interval'] == 0 and e > 0:
            model_save_dir = os.path.join(params['model_save_dir'],
                                          params['exp_name'])
            os.makedirs(model_save_dir, exist_ok=True)
            D_A_ckpt_model_filename = "D_A_iter_%d.pth" % (e + 1)
            G_AB_ckpt_model_filename = "G_AB_iter_%d.pth" % (e + 1)
            D_A_model_save_path = os.path.join(model_save_dir,
                                               D_A_ckpt_model_filename)
            G_AB_model_save_path = os.path.join(model_save_dir,
                                                G_AB_ckpt_model_filename)
            D_B_ckpt_model_filename = "D_B_iter_%d.pth" % (e + 1)
            G_BA_ckpt_model_filename = "G_BA_iter_%d.pth" % (e + 1)
            model_ckpt_model_filename = "Cyclegan_iter_%d.pth" % (e + 1)
            D_B_model_save_path = os.path.join(model_save_dir,
                                               D_B_ckpt_model_filename)
            G_BA_model_save_path = os.path.join(model_save_dir,
                                                G_BA_ckpt_model_filename)
            model_all_path = os.path.join(model_save_dir,
                                          model_ckpt_model_filename)
            torch.save(
                {
                    'G_AB_state_dict': G_AB.module.state_dict(),
                    'G_BA_state_dict': G_BA.module.state_dict(),
                    'D_A_state_dict': D_A.module.state_dict(),
                    'D_B_state_dict': D_B.module.state_dict(),
                    'optimizer_G_AB_state_dict': optimizer_G_AB.state_dict(),
                    'optimizer_G_BA_state_dict': optimizer_G_BA.state_dict(),
                    'optimizer_D_A_state_dict': optimizer_D_A.state_dict(),
                    'optimizer_D_B_state_dict': optimizer_D_B.state_dict()
                }, model_all_path)
            torch.save(D_A.module.state_dict(), D_A_model_save_path)
            torch.save(G_AB.module.state_dict(), G_AB_model_save_path)
            torch.save(D_B.module.state_dict(), D_B_model_save_path)
            torch.save(G_BA.module.state_dict(), G_BA_model_save_path)
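
Note: fake_A_buffer and fake_B_buffer are used in this example but never defined in it; they are presumably CycleGAN-style replay buffers created at module level. Below is a minimal sketch of such a buffer, assuming only the push_and_pop interface used above; this is the common pattern, not the example's actual implementation.

import random
import torch


class ReplayBuffer:
    """Keeps a pool of previously generated samples and, with probability 0.5,
    returns an old sample instead of the newest one (CycleGAN-style)."""

    def __init__(self, max_size=50):
        self.max_size = max_size
        self.data = []

    def push_and_pop(self, batch):
        out = []
        for element in batch.detach():
            element = element.unsqueeze(0)
            if len(self.data) < self.max_size:
                self.data.append(element)   # pool not full yet: store and return the new sample
                out.append(element)
            elif random.uniform(0, 1) > 0.5:
                i = random.randint(0, self.max_size - 1)
                out.append(self.data[i].clone())  # return an old sample ...
                self.data[i] = element            # ... and replace it with the new one
            else:
                out.append(element)
        return torch.cat(out)


# hypothetical module-level buffers matching the names used in the example
fake_A_buffer = ReplayBuffer()
fake_B_buffer = ReplayBuffer()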
Example #7
def main():
    print('------------')
    opt.manualSeed = random.randint(1, 100)                                     # pick a random seed
    random.seed(opt.manualSeed)                                                 # seed Python's RNG
    torch.manual_seed(opt.manualSeed)                                           # seed PyTorch's RNG
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')       # select the device

    opt.num_objects = 13                                                        # number of object classes in the training data
    opt.num_points = 500                                                        # number of input point-cloud points
    opt.outf = 'trained_models/linemod'                                         # directory for saved models
    opt.log_dir = 'logs/linemod'                                                # directory for logs
    opt.repeat_epoch = 20                                                       # how many times the dataset is repeated per epoch

    estimator = PoseNet(num_points=opt.num_points, num_obj=opt.num_objects)     # network that predicts the object's 6D pose
    print(estimator)
    estimator.to(device)                                                        # move to the selected device
    refiner = PoseRefineNet(num_points=opt.num_points, num_obj=opt.num_objects) # refines the initial pose prediction
    print(refiner)
    refiner.to(device)                                                          # move to the selected device

    if opt.resume_posenet != '':                                                # resume PoseNet from a checkpoint, if given
        estimator.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_posenet)))

    if opt.resume_refinenet != '':                                              # resume the refine network from a checkpoint, if given
        refiner.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_refinenet)))

        opt.refine_start = True                                                 # mark that the refine network has started training
        opt.decay_start = True                                                  # mark that the decay schedule has started
        opt.lr *= opt.lr_rate                                                   # decay the learning rate
        opt.w *= opt.w_rate                                                     # decay the loss weight
        opt.batch_size = int(opt.batch_size / opt.iteration)                    # shrink the batch size accordingly

        optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)                 # optimizer for the refiner
    else:
        opt.refine_start = False                                                # refine network has not started training
        opt.decay_start = False                                                 # decay schedule has not started
        optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)               # optimizer for PoseNet

    # load the training dataset
    dataset = PoseDataset('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers)

    # load the validation dataset
    test_dataset = PoseDataset('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)

    opt.sym_list = dataset.get_sym_list()                                       # list of symmetric objects
    opt.num_points_mesh = dataset.get_num_points_mesh()                         # number of mesh sample points
    print('----------Dataset loaded!---------\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber '
          'of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset),
                                                                            opt.num_points_mesh, opt.sym_list))

    criterion = Loss(opt.num_points_mesh, opt.sym_list)                         # loss
    criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)           # refine loss

    best_test = np.Inf                                                          # best test distance so far, initialised to infinity
    if opt.start_epoch == 1:                                                    # starting from scratch, so remove old logs
        for log in os.listdir(opt.log_dir):
            os.remove(os.path.join(opt.log_dir, log))
    st_time = time.time()                                                       # record the start time

    # main training loop -----------------------------------!

    for epoch in range(opt.start_epoch, opt.nepoch):
        # set up a logger for this epoch
        logger = setup_logger('epoch%d' % epoch, os.path.join(opt.log_dir, 'epoch_%d_log.txt' % epoch))
        # record the epoch start time
        logger.info('Train time {0}'.format(time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time)) + ', ' + 'Training started'))

        train_count = 0
        train_dis_avg = 0.0                                                     # running sum used for the average distance

        if opt.refine_start:                                                    # if the refine network is already training
            estimator.eval()
            refiner.train()
        else:
            estimator.train()

        optimizer.zero_grad()                                                   # clear accumulated gradients

        for rep in range(opt.repeat_epoch):                                     # repeat the dataset several times per epoch
            for i, data in enumerate(dataloader, 0):
                points, choose, img, target, model_points, idx = data           # read one sample
                '''
                points: point cloud computed from the depth image, in the camera frame
                choose: indices of the selected points [bs, 1, 500]
                img: RGB image cropped to the bounding box
                target: model_points transformed by the ground-truth rotation and translation [bs, 500, 3]
                model_points: point cloud of the object model in its initial frame
                idx: index of the training image
                '''
                # move the data to the device

                points, choose, img, target, model_points, idx = points.to(device), choose.to(device), img.to(device), target.to(device), model_points.to(device), idx.to(device)
                # predict the pose and the per-point feature embedding
                pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
                '''
                pred_r: predicted rotation (quaternion) [bs, 500, 4]
                pred_t: predicted translation [bs, 500, 3]
                pred_c: confidence [bs, 500, 1]
                '''
                # compute the loss
                loss, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start)

                # if the refiner has started training
                if opt.refine_start:
                    for iter in range(0, opt.iteration):
                        pred_r, pred_t = refiner(new_points, emb, idx)          # refine the prediction
                        # compute the refine loss to get dis
                        dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points)
                        dis.backward()                                          # back-propagate dis
                else:
                    loss.backward()                                             # otherwise back-propagate the PoseNet loss

                train_dis_avg += dis.item()                                     # accumulate for the average distance
                train_count += 1

                # log and update every opt.batch_size samples
                if train_count % opt.batch_size == 0:
                    logger.info('Train time {0} Epoch {1} Batch {2} Frame {3} Avg_dis:{4}'
                                .format(time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time)),
                                        epoch, int(train_count / opt.batch_size), train_count, train_dis_avg / opt.batch_size))
                    optimizer.step()                                            # update the parameters
                    optimizer.zero_grad()                                       # clear gradients
                    train_dis_avg = 0.0

                if train_count != 0 and train_count % 1000 == 0:                # save a checkpoint every 1000 samples
                    if opt.refine_start:                                        # the refiner has started training
                        torch.save(refiner.state_dict(), '{0}/pose_refine_model_current.pth'.format(opt.outf))
                    else:
                        torch.save(estimator.state_dict(), '{0}/pose_model_current.pth'.format(opt.outf))

        print('------------ epoch {0} train finish -----------'.format(epoch))
        # evaluation
        logger = setup_logger('epoch%d test' % epoch, os.path.join(opt.log_dir, 'epoch_%d_test_log.txt' % epoch))
        # record the test start time
        logger.info('Test time {0}'.format(time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time)) + ',' + 'Testing started'))
        test_dis = 0.0
        test_count = 0
        estimator.eval()                                                        # switch PoseNet to eval mode
        refiner.eval()                                                          # switch the refiner to eval mode

        for j, data in enumerate(test_dataloader, 0):
            points, choose, img, target, model_points, idx = data  # read one sample
            '''
            points: point cloud computed from the depth image, in the camera frame
            choose: indices of the selected points [bs, 1, 500]
            img: RGB image cropped to the bounding box
            target: model_points transformed by the ground-truth rotation and translation [bs, 500, 3]
            model_points: point cloud of the object model in its initial frame
            idx: index of the test image
            '''
            # move the data to the device
            points, choose, img, target, model_points, idx = points.to(device), choose.to(device), img.to(
                device), target.to(device), model_points.to(device), idx.to(device)

            # predict the pose and the per-point feature embedding
            pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
            '''
            pred_r: predicted rotation (quaternion) [bs, 500, 4]
            pred_t: predicted translation [bs, 500, 3]
            pred_c: confidence [bs, 500, 1]
            '''

            # evaluate the prediction
            _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start)

            # if the refine network has started training, evaluate it as well
            if opt.refine_start:
                for iter in range(0, opt.iteration):
                    pred_r, pred_t = refiner(new_points, emb, idx)
                    dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points)

            test_dis += dis.item()                                              # accumulate for the average distance
            # log the per-frame result
            logger.info('Test time {0} Test Frame No.{1} dis:{2}'.format(time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time)), test_count, dis))
            test_count += 1

        test_dis = test_dis / test_count                                        # average distance over the test set
        logger.info('Test time {0} Epoch {1} Test finish avg_dis:{2}'.format(time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time)), epoch, test_dis))

        if test_dis <= best_test:                                               # if this is the best result so far, save the model
            best_test = test_dis
            if opt.refine_start:                                                # save the refiner
                torch.save(refiner.state_dict(), '{0}/pose_refine_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis))
            else:
                torch.save(estimator.state_dict(), '{0}/pose_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis))
            print('----------------test model save finished-------------------')

        # parameter schedule

        # check whether the decay threshold has been reached
        if best_test < opt.decay_margin and not opt.decay_start:
            opt.decay_start = True
            opt.lr *= opt.lr_rate                                               # decay the learning rate
            opt.w *= opt.w_rate                                                 # decay the loss weight
            optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)

        # once the distance threshold is reached and refine_start is still False, switch to
        # training the refiner, update the related parameters, and rebuild the datasets and dataloaders
        if best_test < opt.refine_margin and not opt.refine_start:
            opt.refine_start = True
            opt.batch_size = int(opt.batch_size / opt.iteration)
            optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)

            # training set
            dataset = PoseDataset('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start)
            dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers)
            # test set
            test_dataset = PoseDataset('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
            test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)

            opt.sym_list = dataset.get_sym_list()
            opt.num_points_mesh = dataset.get_num_points_mesh()
            print('----------Dataset loaded!---------\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber '
                'of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset),
                                                                                  opt.num_points_mesh, opt.sym_list))
            criterion = Loss(opt.num_points_mesh, opt.sym_list)
            criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
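
Note: because the dataloader in this example uses batch_size=1, the effective batch is built by gradient accumulation: the loss is back-propagated sample by sample and optimizer.step() runs only once every opt.batch_size samples. Below is a self-contained toy sketch of that pattern; none of the objects in it come from the example.

import torch
from torch import nn, optim

# toy setup to illustrate the pattern; all names here are placeholders
model = nn.Linear(4, 1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
accumulation_steps = 8                       # plays the role of opt.batch_size

samples = [(torch.randn(1, 4), torch.randn(1, 1)) for _ in range(32)]

optimizer.zero_grad()
for step, (x, y) in enumerate(samples, 1):   # batch_size=1, like the dataloader above
    loss = loss_fn(model(x), y)
    loss.backward()                          # gradients accumulate across samples
    if step % accumulation_steps == 0:       # one "virtual batch" is complete
        optimizer.step()                     # update once per accumulated batch
        optimizer.zero_grad()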
Example #8
refiner.to(device)

estimator.load_state_dict(torch.load(opt.model))  # load the PoseNet weights
refiner.load_state_dict(torch.load(opt.refine_model))  # load the PoseRefineNet weights
estimator.eval()
refiner.eval()

# load the dataset in eval mode
test_dataset = PoseDataset('eval', num_points, False, opt.dataset_root, 0.0,
                           True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

sym_list = test_dataset.get_sym_list()  # indices of the symmetric objects
num_points_mesh = test_dataset.get_num_points_mesh()  # 500

criterion = Loss(num_points_mesh, sym_list)  # build the loss
criterion_refine = Loss_refine(num_points_mesh, sym_list)  # build the refine loss

diameter = []  # per-object diameter thresholds used to judge the results
meta_file = open(dataset_config_dir, 'r')  # open the model info file
meta = yaml.load(meta_file, Loader=yaml.FullLoader)  # parse the model info file

for obj in objlist:
    diameter.append(meta[obj]['diameter'] / 1000.0 * 0.1)  # 10% of the object diameter, in metres

success_count = [0 for i in range(num_objects)]  # number of successful predictions per object
num_count = [0 for i in range(num_objects)]  # total number of test samples per object

fw = open('{0}/eval_result_logs.txt'.format(output_result_dir), 'w')  # result log file

for i, data in enumerate(test_dataloader, 0):  # iterate over the test set