Example #1
	def forward(self, inputs, gt_data=None, epoch=0, mode="generator"):
		assert mode in ("discriminator", "generator"), "Invalid network mode '{}'".format(mode)
		inputs = network.np_to_variable(inputs, is_cuda=True, is_training=self.training)
		if not self.training:
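			# inference: only the large-scale generator is run on the full input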
			g_l = self.g_large(inputs)
		else:
			gt_data = network.np_to_variable(gt_data, is_cuda=True, is_training=self.training)
			#chunk the input and ground-truth data into 4 parts
			inputs_chunks, gt_data_chunks = self.chunk_input(inputs, gt_data)

			if mode == "generator":
				# g_large
				x_l, self.loss_gen_large = self.adv_loss_generator(self.g_large, self.d_large, inputs)
				self.loss_gen_large += self.alpha_euclidean * self.euclidean_loss(x_l, gt_data)
				self.loss_gen_large += self.alpha_perceptual * self.perceptual_loss(x_l, gt_data)

				# g_small
				x_s, self.loss_gen_small = self.adv_loss_generator(self.g_small, self.d_small, inputs_chunks)
				self.loss_gen_small += self.alpha_euclidean * self.euclidean_loss(x_s, gt_data_chunks)
				self.loss_gen_small += self.alpha_perceptual * self.perceptual_loss(x_s, gt_data_chunks)

				if epoch >= 100:
					self.alpha_cscp = 10
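				# add the cscp term, computed between the large- and small-scale outputs, to both generator losses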
				self.loss_gen_large += self.alpha_cscp * self.cscp_loss(x_l, x_s)
				self.loss_gen_small += self.alpha_cscp * self.cscp_loss(x_l, x_s)

				self.loss_gen = self.loss_gen_large + self.loss_gen_small
			else:
				#d_large
				x_l, self.loss_dis_large = self.adv_loss_discriminator(self.g_large, self.d_large, inputs, gt_data)

				#d_small
				x_s, self.loss_dis_small = self.adv_loss_discriminator(self.g_small, self.d_small, inputs_chunks, gt_data_chunks)
			g_l = x_l
		return g_l
Example #2
    def forward(self, im_data, gt_data=None):
        im_data = network.np_to_variable(im_data,
                                         is_cuda=True,
                                         is_training=self.training)
        density_map = self.net(im_data)

        if self.training:
            gt_data = network.np_to_variable(gt_data,
                                             is_cuda=True,
                                             is_training=self.training)
            self.loss = self.loss_fn(density_map, gt_data)
        return density_map
def train_gan(train_test_unit, out_dir_root, args):
    output_dir = osp.join(out_dir_root, train_test_unit.metadata['name'])
    mkdir_if_missing(output_dir)
    output_dir_model = osp.join(output_dir, 'models')
    mkdir_if_missing(output_dir_model)
    if args.resume:
        sys.stdout = Logger(osp.join(output_dir, 'log_train.txt'), mode='a')
        plotter = LossPlotter(output_dir, mode='a')
    else:
        sys.stdout = Logger(osp.join(output_dir, 'log_train.txt'))
        plotter = LossPlotter(output_dir, mode='w')
    print("==========\nArgs:{}\n==========".format(args))

    dataset_name = train_test_unit.metadata['name']
    train_path = train_test_unit.train_dir_img
    train_gt_path = train_test_unit.train_dir_den
    val_path = train_test_unit.val_dir_img
    val_gt_path = train_test_unit.val_dir_den

    #training configuration
    start_step = args.start_epoch
    end_step = args.max_epoch

    #log frequency
    disp_interval = args.train_batch * 20

    # ------------
    rand_seed = args.seed
    if rand_seed is not None:
        np.random.seed(rand_seed)
        torch.manual_seed(rand_seed)
        torch.cuda.manual_seed(rand_seed)

    best_mae = sys.maxsize  # best mae
    current_patience = 0

    mse_criterion = nn.MSELoss()

    # load net and optimizer
    net = CrowdCounter(model=args.model, channel_param=args.channel_param)
    net.cuda()
    net.train()
    #optimizerG = torch.optim.RMSprop(filter(lambda p: p.requires_grad, net.net.parameters()), lr=lr)
    #optimizerD = torch.optim.RMSprop(filter(lambda p: p.requires_grad, net.gan_net.parameters()), lr=lrc)
    optimizerG, optimizerD = get_optimizers(args, net)
    if args.reduce_lr_on_plateau:
        schedulerG = lr_scheduler.ReduceLROnPlateau(
            optimizerG,
            patience=args.scheduler_patience,
            factor=args.scheduler_factor,
            cooldown=args.scheduler_cooldown,
            min_lr=args.min_lr,
            verbose=True)
        schedulerD = lr_scheduler.ReduceLROnPlateau(
            optimizerD,
            patience=args.scheduler_patience,
            factor=args.scheduler_factor,
            cooldown=args.scheduler_cooldown,
            min_lr=args.min_lrc,
            verbose=True)
    elif args.step_lr:
        schedulerG = lr_scheduler.StepLR(optimizerG,
                                         args.scheduler_step_size,
                                         gamma=args.scheduler_gamma,
                                         verbose=True)
        schedulerD = lr_scheduler.StepLR(optimizerD,
                                         args.scheduler_step_size,
                                         gamma=args.scheduler_gamma,
                                         verbose=True)
    if not args.resume:
        network.weights_normal_init(net.net, dev=0.01)

    else:
        if args.resume[-3:] == '.h5':  #don't use this option!
            pretrained_model = args.resume
        else:
            resume_dir = osp.join(args.resume,
                                  train_test_unit.metadata['name'])
            if args.last_model:
                pretrained_model = osp.join(resume_dir, 'last_model.h5')
                f = open(osp.join(resume_dir, "current_values.bin"), "rb")
                current_patience = pickle.load(f)
                f.close()
                f = torch.load(osp.join(resume_dir, 'optimizer.pth'))
                optimizerD.load_state_dict(f['opt_d'])
                optimizerG.load_state_dict(f['opt_g'])
                if args.reduce_lr_on_plateau or args.step_lr:
                    schedulerD.load_state_dict(f['sch_d'])
                    schedulerG.load_state_dict(f['sch_g'])
            else:
                pretrained_model = osp.join(resume_dir, 'best_model.h5')
                current_patience = 0
            f = open(osp.join(resume_dir, "best_values.bin"), "rb")
            best_mae, best_mse, best_model, _ = pickle.load(f)
            f.close()
            print(
                "Best MAE: {0:.4f}, Best MSE: {1:.4f}, Best model: {2}, Current patience: {3}"
                .format(best_mae, best_mse, best_model, current_patience))

        network.load_net(pretrained_model, net)
        print('Will apply fine-tuning over', pretrained_model)

    # training
    train_lossG = 0
    train_lossD = 0
    step_cnt = 0
    re_cnt = False
    t = Timer()
    t.tic()

    # gan labels
    real_label = 1
    fake_label = 0

    netD = net.gan_net
    netG = net.net

    data_loader = ImageDataLoader(train_path,
                                  train_gt_path,
                                  shuffle=True,
                                  batch_size=args.train_batch,
                                  den_scale=1)
    data_loader_val = ImageDataLoader(val_path,
                                      val_gt_path,
                                      shuffle=False,
                                      batch_size=1,
                                      den_scale=1,
                                      testing=True)

    for epoch in range(start_step, end_step + 1):
        step = 0
        train_lossG = 0
        train_lossD = 0
        train_lossG_mse = 0
        train_lossG_gan = 0

        for blob in data_loader:
            optimizerG.zero_grad()
            optimizerD.zero_grad()
            step = step + args.train_batch
            im_data = blob['data']
            gt_data = blob['gt_density']
            im_data_norm_a = im_data / 127.5 - 1.  #normalize between -1 and 1
            gt_data_a = gt_data * args.den_scale_factor

            errD_epoch = 0

            # critic (discriminator) updates: args.ncritic passes per generator update
            for critic_epoch in range(args.ncritic):
                im_data_norm = network.np_to_variable(im_data_norm_a,
                                                      is_cuda=True,
                                                      is_training=True)
                gt_data = network.np_to_variable(gt_data_a,
                                                 is_cuda=True,
                                                 is_training=True)

                netD.zero_grad()
                netG.zero_grad()

                #real data discriminator
                b_size = gt_data.size(0)
                output_real = netD(gt_data).view(-1)

                #fake data discriminator
                density_map = netG(im_data_norm)
                output_fake = netD(density_map.detach()).view(-1)

                # WGAN critic loss: negative of (mean score on real - mean score on fake)
                errD = -(torch.mean(output_real) - torch.mean(output_fake))
                errD.backward()
                optimizerD.step()

                # clip critic weights to [-0.01, 0.01] (WGAN weight clipping)
                for p in netD.parameters():
                    p.data.clamp_(-0.01, 0.01)

                errD_epoch += errD.data.item()

            errD_epoch /= args.ncritic

            #Generator update
            netG.zero_grad()
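            # generator loss: pixel-wise MSE against the ground-truth density map
            # plus the WGAN adversarial term weighted by args.alpha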
            output_fake = netD(density_map).view(-1)
            errG_gan = -torch.mean(output_fake)
            errG_mse = mse_criterion(density_map, gt_data)
            #errG = (1-args.alpha)*errG_mse + args.alpha*errG_gan
            errG = errG_mse + args.alpha * errG_gan
            errG.backward()
            optimizerG.step()

            train_lossG += errG.data.item()
            train_lossG_mse += errG_mse.data.item()
            train_lossG_gan += errG_gan.data.item()
            train_lossD += errD_epoch
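            # move predictions and targets back to numpy and undo the density scaling for count logging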
            density_map = density_map.data.cpu().numpy()
            density_map /= args.den_scale_factor
            gt_data = gt_data.data.cpu().numpy()
            gt_data /= args.den_scale_factor

            step_cnt += 1
            if step % disp_interval == 0:
                duration = t.toc(average=False)
                fps = step_cnt / duration
                train_batch_size = gt_data.shape[0]
                gt_count = np.sum(gt_data.reshape(train_batch_size, -1),
                                  axis=1)
                et_count = np.sum(density_map.reshape(train_batch_size, -1),
                                  axis=1)

                print(
                    "epoch: {0}, step {1}/{5}, Time: {2:.4f}s, gt_cnt[0]: {3:.4f}, et_cnt[0]: {4:.4f}, mean_diff: {6:.4f}"
                    .format(epoch, step, 1. / fps, gt_count[0], et_count[0],
                            data_loader.num_samples,
                            np.mean(np.abs(gt_count - et_count))))
                re_cnt = True

            if re_cnt:
                t.tic()
                re_cnt = False

        #save model and optimizer
        save_name = os.path.join(
            output_dir_model, '{}_{}_{}.h5'.format(train_test_unit.to_string(),
                                                   dataset_name, epoch))
        network.save_net(save_name, net)
        network.save_net(os.path.join(output_dir, "last_model.h5"), net)

        #calculate error on the validation dataset
        mae, mse = evaluate_model(save_name,
                                  data_loader_val,
                                  model=args.model,
                                  save_test_results=args.save_plots,
                                  plot_save_dir=osp.join(
                                      output_dir, 'plot-results-train/'),
                                  den_scale_factor=args.den_scale_factor,
                                  channel_param=args.channel_param)
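        # track the best validation MAE: checkpoint the model and reset patience on improvement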
        if mae < best_mae:
            best_mae = mae
            best_mse = mse
            current_patience = 0
            best_model = '{}_{}_{}.h5'.format(train_test_unit.to_string(),
                                              dataset_name, epoch)
            network.save_net(os.path.join(output_dir, "best_model.h5"), net)
            f = open(os.path.join(output_dir, "best_values.bin"), "wb")
            pickle.dump((best_mae, best_mse, best_model, current_patience), f)
            f.close()

        else:
            current_patience += 1

        f = open(os.path.join(output_dir, "current_values.bin"), "wb")
        pickle.dump(current_patience, f)
        f.close()

        # update lr
        if args.reduce_lr_on_plateau:
            schedulerD.step(train_lossG_mse)
            schedulerG.step(train_lossG_mse)
        elif args.step_lr:
            schedulerD.step()
            schedulerG.step()
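        # persist optimizer (and scheduler) state so training can resume from the last epoch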
        optim_dict = {
            "opt_d": optimizerD.state_dict(),
            "opt_g": optimizerG.state_dict()
        }
        if args.reduce_lr_on_plateau or args.step_lr:
            optim_dict['sch_d'] = schedulerD.state_dict()
            optim_dict['sch_g'] = schedulerG.state_dict()
        torch.save(optim_dict, os.path.join(output_dir, "optimizer.pth"))

        plotter.report(train_lossG_mse, train_lossG_gan, train_lossD)
        plotter.save()
        plotter.plot()

        print(
            "Epoch: {0}, MAE: {1:.4f}, MSE: {2:.4f}, lossG: {3:.4f}, lossG_mse: {4:.4f}, lossG_gan: {5:.4f}, lossD: {6:.4f}"
            .format(epoch, mae, mse, train_lossG, train_lossG_mse,
                    train_lossG_gan, train_lossD))
        print("Best MAE: {0:.4f}, Best MSE: {1:.4f}, Best model: {2}".format(
            best_mae, best_mse, best_model))
        print("Patience: {0}/{1}".format(current_patience, args.patience))
        sys.stdout.close_open()

        if current_patience > args.patience and args.patience > -1:
            break