class FCNSegmentor(object):
    """
      The class for FCN-based semantic segmentation. Includes train & val.

      Usage: construct with a configer, call init_model() once, then train().
    """
    def __init__(self, configer):
        """
          Create the meters and helper objects. The network, data loaders,
          optimizer and loss stay unset until init_model() is called.

          Args:
            configer: configuration object read through configer.get(...).
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.seg_visualizer = SegVisualizer(configer)
        self.seg_loss_manager = SegLossManager(configer)
        self.module_utilizer = ModuleUtilizer(configer)
        self.seg_model_manager = SegModelManager(configer)
        self.seg_data_loader = SegDataLoader(configer)

        self.seg_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.pixel_loss = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """
          Build the network, optimizer, data loaders and pixel loss.

          The iteration counter always starts at 0; the second value returned
          by load_net() is ignored. Only the 'cityscape' dataset is handled:
          any other value is logged as an error and the process exits with
          status 1.
        """
        self.seg_net = self.seg_model_manager.seg_net()
        self.iters = 0
        self.seg_net, _ = self.module_utilizer.load_net(self.seg_net)

        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

        if self.configer.get('dataset') == 'cityscape':
            self.train_loader = self.seg_data_loader.get_trainloader(FSCityScapeLoader)
            self.val_loader = self.seg_data_loader.get_valloader(FSCityScapeLoader)

        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.pixel_loss = self.seg_loss_manager.get_seg_loss('cross_entropy_loss')

    def __train(self):
        """
          Train function of every epoch during train phase.

          Iterates once over the train loader and stops early as soon as
          solver.max_iter iterations have been done, so training does not
          overshoot the configured budget in the middle of an epoch.
        """
        self.seg_net.train()
        display_iter = self.configer.get('solver', 'display_iter')
        test_interval = self.configer.get('solver', 'test_interval')
        max_iter = self.configer.get('solver', 'max_iter')
        start_time = time.time()

        # data_tuple: (inputs, targets, ...); only the first two items are used.
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # Non-zero status: this is a failure, not a clean shutdown.
                exit(1)

            # `async` is a reserved word since Python 3.7; PyTorch renamed the
            # argument to `non_blocking`. Tensors no longer need a Variable wrapper.
            inputs = data_tuple[0].cuda(non_blocking=True)
            targets = data_tuple[1].cuda(non_blocking=True)

            # Forward pass.
            outputs = self.seg_net(inputs)

            # Compute the loss of the train batch & backward.
            loss = self.pixel_loss(outputs, targets)
            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % display_iter == 0:
                Log.info('Train Iteration: {0}\t'
                         'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                         'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                         'Learning rate = {2}\n'
                         'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                             self.iters, display_iter, self.lr,
                             batch_time=self.batch_time,
                             data_time=self.data_time,
                             loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and self.iters % test_interval == 0:
                self.__val()

            # Refresh the optimizer / learning rate for the next iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.seg_net, self.iters)

            # Stop inside the epoch once the iteration budget is used up.
            if self.iters >= max_iter:
                break

    def __val(self):
        """
          Validation function during the train phase.

          Runs the whole val loader without gradient tracking, saves the
          network through the module utilizer, logs the average val loss and
          switches the network back to train mode.
        """
        # Imported here so the method does not depend on a file-level `import torch`.
        import torch

        self.seg_net.eval()
        start_time = time.time()

        # torch.no_grad() replaces the removed Variable(..., volatile=True).
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                targets = data_tuple[1].cuda(non_blocking=True)
                # Forward pass.
                outputs = self.seg_net(inputs)
                # Compute the loss of the val batch.
                loss = self.pixel_loss(outputs, targets)

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.seg_net, self.iters)
        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        # NOTE: batch_time is shared with the train loop, so this reset also
        # drops train timings accumulated since the last display.
        self.batch_time.reset()
        self.val_losses.reset()
        self.seg_net.train()

    def train(self):
        """
          Run training epochs until solver.max_iter iterations are reached.

          Exits with status 1 if an epoch makes no progress (the train loader
          yielded no batches), instead of looping forever.
        """
        cudnn.benchmark = True
        max_iter = self.configer.get('solver', 'max_iter')
        while self.iters < max_iter:
            iters_before = self.iters
            self.__train()
            if self.iters == iters_before:
                Log.error('Train Loader Error!')
                exit(1)
class ConvPoseMachine(object):
    """
      The class for Pose Estimation. Includes train, val & test.

      Usage: construct with a configer, call init_model() with the data
      loaders, then train().
    """
    def __init__(self, configer):
        """
          Create the meters and helper objects. The network, loaders,
          optimizer and loss stay unset until init_model() is called.

          Args:
            configer: configuration object read through configer.get(...).
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.pose_visualizer = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.train_utilizer = ModuleUtilizer(configer)

        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.heatmap_loss = None
        self.best_model_loss = None
        self.is_best = None
        self.lr = None
        self.iters = None

    def init_model(self, train_loader=None, val_loader=None):
        """
          Build the network, optimizer and heatmap loss, and store the loaders.

          Args:
            train_loader: iterable of training batches.
            val_loader: iterable of validation batches, or None to skip validation.
        """
        self.pose_net = self.model_manager.pose_detector()

        # load_net() also supplies the iteration count to start from.
        self.pose_net, self.iters = self.train_utilizer.load_net(self.pose_net)

        self._refresh_optimizer()

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.heatmap_loss = self.loss_manager.get_pose_loss('heatmap_loss')

    def _refresh_optimizer(self):
        """
          Fetch the optimizer for the current iteration from the utilizer.

          Sibling trainers unpack update_optimizer() as (optimizer, lr); a bare
          optimizer is accepted as well so either utilizer variant works.
        """
        result = self.train_utilizer.update_optimizer(self.pose_net, self.iters)
        if isinstance(result, tuple):
            self.optimizer, self.lr = result
        else:
            self.optimizer = result

    def __train(self):
        """
          Train function of every epoch during train phase.

          Iterates once over the train loader and stops early as soon as
          solver.max_iter iterations have been done.
        """
        self.pose_net.train()
        display_iter = self.configer.get('solver', 'display_iter')
        test_interval = self.configer.get('solver', 'test_interval')
        max_iter = self.configer.get('solver', 'max_iter')
        start_time = time.time()

        # data_tuple: (inputs, heatmap[, maskmap, ...]); the mask is optional.
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # Non-zero status: this is a failure, not a clean shutdown.
                exit(1)

            # `async` is a reserved word since Python 3.7; PyTorch renamed the
            # argument to `non_blocking`. Tensors no longer need a Variable wrapper.
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            self.pose_visualizer.vis_tensor(heatmap, name='heatmap')
            self.pose_visualizer.vis_tensor((inputs*256+128)/255, name='image')
            # Forward pass.
            outputs = self.pose_net(inputs)

            self.pose_visualizer.vis_tensor(outputs, name='output')
            self.pose_visualizer.vis_peaks(inputs, outputs, name='peak')
            # Compute the loss of the train batch & backward.
            loss = self.heatmap_loss(outputs, heatmap, maskmap)

            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % display_iter == 0:
                Log.info('Train Iteration: {0}\t'
                         'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                         'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                         'Learning rate = {2}\n'
                         'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                             self.iters, display_iter, self.lr,
                             batch_time=self.batch_time,
                             data_time=self.data_time,
                             loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and self.iters % test_interval == 0:
                self.__val()

            # Refresh the optimizer / learning rate for the next iteration.
            self._refresh_optimizer()

            # Stop inside the epoch once the iteration budget is used up.
            if self.iters >= max_iter:
                break

    def __val(self):
        """
          Validation function during the train phase.

          Runs the whole val loader without gradient tracking, logs the average
          val loss and switches the network back to train mode.
        """
        # Imported here so the method does not depend on a file-level `import torch`.
        import torch

        self.pose_net.eval()
        start_time = time.time()

        # torch.no_grad() replaces the removed Variable(..., volatile=True).
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                outputs = self.pose_net(inputs)
                self.pose_visualizer.vis_peaks(inputs, outputs, name='peak_val')
                # Compute the loss of the val batch.
                loss = self.heatmap_loss(outputs, heatmap, maskmap)

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        # NOTE: batch_time is shared with the train loop, so this reset also
        # drops train timings accumulated since the last display.
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """
          Run training epochs until solver.max_iter iterations are reached.

          Exits with status 1 if an epoch makes no progress (the train loader
          yielded no batches), instead of looping forever.
        """
        cudnn.benchmark = True
        max_iter = self.configer.get('solver', 'max_iter')
        while self.iters < max_iter:
            iters_before = self.iters
            self.__train()
            if self.iters == iters_before:
                Log.error('Train Loader Error!')
                exit(1)

    # NOTE(review): this method looks cut off in the visible source; only the
    # image-loading step is present and img_dir is not used yet — confirm
    # against the complete file.
    def test(self, img_path=None, img_dir=None):
        if img_path is not None and os.path.exists(img_path):
            image = Image.open(img_path).convert('RGB')
Esempio n. 3
0
class Trainer(CheckpointRunner):
    """Training runner: builds model, optimizer, loss and evaluators, then drives the epoch loop."""

    # noinspection PyAttributeOutsideInit
    def init_fn(self, shared_model=None, **kwargs):
        """
        Set up everything the training loop needs.

        Args:
            shared_model: an already-built model to reuse; when None a new model
                is created from the options and wrapped in DataParallel on GPU.
            **kwargs: accepted but not used here.

        Raises:
            NotImplementedError: for an unknown model, optimizer or loss name.
        """
        model_name = self.options.model.name
        dataset_opts = self.options.dataset

        if model_name != "pixel2mesh":
            self.renderer = None
        else:
            # Renderer used for visualization, plus the initial ellipsoid mesh.
            self.renderer = MeshRenderer(dataset_opts.camera_f, dataset_opts.camera_c,
                                         dataset_opts.mesh_pos)
            self.ellipsoid = Ellipsoid(dataset_opts.mesh_pos)

        if shared_model is None:
            # Build a fresh network for the requested model type.
            if model_name == "pixel2mesh":
                net = P2MModel(self.options.model, self.ellipsoid,
                               dataset_opts.camera_f, dataset_opts.camera_c,
                               dataset_opts.mesh_pos)
            elif model_name == "classifier":
                net = Classifier(self.options.model, dataset_opts.num_classes)
            else:
                raise NotImplementedError("Your model is not found")
            self.model = torch.nn.DataParallel(net, device_ids=self.gpus).cuda()
        else:
            self.model = shared_model

        # One optimizer over all parameters of the model.
        optim_opts = self.options.optim
        if optim_opts.name == "adam":
            self.optimizer = torch.optim.Adam(
                params=list(self.model.parameters()),
                lr=optim_opts.lr,
                betas=(optim_opts.adam_beta1, 0.999),
                weight_decay=optim_opts.wd,
            )
        elif optim_opts.name == "sgd":
            self.optimizer = torch.optim.SGD(
                params=list(self.model.parameters()),
                lr=optim_opts.lr,
                momentum=optim_opts.sgd_momentum,
                weight_decay=optim_opts.wd,
            )
        else:
            raise NotImplementedError("Your optimizer is not found")
        self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=optim_opts.lr_step, gamma=optim_opts.lr_factor
        )

        # Loss function matching the model type.
        if model_name == "pixel2mesh":
            self.criterion = P2MLoss(self.options.loss, self.ellipsoid).cuda()
        elif model_name == "classifier":
            self.criterion = CrossEntropyLoss()
        else:
            raise NotImplementedError("Your loss is not found")

        # Running meter for the training loss.
        self.losses = AverageMeter()

        # Evaluators share the training model.
        evaluator = Evaluator(self.options, self.logger, self.summary_writer,
                              shared_model=self.model)
        self.evaluators = [evaluator]

    def models_dict(self):
        """Return the models to checkpoint, keyed by name."""
        return dict(model=self.model)

    def optimizers_dict(self):
        """Return the optimizer and lr scheduler to checkpoint, keyed by name."""
        return dict(optimizer=self.optimizer, lr_scheduler=self.lr_scheduler)

    def train_step(self, input_batch):
        """
        Run one optimization step on a batch.

        Args:
            input_batch: mapping holding at least the "images" entry; the whole
                mapping is also handed to the criterion.

        Returns:
            Tuple of (detached model output, detached loss summary).
        """
        self.model.train()

        # Forward pass on the images of the batch.
        prediction = self.model(input_batch["images"])

        # Loss plus its per-term summary; track the scalar in the meter.
        total_loss, summary = self.criterion(prediction, input_batch)
        self.losses.update(total_loss.detach().cpu().item())

        # Backward pass and parameter update.
        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()

        # Detached copies are what the summaries consume.
        return recursive_detach(prediction), recursive_detach(summary)

    def train(self):
        """Run the epoch loop from the current epoch count up to train.num_epochs."""
        train_opts = self.options.train
        for _ in range(self.epoch_count, train_opts.num_epochs):
            self.epoch_count += 1

            # A new data loader is created for every epoch.
            loader = DataLoader(self.dataset,
                                batch_size=train_opts.batch_size * self.options.num_gpus,
                                num_workers=self.options.num_workers,
                                pin_memory=self.options.pin_memory,
                                shuffle=train_opts.shuffle,
                                collate_fn=self.dataset_collate_fn)

            # The loss meter restarts each epoch.
            self.losses.reset()

            for raw_batch in loader:
                # Move tensor entries to the GPU; other entries pass through.
                gpu_batch = {}
                for key, value in raw_batch.items():
                    gpu_batch[key] = value.cuda() if isinstance(value, torch.Tensor) else value

                step_outputs = self.train_step(gpu_batch)
                self.step_count += 1

                # Periodic Tensorboard / log summaries.
                if self.step_count % train_opts.summary_steps == 0:
                    self.train_summaries(gpu_batch, *step_outputs)

                # Periodic checkpoint inside the epoch.
                if self.step_count % train_opts.checkpoint_steps == 0:
                    self.dump_checkpoint()

            # Checkpoint at the end of every epoch.
            self.dump_checkpoint()

            # Validation every test_epochs epochs.
            if self.epoch_count % train_opts.test_epochs == 0:
                self.test()

            # Advance the learning-rate schedule once per epoch.
            self.lr_scheduler.step()

    def train_summaries(self, input_batch, out_summary, loss_summary):
        """
        Write training summaries to the summary writer and the logger.

        Args:
            input_batch: the batch that was trained on.
            out_summary: detached model output for that batch.
            loss_summary: mapping of scalar name to value.
        """
        step = self.step_count

        if self.renderer is not None:
            # Rendered mesh visualization and histogram of the "length" entry.
            rendered = self.renderer.p2m_batch_visualize(input_batch, out_summary,
                                                         self.ellipsoid.faces)
            self.summary_writer.add_image("render_mesh", rendered, step)
            lengths = input_batch["length"].cpu().numpy()
            self.summary_writer.add_histogram("length_distribution", lengths, step)

        # Filenames of the batch, for debugging.
        self.logger.debug(input_batch["filename"])

        # Scalars go to Tensorboard.
        for tag, value in loss_summary.items():
            self.summary_writer.add_scalar(tag, value, self.step_count)

        # One progress line in the log.
        samples_per_step = self.options.train.batch_size * self.options.num_gpus
        total_steps = self.options.train.num_epochs * len(self.dataset) // samples_per_step
        message = "Epoch %03d, Step %06d/%06d, Time elapsed %s, Loss %.9f (%.9f)" % (
            self.epoch_count, self.step_count, total_steps,
            self.time_elapsed, self.losses.val, self.losses.avg)
        self.logger.info(message)

    def test(self):
        """Run every registered evaluator."""
        for runner in self.evaluators:
            runner.evaluate()
Esempio n. 4
0
class FCNSegmentor(object):
    """
      The class for FCN-based semantic segmentation. Includes train & val.

      Usage: construct with a configer, call init_model() once, then train().
    """
    def __init__(self, configer):
        """
          Create the meters and helper objects. The network, data loaders,
          optimizer and loss stay unset until init_model() is called.

          Args:
            configer: configuration object read through configer.get(...).
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.seg_visualizer = SegVisualizer(configer)
        self.seg_loss_manager = SegLossManager(configer)
        self.module_utilizer = ModuleUtilizer(configer)
        self.seg_model_manager = SegModelManager(configer)
        self.seg_data_loader = SegDataLoader(configer)

        self.seg_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.pixel_loss = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """
          Build the network, optimizer, data loaders and pixel loss.

          The iteration counter always starts at 0; the second value returned
          by load_net() is ignored. Only the 'cityscape' dataset is handled:
          any other value is logged as an error and the process exits with
          status 1.
        """
        self.seg_net = self.seg_model_manager.seg_net()
        self.iters = 0
        self.seg_net, _ = self.module_utilizer.load_net(self.seg_net)

        self.optimizer, self.lr = self.module_utilizer.update_optimizer(
            self.seg_net, self.iters)

        if self.configer.get('dataset') == 'cityscape':
            self.train_loader = self.seg_data_loader.get_trainloader(
                FSCityScapeLoader)
            self.val_loader = self.seg_data_loader.get_valloader(
                FSCityScapeLoader)

        else:
            Log.error('Dataset: {} is not valid!'.format(
                self.configer.get('dataset')))
            exit(1)

        self.pixel_loss = self.seg_loss_manager.get_seg_loss(
            'cross_entropy_loss')

    def __train(self):
        """
          Train function of every epoch during train phase.

          Iterates once over the train loader and stops early as soon as
          solver.max_iter iterations have been done, so training does not
          overshoot the configured budget in the middle of an epoch.
        """
        self.seg_net.train()
        display_iter = self.configer.get('solver', 'display_iter')
        test_interval = self.configer.get('solver', 'test_interval')
        max_iter = self.configer.get('solver', 'max_iter')
        start_time = time.time()

        # data_tuple: (inputs, targets, ...); only the first two items are used.
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # Non-zero status: this is a failure, not a clean shutdown.
                exit(1)

            # `async` is a reserved word since Python 3.7; PyTorch renamed the
            # argument to `non_blocking`. Tensors no longer need a Variable wrapper.
            inputs = data_tuple[0].cuda(non_blocking=True)
            targets = data_tuple[1].cuda(non_blocking=True)

            # Forward pass.
            outputs = self.seg_net(inputs)

            # Compute the loss of the train batch & backward.
            loss = self.pixel_loss(outputs, targets)
            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % display_iter == 0:
                Log.info(
                    'Train Iteration: {0}\t'
                    'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                    'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                    'Learning rate = {2}\n'
                    'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                        self.iters,
                        display_iter,
                        self.lr,
                        batch_time=self.batch_time,
                        data_time=self.data_time,
                        loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and self.iters % test_interval == 0:
                self.__val()

            # Refresh the optimizer / learning rate for the next iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(
                self.seg_net, self.iters)

            # Stop inside the epoch once the iteration budget is used up.
            if self.iters >= max_iter:
                break

    def __val(self):
        """
          Validation function during the train phase.

          Runs the whole val loader without gradient tracking, saves the
          network through the module utilizer, logs the average val loss and
          switches the network back to train mode.
        """
        # Imported here so the method does not depend on a file-level `import torch`.
        import torch

        self.seg_net.eval()
        start_time = time.time()

        # torch.no_grad() replaces the removed Variable(..., volatile=True).
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                targets = data_tuple[1].cuda(non_blocking=True)
                # Forward pass.
                outputs = self.seg_net(inputs)
                # Compute the loss of the val batch.
                loss = self.pixel_loss(outputs, targets)

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.seg_net, self.iters)
        # Print the log info & reset the states.
        Log.info('Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
                 'Loss {loss.avg:.8f}\n'.format(batch_time=self.batch_time,
                                                loss=self.val_losses))
        # NOTE: batch_time is shared with the train loop, so this reset also
        # drops train timings accumulated since the last display.
        self.batch_time.reset()
        self.val_losses.reset()
        self.seg_net.train()

    def train(self):
        """
          Run training epochs until solver.max_iter iterations are reached.

          Exits with status 1 if an epoch makes no progress (the train loader
          yielded no batches), instead of looping forever.
        """
        cudnn.benchmark = True
        max_iter = self.configer.get('solver', 'max_iter')
        while self.iters < max_iter:
            iters_before = self.iters
            self.__train()
            if self.iters == iters_before:
                Log.error('Train Loader Error!')
                exit(1)
Esempio n. 5
0
class OpenPose(object):
    """
      The class for Pose Estimation. Includes train & val.

      Usage: construct with a configer, call init_model() once, then train().
    """
    def __init__(self, configer):
        """
          Create the meters and helper objects. The network, data loaders,
          optimizer and loss stay unset until init_model() is called.

          Args:
            configer: configuration object read through configer.get(...).
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.vis = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.data_loader = PoseDataLoader(configer)
        self.module_utilizer = ModuleUtilizer(configer)

        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.mse_loss = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """
          Build the network, optimizer, data loaders and MSE loss.

          The iteration counter always starts at 0; the second value returned
          by load_net() is ignored. Only the 'coco' dataset is handled: any
          other value is logged as an error and the process exits with status 1.
        """
        self.pose_net = self.model_manager.pose_detector()
        self.iters = 0

        self.pose_net, _ = self.module_utilizer.load_net(self.pose_net)

        self.optimizer, self.lr = self.module_utilizer.update_optimizer(
            self.pose_net, self.iters)

        if self.configer.get('dataset') == 'coco':
            self.train_loader = self.data_loader.get_trainloader(OPCocoLoader)
            self.val_loader = self.data_loader.get_valloader(OPCocoLoader)

        else:
            Log.error('Dataset: {} is not valid!'.format(
                self.configer.get('dataset')))
            exit(1)

        self.mse_loss = self.loss_manager.get_pose_loss('mse_loss')

    def __train(self):
        """
          Train function of every epoch during train phase.

          Iterates once over the train loader and stops early as soon as
          solver.max_iter iterations have been done.
        """
        self.pose_net.train()
        display_iter = self.configer.get('solver', 'display_iter')
        test_interval = self.configer.get('solver', 'test_interval')
        max_iter = self.configer.get('solver', 'max_iter')
        start_time = time.time()

        # data_tuple: (inputs, heatmap, maskmap, vecmap); the last two are optional.
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # Non-zero status: this is a failure, not a clean shutdown.
                exit(1)

            # `async` is a reserved word since Python 3.7; PyTorch renamed the
            # argument to `non_blocking`. Tensors no longer need a Variable wrapper.
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            # Forward pass.
            paf_out, heatmap_out = self.pose_net(inputs)

            # Host-side image for the visualizer, computed once per batch.
            # NOTE(review): squeeze() + transpose(1, 2, 0) presumably expects a
            # single-sample batch — confirm against the loader's batch size.
            image = inputs.data.cpu().squeeze().numpy().transpose(1, 2, 0)
            self.vis.vis_paf(paf_out, image, name='paf_out')

            # Compute the loss of the train batch & backward.
            loss = self.mse_loss(heatmap_out, heatmap, maskmap)
            if len(data_tuple) > 3:
                vecmap = data_tuple[3].cuda(non_blocking=True)
                self.vis.vis_paf(vecmap, image, name='paf')
                loss_associate = self.mse_loss(paf_out, vecmap, maskmap)
                # Out-of-place sum, same form as the validation path.
                loss = loss + loss_associate

            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % display_iter == 0:
                Log.info(
                    'Train Iteration: {0}\t'
                    'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                    'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                    'Learning rate = {2}\n'
                    'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                        self.iters,
                        display_iter,
                        self.lr,
                        batch_time=self.batch_time,
                        data_time=self.data_time,
                        loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and self.iters % test_interval == 0:
                self.__val()

            # Adjust the learning rate after every iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(
                self.pose_net, self.iters)

            # Stop inside the epoch once the iteration budget is used up.
            if self.iters >= max_iter:
                break

    def __val(self):
        """
          Validation function during the train phase.

          Runs the whole val loader without gradient tracking, saves the
          network through the module utilizer, logs the average val loss and
          switches the network back to train mode.
        """
        # Imported here so the method does not depend on a file-level `import torch`.
        import torch

        self.pose_net.eval()
        start_time = time.time()

        # torch.no_grad() replaces the removed Variable(..., volatile=True).
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                paf_out, heatmap_out = self.pose_net(inputs)
                # Compute the loss of the val batch.
                loss = self.mse_loss(heatmap_out, heatmap, maskmap)

                if len(data_tuple) > 3:
                    vecmap = data_tuple[3].cuda(non_blocking=True)
                    loss_associate = self.mse_loss(paf_out, vecmap, maskmap)
                    loss = loss + loss_associate

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.pose_net, self.iters)

        # Print the log info & reset the states.
        Log.info('Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
                 'Loss {loss.avg:.8f}\n'.format(batch_time=self.batch_time,
                                                loss=self.val_losses))
        # NOTE: batch_time is shared with the train loop, so this reset also
        # drops train timings accumulated since the last display.
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """
          Run training epochs until solver.max_iter iterations are reached.

          Exits with status 1 if an epoch makes no progress (the train loader
          yielded no batches), instead of looping forever.
        """
        cudnn.benchmark = True
        max_iter = self.configer.get('solver', 'max_iter')
        while self.iters < max_iter:
            iters_before = self.iters
            self.__train()
            if self.iters == iters_before:
                Log.error('Train Loader Error!')
                exit(1)
Esempio n. 6
0
class ConvPoseMachine(object):
    """
      The class for Pose Estimation. Include train, val & test.
    """
    def __init__(self, configer):
        """
          Create the meters and helper objects from ``configer``.

          The network, data loaders and optimizer are left as None here
          and are filled in by init_model().
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.pose_visualizer = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.train_utilizer = ModuleUtilizer(configer)

        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.best_model_loss = None
        self.is_best = None
        self.lr = None
        self.iters = None

    def init_model(self, train_loader=None, val_loader=None):
        """
          Build the network, restore its state and prepare for training.

          Args:
            train_loader: iterable of training batches (stored as-is).
            val_loader: iterable of validation batches, or None to skip
              validation during training.
        """
        self.pose_net = self.model_manager.pose_detector()

        # load_net also returns the iteration counter to start from.
        self.pose_net, self.iters = self.train_utilizer.load_net(self.pose_net)

        self.optimizer = self.train_utilizer.update_optimizer(
            self.pose_net, self.iters)

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.heatmap_loss = self.loss_manager.get_pose_loss('heatmap_loss')

    def __train(self):
        """
          Train function of every epoch during train phase.

          Makes one full pass over ``self.train_loader``. Every batch is
          one optimizer step and increments ``self.iters``; logging and
          validation are triggered from the configured ``display_iter``
          and ``test_interval``.
        """
        self.pose_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap, maskmap, tagmap, num_objects)
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # A broken loader is a failure: exit with a non-zero status.
                exit(1)

            # ``async`` is a reserved word since Python 3.7; the
            # Tensor.cuda argument is now called ``non_blocking``.
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            self.pose_visualizer.vis_tensor(heatmap, name='heatmap')
            self.pose_visualizer.vis_tensor((inputs * 256 + 128) / 255,
                                            name='image')
            # Forward pass.
            outputs = self.pose_net(inputs)

            self.pose_visualizer.vis_tensor(outputs, name='output')
            self.pose_visualizer.vis_peaks(inputs, outputs, name='peak')
            # Compute the loss of the train batch & backward.
            loss = self.heatmap_loss(outputs, heatmap, maskmap)

            # item() replaces ``loss.data[0]``, which fails on 0-dim tensors.
            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                # NOTE(review): self.lr is never assigned in this class, so
                # the learning rate is logged as None -- confirm intent.
                Log.info(
                    'Train Iteration: {0}\t'
                    'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                    'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                    'Learning rate = {2}\n'
                    'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                        self.iters,
                        self.configer.get('solver', 'display_iter'),
                        self.lr,
                        batch_time=self.batch_time,
                        data_time=self.data_time,
                        loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
               self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()

            # Refresh the optimizer for the new iteration count.
            self.optimizer = self.train_utilizer.update_optimizer(
                self.pose_net, self.iters)

    def __val(self):
        """
          Validation function during the train phase.

          Evaluates every batch of ``self.val_loader`` without gradient
          tracking, logs the accumulated loss and timing, resets the
          meters and puts the network back in train mode.
        """
        # Local import: the file-level import block is not visible from
        # here, so the name is brought into scope inside the method.
        import torch

        self.pose_net.eval()
        start_time = time.time()

        # torch.no_grad() replaces the removed ``volatile=True`` flag.
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                outputs = self.pose_net(inputs)
                self.pose_visualizer.vis_peaks(inputs, outputs,
                                               name='peak_val')
                # Compute the loss of the val batch.
                loss = self.heatmap_loss(outputs, heatmap, maskmap)

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        # Print the log info & reset the states.
        Log.info('Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
                 'Loss {loss.avg:.8f}\n'.format(batch_time=self.batch_time,
                                                loss=self.val_losses))
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """
          Run train passes until ``self.iters`` reaches ``solver.max_iter``.

          A pass always covers the whole train loader, so the counter may
          end above the limit when the limit falls inside a pass.
        """
        cudnn.benchmark = True
        max_iter = self.configer.get('solver', 'max_iter')
        while self.iters < max_iter:
            self.__train()

    def test(self, img_path=None, img_dir=None):
        # NOTE(review): the body looks truncated in this file -- the loaded
        # image is never used and img_dir is ignored. Kept as found.
        if img_path is not None and os.path.exists(img_path):
            image = Image.open(img_path).convert('RGB')
class OpenPose(object):
    """
      The class for Pose Estimation. Include train, val, test & predict.
    """
    def __init__(self, configer):
        """
          Create the meters and helper objects from ``configer``.

          The network, data loaders, optimizer and learning rate are left
          as None here and are filled in by init_model().
        """
        self.configer = configer
        self.batch_time = AverageMeter()
        self.data_time = AverageMeter()
        self.train_losses = AverageMeter()
        self.val_losses = AverageMeter()
        self.vis = PoseVisualizer(configer)
        self.loss_manager = PoseLossManager(configer)
        self.model_manager = PoseModelManager(configer)
        self.data_loader = PoseDataLoader(configer)
        self.module_utilizer = ModuleUtilizer(configer)

        self.pose_net = None
        self.train_loader = None
        self.val_loader = None
        self.optimizer = None
        self.lr = None
        self.iters = None

    def init_model(self):
        """
          Build the network, optimizer, data loaders and loss.

          Only the 'coco' dataset is handled; any other configured
          dataset is logged as an error and the process exits with
          status 1.
        """
        self.pose_net = self.model_manager.pose_detector()
        self.iters = 0

        self.pose_net, _ = self.module_utilizer.load_net(self.pose_net)

        self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

        if self.configer.get('dataset') == 'coco':
            self.train_loader = self.data_loader.get_trainloader(OPCocoLoader)
            self.val_loader = self.data_loader.get_valloader(OPCocoLoader)

        else:
            Log.error('Dataset: {} is not valid!'.format(self.configer.get('dataset')))
            exit(1)

        self.mse_loss = self.loss_manager.get_pose_loss('mse_loss')

    def __train(self):
        """
          Train function of every epoch during train phase.

          Makes one full pass over ``self.train_loader``. Every batch is
          one optimizer step and increments ``self.iters``; logging and
          validation are triggered from the configured ``display_iter``
          and ``test_interval``.
        """
        self.pose_net.train()
        start_time = time.time()

        # data_tuple: (inputs, heatmap, maskmap, vecmap)
        for data_tuple in self.train_loader:
            self.data_time.update(time.time() - start_time)
            if len(data_tuple) < 2:
                Log.error('Train Loader Error!')
                # A broken loader is a failure: exit with a non-zero status,
                # as init_model() does for an invalid dataset.
                exit(1)

            # ``async`` is a reserved word since Python 3.7; the
            # Tensor.cuda argument is now called ``non_blocking``.
            inputs = data_tuple[0].cuda(non_blocking=True)
            heatmap = data_tuple[1].cuda(non_blocking=True)
            maskmap = None
            if len(data_tuple) > 2:
                maskmap = data_tuple[2].cuda(non_blocking=True)

            # Forward pass.
            paf_out, heatmap_out = self.pose_net(inputs)
            # Background image for the PAF plots, computed once per batch.
            # NOTE(review): squeeze() + transpose(1, 2, 0) presumably
            # relies on a batch size of 1 -- confirm against the loader.
            image = inputs.data.cpu().squeeze().numpy().transpose(1, 2, 0)
            self.vis.vis_paf(paf_out, image, name='paf_out')
            # Compute the loss of the train batch & backward.
            loss = self.mse_loss(heatmap_out, heatmap, maskmap)
            if len(data_tuple) > 3:
                vecmap = data_tuple[3].cuda(non_blocking=True)
                self.vis.vis_paf(vecmap, image, name='paf')
                # Out-of-place add, matching the validation code path.
                loss = loss + self.mse_loss(paf_out, vecmap, maskmap)

            # item() replaces ``loss.data[0]``, which fails on 0-dim tensors.
            self.train_losses.update(loss.item(), inputs.size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.iters += 1

            # Print the log info & reset the states.
            if self.iters % self.configer.get('solver', 'display_iter') == 0:
                Log.info('Train Iteration: {0}\t'
                         'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                         'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                         'Learning rate = {2}\n'
                         'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                             self.iters,
                             self.configer.get('solver', 'display_iter'),
                             self.lr,
                             batch_time=self.batch_time,
                             data_time=self.data_time,
                             loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            # Check to val the current model.
            if self.val_loader is not None and \
               self.iters % self.configer.get('solver', 'test_interval') == 0:
                self.__val()

            # Adjust the learning rate after every iteration.
            self.optimizer, self.lr = self.module_utilizer.update_optimizer(self.pose_net, self.iters)

    def __val(self):
        """
          Validation function during the train phase.

          Evaluates every batch of ``self.val_loader`` without gradient
          tracking, saves the network through
          ``self.module_utilizer.save_net``, logs the accumulated loss
          and timing, resets the meters and puts the network back in
          train mode.
        """
        # Local import: the file-level import block is not visible from
        # here, so the name is brought into scope inside the method.
        import torch

        self.pose_net.eval()
        start_time = time.time()

        # torch.no_grad() replaces the removed ``volatile=True`` flag.
        with torch.no_grad():
            for data_tuple in self.val_loader:
                inputs = data_tuple[0].cuda(non_blocking=True)
                heatmap = data_tuple[1].cuda(non_blocking=True)
                maskmap = None
                if len(data_tuple) > 2:
                    maskmap = data_tuple[2].cuda(non_blocking=True)

                # Forward pass.
                paf_out, heatmap_out = self.pose_net(inputs)
                # Compute the loss of the val batch.
                loss = self.mse_loss(heatmap_out, heatmap, maskmap)

                # The association loss is only added when the loader
                # provides a fourth element (the vector map).
                if len(data_tuple) > 3:
                    vecmap = data_tuple[3].cuda(non_blocking=True)
                    loss = loss + self.mse_loss(paf_out, vecmap, maskmap)

                self.val_losses.update(loss.item(), inputs.size(0))

                # Update the vars of the val phase.
                self.batch_time.update(time.time() - start_time)
                start_time = time.time()

        self.module_utilizer.save_net(self.pose_net, self.iters)

        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
                batch_time=self.batch_time, loss=self.val_losses))
        self.batch_time.reset()
        self.val_losses.reset()
        self.pose_net.train()

    def train(self):
        """
          Run train passes until ``self.iters`` reaches ``solver.max_iter``.

          A pass always covers the whole train loader, so the counter may
          end above the limit when the limit falls inside a pass.
        """
        cudnn.benchmark = True
        max_iter = self.configer.get('solver', 'max_iter')
        while self.iters < max_iter:
            self.__train()
# Example no. 8
# 0
class Trainer(CheckpointRunner):
    """Generic training loop on top of ``CheckpointRunner``.

    ``init_model`` and ``init_loss_functions`` raise NotImplementedError
    here and must be provided by a subclass. Attributes such as
    ``options``, ``gpus``, ``logger``, ``dataset``, ``epoch_count``,
    ``step_count``, ``time_elapsed`` and ``summary_writer`` are used but
    not created in this class -- presumably they come from
    ``CheckpointRunner``; verify against the base class.
    """

    # noinspection PyAttributeOutsideInit
    def init_fn(self, shared_model=None, **kwargs):
        """Create model, optimizer, LR scheduler, criterion and loss meter.

        Args:
            shared_model: an already built model to reuse; when None a new
                model is created with ``init_model`` and wrapped in
                ``DataParallelModel``.
            **kwargs: accepted for interface compatibility; unused here.
        """
        # Create auxiliary models
        self.init_auxiliary()
        if shared_model is not None:
            self.model = shared_model
        else:
            self.model = self.init_model()
            self.model = DataParallelModel(self.model.cuda(),
                                           device_ids=self.gpus)
            # self.model = torch.nn.DataParallel(self.model, device_ids=self.gpus).cuda()
        # Setup a joint optimizer for the 2 models
        self.optimizer = self.init_optimizer(self.options.optim.name)
        self.lr_scheduler = self.init_lr(self.options.optim.lr_scheduler)
        # Create loss functions
        self.criterion = self.init_loss_functions()
        self.criterion = DataParallelCriterion(self.criterion.cuda(),
                                               device_ids=self.gpus)
        # Create AverageMeters for losses
        self.losses = AverageMeter()
        # Evaluators
        # self.evaluators = [Evaluator(self.options, self.logger, self.summary_writer, shared_model=self.model)]
        self.dataset_size = None

    def init_auxiliary(self):
        """Hook for subclasses that need auxiliary models; no-op here."""
        pass

    def init_model(self):
        """Return the model to train; must be overridden."""
        raise NotImplementedError("Your model is not found")

    def init_loss_functions(self):
        """Return the training criterion; must be overridden."""
        raise NotImplementedError("Your loss is not found")

    def init_optimizer(self, optim_name):
        """Build the optimizer selected by ``optim_name``.

        Args:
            optim_name: "adam", "sgd" or "adam_gan".

        Returns:
            A single optimizer for "adam" / "sgd", or a dict with the keys
            "optimizer_d" and "optimizer_g" for "adam_gan".

        Raises:
            NotImplementedError: for any other name.
        """
        if optim_name == "adam":
            optimizer = torch.optim.Adam(params=list(self.model.parameters()),
                                         lr=self.options.optim.lr,
                                         betas=(self.options.optim.adam_beta1,
                                                0.999),
                                         weight_decay=self.options.optim.wd)
        elif optim_name == "sgd":
            optimizer = torch.optim.SGD(
                params=list(self.model.parameters()),
                lr=self.options.optim.lr,
                momentum=self.options.optim.sgd_momentum,
                weight_decay=self.options.optim.wd)
        elif optim_name == "adam_gan":
            # Separate optimizers for the D and G sub-modules of the
            # wrapped model.
            optimizer_d = torch.optim.Adam(
                params=list(self.model.module.D.parameters()),
                lr=self.options.optim.lr_d,
                betas=(self.options.optim.adam_beta1, 0.999),
                weight_decay=0)
            optimizer_g = torch.optim.Adam(
                params=list(self.model.module.G.parameters()),
                lr=self.options.optim.lr_g,
                betas=(self.options.optim.adam_beta1, 0.999),
                weight_decay=0)
            return {"optimizer_d": optimizer_d, "optimizer_g": optimizer_g}
        else:
            raise NotImplementedError("Your optimizer is not found")
        return optimizer

    def init_lr(self, lr_scheduler_name):
        """Build the learning-rate scheduler selected by name.

        Args:
            lr_scheduler_name: "multistep", "exp" or "multistep_gan".

        Returns:
            The scheduler, or None when the name is not one of the above
            (no scheduling).
        """
        if lr_scheduler_name == "multistep":
            lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                self.optimizer, self.options.optim.lr_step,
                self.options.optim.lr_factor)
        elif lr_scheduler_name == "exp":
            lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
                self.optimizer, gamma=self.options.optim.lr_gamma)
        elif lr_scheduler_name == "multistep_gan":
            # Only the "optimizer_d" entry of the optimizer dict is scheduled.
            lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                self.optimizer["optimizer_d"], self.options.optim.lr_step,
                self.options.optim.lr_factor)
        else:
            # Previously assigned to a misspelled name, which made the
            # return below raise UnboundLocalError.
            lr_scheduler = None

        return lr_scheduler

    def models_dict(self):
        """Return the models as a name -> model dict."""
        return {'model': self.model}

    def optimizers_dict(self):
        """Return the optimizer and LR scheduler as a name -> object dict."""
        return {'optimizer': self.optimizer, 'lr_scheduler': self.lr_scheduler}

    def train_step(self, input_batch):
        """Run one optimization step on ``input_batch``.

        Returns:
            Tuple of (model output, loss summary), both passed through
            ``recursive_detach``.
        """
        # Grab data from the batch, predict with model
        out = self.model(input_batch)
        # compute loss
        loss, loss_summary = self.criterion(out, input_batch)
        self.losses.update(loss.detach().cpu().item())
        # Do backprop
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # Pack output arguments to be used for visualization
        return recursive_detach(out), recursive_detach(loss_summary)

    def get_dataloader(self):
        """Build the training DataLoader over ``self.dataset``.

        The batch size is the per-GPU batch size times the number of GPUs.
        """
        data_loader = DataLoader(self.dataset,
                                 batch_size=self.options.train.batch_size *
                                 self.options.num_gpus,
                                 num_workers=self.options.num_workers,
                                 pin_memory=self.options.pin_memory,
                                 shuffle=self.options.train.shuffle)
        return data_loader

    def train(self):
        """Train from ``self.epoch_count`` up to the configured epoch count.

        Writes summaries every ``summary_steps`` steps and checkpoints
        every ``checkpoint_steps`` steps. At the end of each epoch a
        checkpoint is dumped unless the model name ends with 'gan', and
        the LR scheduler (if any) is stepped.
        """
        self.logger.info("Start Trainning.")
        # Create data loader at very begining
        train_data_loader = self.get_dataloader()
        self.dataset_size = len(train_data_loader)

        # Run training for num_epochs epochs
        for epoch in range(self.epoch_count, self.options.train.num_epochs):
            self.epoch_count += 1
            # Reset loss
            self.losses.reset()
            # Iterate over all batches in an epoch
            for step, batch in enumerate(train_data_loader):
                # Send input to GPU; non-tensor entries are passed through.
                batch = {
                    k: v.cuda() if isinstance(v, torch.Tensor) else v
                    for k, v in batch.items()
                }
                # Run training step
                out = self.train_step(batch)
                self.step_count += 1
                # Tensorboard logging every summary_steps steps
                if self.step_count % self.options.train.summary_steps == 0:
                    self.train_summaries(batch, *out)
                # Save checkpoint every checkpoint_steps steps
                if self.step_count % self.options.train.checkpoint_steps == 0:
                    self.dump_checkpoint()
            if not self.options.model.name.endswith('gan'):
                self.dump_checkpoint()
            if self.lr_scheduler is not None:
                self.lr_scheduler.step()

    def train_summaries(self, input_batch, out_summary, loss_summary):
        """Log the batch filenames and write the loss summary.

        ``out_summary`` is accepted but not used here.
        """
        # Debug info for filenames
        self.logger.debug(input_batch["filename"])
        # Save results in Tensorboard
        self.tensorboard_step(loss_summary)
        # Save results to log
        self.log_step(loss_summary)

    def log_step(self, loss_summary):
        """Log epoch, step progress, elapsed time and current/average loss.

        ``loss_summary`` is accepted but not used here.
        """
        self.logger.info(
            "Epoch %03d, Step %06d/%06d, Time elapsed %s, Loss %.5f (AvgLoss %.5f)"
            % (self.epoch_count, self.step_count,
               self.options.train.num_epochs * len(self.dataset) //
               (self.options.train.batch_size * self.options.num_gpus),
               self.time_elapsed, self.losses.val, self.losses.avg))

    def tensorboard_step(self, loss_summary):
        """Write every entry of ``loss_summary`` as a scalar at this step."""
        for k, v in loss_summary.items():
            self.summary_writer.add_scalar(k, v, self.step_count)

    def init_with_pretrained_backbone(self):
        """Load backbone weights from the configured checkpoint file.

        The state dict is loaded with ``strict=False``.
        """
        checkpoint_file = os.path.abspath(
            self.options.train.backbone_pretrained_model)
        pretrained_dict = torch.load(checkpoint_file)
        self.model.module.load_state_dict(pretrained_dict, strict=False)
        self.logger.info("Init with pre-trained backbone from %s." %
                         checkpoint_file)

    def test(self):
        """Run every evaluator with the model in eval mode.

        NOTE(review): the assignment of ``self.evaluators`` in ``init_fn``
        is commented out -- presumably a subclass sets it; confirm.
        """
        self.model.eval()
        try:
            for evaluator in self.evaluators:
                evaluator.evaluate()
        finally:
            # Restore train mode even when an evaluator raises.
            self.model.train()