def main(args):
    if args.task == 'train':
        if not args.train_image_path:
            raise ValueError('train data path should be specified!')
        train_dataset = SumitomoCADDS(file_path=args.train_image_path)
        # train_dataset = SumitomoCADDS(file_path=args.val_image_path)
        val_dataset = None
        if args.val_image_path:
            val_dataset = SumitomoCADDS(file_path=args.val_image_path, val=True)

        model = HRNet(3, 32, 8).to(device)
        # model = ResUNet(3, 8).to(device)
        # model = R2AttU_Net(3, 1).to(device)
        optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0003)

        if args.resume:
            if not os.path.isfile(args.resume):
                raise FileNotFoundError('=> no checkpoint found at %s' % args.resume)
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            args.best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print('=> loaded checkpoint %s (epoch %d)' % (args.resume, args.start_epoch))

        train(args, model, optimizer, train_dataset, val_dataset)

    else:  # test
        if not args.test_image_path:
            raise ValueError('=> test data path should be specified')
        if not args.resume or not os.path.isfile(args.resume):
            raise ValueError('=> resume not specified or no checkpoint found')

        test_dataset = SumitomoCADDS(file_path=args.test_image_path, test=True)
        model = ResUNet(3, 8).to(device)
        # model = R2AttU_Net(3, 1).to(device)
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        print(f'Successfully loaded model from {args.resume}')
        test(args, model, test_dataset)
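
# --- Hypothetical CLI entry point (illustrative sketch, not part of the original script) ---
# The argument names mirror those read by main() above (task, train_image_path,
# val_image_path, test_image_path, resume); the real project may define its parser elsewhere.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train or test the model defined in main()')
    parser.add_argument('--task', choices=['train', 'test'], default='train')
    parser.add_argument('--train_image_path', type=str, default=None)
    parser.add_argument('--val_image_path', type=str, default=None)
    parser.add_argument('--test_image_path', type=str, default=None)
    parser.add_argument('--resume', type=str, default=None, help='checkpoint to resume from')
    main(parser.parse_args())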
def convertToONNX():
    checkpoint_path = "./weights/pose_hrnet_w48_384x288.pth"
    model = HRNet(c=48, nof_joints=17)
    checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
    model.load_state_dict(checkpoint)
    count_parameters(model)

    x = torch.randn(1, 3, 384, 288, requires_grad=True)
    torch.onnx.export(
        model=model,
        args=x,
        f=ONNX_PATH,  # where the exported model is saved
        verbose=False,
        export_params=True,
        do_constant_folding=False,  # fold constant values for optimization
        # do_constant_folding=True,
        input_names=['input'],
        output_names=['output'])

    onnx_model = onnx.load(ONNX_PATH)
    onnx.checker.check_model(onnx_model)
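
# --- Optional sanity check of the exported graph (sketch, assumes onnxruntime is installed
# and ONNX_PATH is the file written by convertToONNX above; not part of the original code) ---
import numpy as np
import onnxruntime as ort

def check_onnx_output(onnx_path=ONNX_PATH):
    session = ort.InferenceSession(onnx_path)
    dummy = np.random.randn(1, 3, 384, 288).astype(np.float32)  # same shape used for export
    (heatmaps,) = session.run(['output'], {'input': dummy})
    print('ONNX output shape:', heatmaps.shape)  # roughly (1, 17, 96, 72) for a 384x288 input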
class SimpleHRNet:
    """
    SimpleHRNet class.

    The class provides a simple and customizable method to load the HRNet network, load the official
    pre-trained weights, and predict the human pose on single images.
    Multi-person support with the YOLOv3 detector is also included (and enabled by default).
    """

    def __init__(self,
                 c,
                 nof_joints,
                 checkpoint_path,
                 resolution=(384, 288),
                 interpolation=cv2.INTER_CUBIC,
                 multiperson=True,
                 return_bounding_boxes=False,
                 max_batch_size=32,
                 yolo_model_def="./models/detectors/yolo/config/yolov3.cfg",
                 yolo_class_path="./models/detectors/yolo/data/coco.names",
                 yolo_weights_path="./models/detectors/yolo/weights/yolov3.weights",
                 device=torch.device("cpu")):
        """
        Initializes a new SimpleHRNet object.
        HRNet (and YOLOv3) are initialized on the given torch.device and their pre-trained weights
        are loaded from disk.

        Args:
            c (int): number of channels.
            nof_joints (int): number of joints.
            checkpoint_path (str): path to an official hrnet checkpoint or a checkpoint obtained with `train_coco.py`.
            resolution (tuple): hrnet input resolution - format: (height, width).
                Default: (384, 288)
            interpolation (int): opencv interpolation algorithm.
                Default: cv2.INTER_CUBIC
            multiperson (bool): if True, multiperson detection will be enabled.
                This requires the use of a people detector (like YOLOv3).
                Default: True
            return_bounding_boxes (bool): if True, bounding boxes will be returned along with poses by self.predict.
                Default: False
            max_batch_size (int): maximum batch size used in hrnet inference.
                Useless without multiperson=True.
                Default: 32
            yolo_model_def (str): path to yolo model definition file.
                Default: "./models/detectors/yolo/config/yolov3.cfg"
            yolo_class_path (str): path to yolo class definition file.
                Default: "./models/detectors/yolo/data/coco.names"
            yolo_weights_path (str): path to yolo pretrained weights file.
                Default: "./models/detectors/yolo/weights/yolov3.weights"
            device (:class:`torch.device`): the hrnet (and yolo) inference will be run on this device.
                Default: torch.device("cpu")
        """
        self.c = c
        self.nof_joints = nof_joints
        self.checkpoint_path = checkpoint_path
        self.resolution = resolution  # in the form (height, width) as in the original implementation
        self.interpolation = interpolation
        self.multiperson = multiperson
        self.return_bounding_boxes = return_bounding_boxes
        self.max_batch_size = max_batch_size
        self.yolo_model_def = yolo_model_def
        self.yolo_class_path = yolo_class_path
        self.yolo_weights_path = yolo_weights_path
        self.device = device

        self.model = HRNet(c=c, nof_joints=nof_joints).to(device)

        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        if 'model' in checkpoint:
            self.model.load_state_dict(checkpoint['model'])
        else:
            self.model.load_state_dict(checkpoint)
        self.model.eval()

        if not self.multiperson:
            self.transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        else:
            self.detector = YOLOv3(model_def=yolo_model_def,
                                   class_path=yolo_class_path,
                                   weights_path=yolo_weights_path,
                                   classes=('person',),
                                   max_batch_size=self.max_batch_size,
                                   device=device)
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((self.resolution[0], self.resolution[1])),  # (height, width)
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])

    def predict(self, image):
        """
        Predicts the human pose on a single image or a stack of n images.

        Args:
            image (:class:`np.ndarray`):
                the image(s) on which the human pose will be estimated.
                image is expected to be in the opencv format.
                image can be:
                    - a single image with shape=(height, width, BGR color channel)
                    - a stack of n images with shape=(n, height, width, BGR color channel)

        Returns:
            :class:`np.ndarray`:
                a numpy array containing human joints for each (detected) person.

                Format:
                    if image is a single image:
                        shape=(# of people, # of joints (nof_joints), 3);  dtype=(np.float32).
                    if image is a stack of n images:
                        list of n np.ndarrays with
                        shape=(# of people, # of joints (nof_joints), 3);  dtype=(np.float32).

                Each joint has 3 values: (y position, x position, joint confidence).

                If self.return_bounding_boxes, the class returns a list with (bounding boxes, human joints).
        """
        if len(image.shape) == 3:
            return self._predict_single(image)
        elif len(image.shape) == 4:
            return self._predict_batch(image)
        else:
            raise ValueError('Wrong image format.')

    def _predict_single(self, image):
        if not self.multiperson:
            old_res = image.shape
            if self.resolution is not None:
                image = cv2.resize(
                    image,
                    (self.resolution[1], self.resolution[0]),  # (width, height)
                    interpolation=self.interpolation
                )
            images = self.transform(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).unsqueeze(dim=0)
            boxes = np.asarray([[0, 0, old_res[1], old_res[0]]], dtype=np.float32)  # [x1, y1, x2, y2]
        else:
            detections = self.detector.predict_single(image)

            boxes = []
            if detections is not None:
                images = torch.empty((len(detections), 3, self.resolution[0], self.resolution[1]))  # (height, width)
                for i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
                    x1 = int(round(x1.item()))
                    x2 = int(round(x2.item()))
                    y1 = int(round(y1.item()))
                    y2 = int(round(y2.item()))

                    # Adapt detections to match HRNet input aspect ratio (as suggested by xtyDoge in issue #14)
                    correction_factor = self.resolution[0] / self.resolution[1] * (x2 - x1) / (y2 - y1)
                    if correction_factor > 1:
                        # increase y side
                        center = y1 + (y2 - y1) // 2
                        length = int(round((y2 - y1) * correction_factor))
                        y1 = max(0, center - length // 2)
                        y2 = min(image.shape[0], center + length // 2)
                    elif correction_factor < 1:
                        # increase x side
                        center = x1 + (x2 - x1) // 2
                        length = int(round((x2 - x1) * 1 / correction_factor))
                        x1 = max(0, center - length // 2)
                        x2 = min(image.shape[1], center + length // 2)

                    boxes.append([x1, y1, x2, y2])
                    images[i] = self.transform(image[y1:y2, x1:x2, ::-1])
            else:
                images = torch.empty((0, 3, self.resolution[0], self.resolution[1]))  # (height, width)

            boxes = np.asarray(boxes, dtype=np.int32)

        if images.shape[0] > 0:
            images = images.to(self.device)

            with torch.no_grad():
                if len(images) <= self.max_batch_size:
                    out = self.model(images)
                else:
                    out = torch.empty(
                        (images.shape[0], self.nof_joints,
                         self.resolution[0] // 4, self.resolution[1] // 4),
                        device=self.device)
                    for i in range(0, len(images), self.max_batch_size):
                        out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])

            out = out.detach().cpu().numpy()
            pts = np.empty((out.shape[0], out.shape[1], 3), dtype=np.float32)
            # For each human, for each joint: y, x, confidence
            for i, human in enumerate(out):
                for j, joint in enumerate(human):
                    pt = np.unravel_index(np.argmax(joint), (self.resolution[0] // 4, self.resolution[1] // 4))
                    # 0: pt_y / (height // 4) * (bb_y2 - bb_y1) + bb_y1
                    # 1: pt_x / (width // 4) * (bb_x2 - bb_x1) + bb_x1
                    # 2: confidence
                    pts[i, j, 0] = pt[0] * 1. / (self.resolution[0] // 4) * (boxes[i][3] - boxes[i][1]) + boxes[i][1]
                    pts[i, j, 1] = pt[1] * 1. / (self.resolution[1] // 4) * (boxes[i][2] - boxes[i][0]) + boxes[i][0]
                    pts[i, j, 2] = joint[pt]
        else:
            pts = np.empty((0, 0, 3), dtype=np.float32)

        if self.return_bounding_boxes:
            return boxes, pts
        else:
            return pts

    def _predict_batch(self, images):
        if not self.multiperson:
            old_res = images[0].shape
            if self.resolution is not None:
                images_tensor = torch.empty(images.shape[0], 3, self.resolution[0], self.resolution[1])
            else:
                images_tensor = torch.empty(images.shape[0], 3, images.shape[1], images.shape[2])

            for i, image in enumerate(images):
                if self.resolution is not None:
                    image = cv2.resize(
                        image,
                        (self.resolution[1], self.resolution[0]),  # (width, height)
                        interpolation=self.interpolation
                    )
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                images_tensor[i] = self.transform(image)

            images = images_tensor
            boxes = np.repeat(np.asarray([[0, 0, old_res[1], old_res[0]]], dtype=np.float32),
                              len(images), axis=0)  # [x1, y1, x2, y2]
        else:
            image_detections = self.detector.predict(images)

            boxes = []
            images_tensor = []
            for d, detections in enumerate(image_detections):
                image = images[d]
                boxes_image = []
                if detections is not None:
                    images_tensor_image = torch.empty(
                        (len(detections), 3, self.resolution[0], self.resolution[1]))  # (height, width)
                    for i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
                        x1 = int(round(x1.item()))
                        x2 = int(round(x2.item()))
                        y1 = int(round(y1.item()))
                        y2 = int(round(y2.item()))

                        # Adapt detections to match HRNet input aspect ratio (as suggested by xtyDoge in issue #14)
                        correction_factor = self.resolution[0] / self.resolution[1] * (x2 - x1) / (y2 - y1)
                        if correction_factor > 1:
                            # increase y side
                            center = y1 + (y2 - y1) // 2
                            length = int(round((y2 - y1) * correction_factor))
                            y1 = max(0, center - length // 2)
                            y2 = min(image.shape[0], center + length // 2)
                        elif correction_factor < 1:
                            # increase x side
                            center = x1 + (x2 - x1) // 2
                            length = int(round((x2 - x1) * 1 / correction_factor))
                            x1 = max(0, center - length // 2)
                            x2 = min(image.shape[1], center + length // 2)

                        boxes_image.append([x1, y1, x2, y2])
                        images_tensor_image[i] = self.transform(image[y1:y2, x1:x2, ::-1])
                else:
                    images_tensor_image = torch.empty(
                        (0, 3, self.resolution[0], self.resolution[1]))  # (height, width)

                # stack all images and boxes in single lists
                images_tensor.extend(images_tensor_image)
                boxes.extend(boxes_image)

            # convert lists into tensors/np.ndarrays
            images = torch.tensor(np.stack(images_tensor))
            boxes = np.asarray(boxes, dtype=np.int32)

        images = images.to(self.device)

        with torch.no_grad():
            if len(images) <= self.max_batch_size:
                out = self.model(images)
            else:
                out = torch.empty(
                    (images.shape[0], self.nof_joints,
                     self.resolution[0] // 4, self.resolution[1] // 4),
                    device=self.device)
                for i in range(0, len(images), self.max_batch_size):
                    out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])

        out = out.detach().cpu().numpy()
        pts = np.empty((out.shape[0], out.shape[1], 3), dtype=np.float32)
        # For each human, for each joint: y, x, confidence
        for i, human in enumerate(out):
            for j, joint in enumerate(human):
                pt = np.unravel_index(np.argmax(joint), (self.resolution[0] // 4, self.resolution[1] // 4))
                # 0: pt_y / (height // 4) * (bb_y2 - bb_y1) + bb_y1
                # 1: pt_x / (width // 4) * (bb_x2 - bb_x1) + bb_x1
                # 2: confidence
                pts[i, j, 0] = pt[0] * 1. / (self.resolution[0] // 4) * (boxes[i][3] - boxes[i][1]) + boxes[i][1]
                pts[i, j, 1] = pt[1] * 1. / (self.resolution[1] // 4) * (boxes[i][2] - boxes[i][0]) + boxes[i][0]
                pts[i, j, 2] = joint[pt]

        if self.multiperson:
            # re-add the removed batch axis (n)
            pts_batch = []
            index = 0
            for detections in image_detections:
                if detections is not None:
                    pts_batch.append(pts[index:index + len(detections)])
                    index += len(detections)
                else:
                    pts_batch.append(np.zeros((0, self.nof_joints, 3), dtype=np.float32))
            pts = pts_batch
        else:
            pts = np.expand_dims(pts, axis=1)

        if self.return_bounding_boxes:
            return boxes, pts
        else:
            return pts
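
# --- Example usage (illustrative sketch, not part of the original class) ---
# Assumes the default YOLOv3 files and an official HRNet checkpoint are available at the
# paths below; 'example.jpg' is a placeholder input image.
if __name__ == '__main__':
    hrnet = SimpleHRNet(
        c=48, nof_joints=17,
        checkpoint_path='./weights/pose_hrnet_w48_384x288.pth',
        multiperson=True,
        return_bounding_boxes=True,
        device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    frame = cv2.imread('example.jpg')  # BGR image, as expected by predict()
    boxes, joints = hrnet.predict(frame)
    # joints[k] holds (y, x, confidence) for each of the 17 joints of person k
    print('people detected:', len(joints))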
class SimpleHRNet:
    """
    SimpleHRNet class.

    The class provides a simple and customizable method to load the HRNet network, load the official
    pre-trained weights, and predict the human pose on single images.
    Multi-person support with the YOLOv3 detector is also included (and enabled by default).
    """

    def __init__(self,
                 c,
                 nof_joints,
                 checkpoint_path,
                 resolution=(384, 288),
                 interpolation=cv2.INTER_CUBIC,
                 multiperson=True,
                 max_batch_size=32,
                 yolo_model_def="./models/detectors/yolo/config/yolov3.cfg",
                 yolo_class_path="./models/detectors/yolo/data/coco.names",
                 yolo_weights_path="./models/detectors/yolo/weights/yolov3.weights",
                 device=torch.device("cpu")):
        """
        Initializes a new SimpleHRNet object.
        HRNet (and YOLOv3) are initialized on ``device`` and their pretrained weights are loaded from disk.

        Arguments:
            c (int): number of channels.
            nof_joints (int): number of joints.
            checkpoint_path (str): hrnet checkpoint path.
            resolution (tuple): hrnet input resolution - format: (height, width).
                Default: ``(384, 288)``
            interpolation (int): opencv interpolation algorithm.
                Default: ``cv2.INTER_CUBIC``
            multiperson (bool): if ``True``, multiperson detection will be enabled.
                This requires the use of a people detector (like YOLOv3).
                Default: ``True``
            max_batch_size (int): maximum batch size used in hrnet inference.
                Useless without multiperson=True.
                Default: ``32``
            yolo_model_def (str): path to yolo model definition file.
                Default: ``"./models/detectors/yolo/config/yolov3.cfg"``
            yolo_class_path (str): path to yolo class definition file.
                Default: ``"./models/detectors/yolo/data/coco.names"``
            yolo_weights_path (str): path to yolo pretrained weights file.
                Default: ``"./models/detectors/yolo/weights/yolov3.weights"``
            device (:class:`torch.device`): the hrnet (and yolo) inference will be run on this device.
                Default: ``torch.device("cpu")``
        """
        self.c = c
        self.nof_joints = nof_joints
        self.checkpoint_path = checkpoint_path
        self.resolution = resolution  # in the form (height, width) as in the original implementation
        self.interpolation = interpolation
        self.multiperson = multiperson
        self.max_batch_size = max_batch_size
        self.yolo_model_def = yolo_model_def
        self.yolo_class_path = yolo_class_path
        self.yolo_weights_path = yolo_weights_path
        self.device = device

        self.model = HRNet(c=c, nof_joints=nof_joints).to(device)
        self.model.load_state_dict(torch.load(checkpoint_path))
        self.model.eval()

        if not self.multiperson:
            self.transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        else:
            self.detector = YOLOv3(model_def=yolo_model_def,
                                   class_path=yolo_class_path,
                                   weights_path=yolo_weights_path,
                                   classes=('person',),
                                   device=device)
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((self.resolution[0], self.resolution[1])),  # (height, width)
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])

    def predict(self, image):
        """
        Predicts the human pose on a single image.

        Arguments:
            image (:class:`np.ndarray`): the image on which the human pose will be estimated.
                Image must be in the opencv format, i.e. shape=(height, width, BGR color channel).

        Returns:
            :class:`np.ndarray`: a numpy array containing human joints for each (detected) person.
                Format: shape=(# of people, # of joints (nof_joints), 3);  dtype=(np.float32).
                Each joint has 3 values: (y position, x position, joint confidence).
        """
        if not self.multiperson:
            old_res = image.shape
            if self.resolution is not None:
                image = cv2.resize(
                    image,
                    (self.resolution[1], self.resolution[0]),  # (width, height)
                    interpolation=self.interpolation
                )
            images = self.transform(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).unsqueeze(dim=0)
            boxes = np.asarray([[0, 0, old_res[1], old_res[0]]], dtype=np.float32)  # [x1, y1, x2, y2]
        else:
            detections = self.detector.predict_single(image)

            boxes = []
            if detections is not None:
                images = torch.empty((len(detections), 3, self.resolution[0], self.resolution[1]))  # (height, width)
                for i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
                    x1 = int(round(x1.item()))
                    x2 = int(round(x2.item()))
                    y1 = int(round(y1.item()))
                    y2 = int(round(y2.item()))
                    boxes.append([x1, y1, x2, y2])
                    images[i] = self.transform(image[y1:y2, x1:x2, ::-1])
            else:
                images = torch.empty((0, 3, self.resolution[0], self.resolution[1]))  # (height, width)

            boxes = np.asarray(boxes, dtype=np.int32)

        if images.shape[0] > 0:
            images = images.to(self.device)

            with torch.no_grad():
                if len(images) <= self.max_batch_size:
                    out = self.model(images)
                else:
                    out = torch.empty(
                        (images.shape[0], self.nof_joints,
                         self.resolution[0] // 4, self.resolution[1] // 4)).to(self.device)
                    for i in range(0, len(images), self.max_batch_size):
                        out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])

            out = out.detach().cpu().numpy()
            pts = np.empty((out.shape[0], out.shape[1], 3), dtype=np.float32)
            # For each human, for each joint: y, x, confidence
            for i, human in enumerate(out):
                for j, joint in enumerate(human):
                    pt = np.unravel_index(np.argmax(joint),
                                          shape=(self.resolution[0] // 4, self.resolution[1] // 4))
                    # 0: pt_y / (height // 4) * (bb_y2 - bb_y1) + bb_y1
                    # 1: pt_x / (width // 4) * (bb_x2 - bb_x1) + bb_x1
                    # 2: confidence
                    pts[i, j, 0] = pt[0] * 1. / (self.resolution[0] // 4) * (boxes[i][3] - boxes[i][1]) + boxes[i][1]
                    pts[i, j, 1] = pt[1] * 1. / (self.resolution[1] // 4) * (boxes[i][2] - boxes[i][0]) + boxes[i][0]
                    pts[i, j, 2] = joint[pt]
        else:
            pts = np.empty((0, 0, 3), dtype=np.float32)

        return pts
class Train(object):
    """
    Train class.

    The class provides a basic tool for training HRNet.
    Most of the training options are customizable.
    The only method supposed to be directly called is `run()`.
    """

    def __init__(self,
                 exp_name,
                 ds_train,
                 ds_val,
                 epochs=210,
                 batch_size=16,
                 num_workers=4,
                 loss='JointsMSELoss',
                 lr=0.001,
                 lr_decay=True,
                 lr_decay_steps=(170, 200),
                 lr_decay_gamma=0.1,
                 optimizer='Adam',
                 weight_decay=0.,
                 momentum=0.9,
                 nesterov=False,
                 pretrained_weight_path=None,
                 checkpoint_path=None,
                 log_path='./logs',
                 use_tensorboard=True,
                 model_c=48,
                 model_nof_joints=17,
                 model_bn_momentum=0.1,
                 flip_test_images=True,
                 device=None):
        """
        Initializes a new Train object.

        The log folder is created, the HRNet model is initialized and optional pre-trained weights
        or saved checkpoints are loaded.
        The DataLoaders, the loss function, and the optimizer are defined.

        Args:
            exp_name (str): experiment name.
            ds_train (HumanPoseEstimationDataset): train dataset.
            ds_val (HumanPoseEstimationDataset): validation dataset.
            epochs (int): number of epochs.
                Default: 210
            batch_size (int): batch size.
                Default: 16
            num_workers (int): number of workers for each DataLoader.
                Default: 4
            loss (str): loss function. Valid values are 'JointsMSELoss' and 'JointsOHKMMSELoss'.
                Default: "JointsMSELoss"
            lr (float): learning rate.
                Default: 0.001
            lr_decay (bool): learning rate decay.
                Default: True
            lr_decay_steps (tuple): steps for the learning rate decay scheduler.
                Default: (170, 200)
            lr_decay_gamma (float): scale factor for each learning rate decay step.
                Default: 0.1
            optimizer (str): network optimizer. Valid values are 'Adam' and 'SGD'.
                Default: "Adam"
            weight_decay (float): weight decay.
                Default: 0.
            momentum (float): momentum factor.
                Default: 0.9
            nesterov (bool): Nesterov momentum.
                Default: False
            pretrained_weight_path (str): path to pre-trained weights (such as weights from pre-train on imagenet).
                Default: None
            checkpoint_path (str): path to a previous checkpoint.
                Default: None
            log_path (str): path where tensorboard data and checkpoints will be saved.
                Default: "./logs"
            use_tensorboard (bool): enables tensorboard use.
                Default: True
            model_c (int): hrnet parameters - number of channels.
                Default: 48
            model_nof_joints (int): hrnet parameters - number of joints.
                Default: 17
            model_bn_momentum (float): hrnet parameters - batch normalization momentum.
                Default: 0.1
            flip_test_images (bool): flip images during validation.
                Default: True
            device (torch.device): device to be used (default: cuda, if available).
                Default: None
        """
        super(Train, self).__init__()

        self.exp_name = exp_name
        self.ds_train = ds_train
        self.ds_val = ds_val
        self.epochs = epochs
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.loss = loss
        self.lr = lr
        self.lr_decay = lr_decay
        self.lr_decay_steps = lr_decay_steps
        self.lr_decay_gamma = lr_decay_gamma
        self.optimizer = optimizer
        self.weight_decay = weight_decay
        self.momentum = momentum
        self.nesterov = nesterov
        self.pretrained_weight_path = pretrained_weight_path
        self.checkpoint_path = checkpoint_path
        self.log_path = os.path.join(log_path, self.exp_name)
        self.use_tensorboard = use_tensorboard
        self.model_c = model_c
        self.model_nof_joints = model_nof_joints
        self.model_bn_momentum = model_bn_momentum
        self.flip_test_images = flip_test_images
        self.epoch = 0

        # torch device
        if device is not None:
            self.device = device
        else:
            if torch.cuda.is_available():
                self.device = torch.device('cuda:0')
            else:
                self.device = torch.device('cpu')

        print(self.device)

        os.makedirs(self.log_path, 0o755, exist_ok=False)  # exist_ok=False to avoid overwriting
        if self.use_tensorboard:
            self.summary_writer = tb.SummaryWriter(self.log_path)

        #
        # write all experiment parameters in parameters.txt and in tensorboard text field
        self.parameters = [x + ': ' + str(y) + '\n' for x, y in locals().items()]
        with open(os.path.join(self.log_path, 'parameters.txt'), 'w') as fd:
            fd.writelines(self.parameters)
        if self.use_tensorboard:
            self.summary_writer.add_text('parameters', '\n'.join(self.parameters))

        #
        # load model
        self.model = HRNet(c=self.model_c, nof_joints=self.model_nof_joints,
                           bn_momentum=self.model_bn_momentum).to(self.device)

        #
        # define loss and optimizers
        if self.loss == 'JointsMSELoss':
            self.loss_fn = JointsMSELoss().to(self.device)
        elif self.loss == 'JointsOHKMMSELoss':
            self.loss_fn = JointsOHKMMSELoss().to(self.device)
        else:
            raise NotImplementedError

        if optimizer == 'SGD':
            self.optim = SGD(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                             momentum=self.momentum, nesterov=self.nesterov)
        elif optimizer == 'Adam':
            self.optim = Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        else:
            raise NotImplementedError

        #
        # load pre-trained weights (such as those pre-trained on imagenet)
        if self.pretrained_weight_path is not None:
            self.model.load_state_dict(torch.load(self.pretrained_weight_path, map_location=self.device),
                                       strict=True)
            print('Pre-trained weights loaded.')

        #
        # load previous checkpoint
        if self.checkpoint_path is not None:
            print('Loading checkpoint %s...' % self.checkpoint_path)
            if os.path.isdir(self.checkpoint_path):
                path = os.path.join(self.checkpoint_path, 'checkpoint_last.pth')
            else:
                path = self.checkpoint_path
            self.starting_epoch, self.model, self.optim, self.params = load_checkpoint(
                path, self.model, self.optim, self.device)
        else:
            self.starting_epoch = 0

        if lr_decay:
            self.lr_scheduler = MultiStepLR(self.optim, list(self.lr_decay_steps), gamma=self.lr_decay_gamma,
                                            last_epoch=self.starting_epoch if self.starting_epoch else -1)

        #
        # load train and val datasets
        self.dl_train = DataLoader(self.ds_train, batch_size=self.batch_size, shuffle=True,
                                   num_workers=self.num_workers, drop_last=True)
        self.len_dl_train = len(self.dl_train)

        # dl_val = DataLoader(self.ds_val, batch_size=1, shuffle=False, num_workers=num_workers)
        self.dl_val = DataLoader(self.ds_val, batch_size=self.batch_size, shuffle=False,
                                 num_workers=self.num_workers)
        self.len_dl_val = len(self.dl_val)

        #
        # initialize variables
        self.mean_loss_train = 0.
        self.mean_acc_train = 0.
        self.mean_loss_val = 0.
        self.mean_acc_val = 0.
        self.mean_mAP_val = 0.

        self.best_loss = None
        self.best_acc = None
        self.best_mAP = None

    def _train(self):
        self.model.train()
        for step, (image, target, target_weight, joints_data) in enumerate(tqdm(self.dl_train, desc='Training')):
            image = image.to(self.device)
            target = target.to(self.device)
            target_weight = target_weight.to(self.device)

            self.optim.zero_grad()
            output = self.model(image)
            loss = self.loss_fn(output, target, target_weight)
            loss.backward()
            self.optim.step()

            # Evaluate accuracy
            # Get predictions on the input
            accs, avg_acc, cnt, joints_preds, joints_target = self.ds_train.evaluate_accuracy(output, target)

            self.mean_loss_train += loss.item()
            self.mean_acc_train += avg_acc.item()
            if self.use_tensorboard:
                self.summary_writer.add_scalar('train_loss', loss.item(),
                                               global_step=step + self.epoch * self.len_dl_train)
                self.summary_writer.add_scalar('train_acc', avg_acc.item(),
                                               global_step=step + self.epoch * self.len_dl_train)
                if step == 0:
                    save_images(image, target, joints_target, output, joints_preds,
                                joints_data['joints_visibility'], self.summary_writer,
                                step=step + self.epoch * self.len_dl_train, prefix='train_')

        self.mean_loss_train /= len(self.dl_train)
        self.mean_acc_train /= len(self.dl_train)

        print('\nTrain: Loss %f - Accuracy %f' % (self.mean_loss_train, self.mean_acc_train))

    def _val(self):
        self.model.eval()
        with torch.no_grad():
            for step, (image, target, target_weight, joints_data) in enumerate(tqdm(self.dl_val, desc='Validating')):
                image = image.to(self.device)
                target = target.to(self.device)
                target_weight = target_weight.to(self.device)

                output = self.model(image)

                if self.flip_test_images:
                    image_flipped = flip_tensor(image, dim=-1)
                    output_flipped = self.model(image_flipped)
                    output_flipped = flip_back(output_flipped, self.ds_val.flip_pairs)
                    output = (output + output_flipped) * 0.5

                loss = self.loss_fn(output, target, target_weight)

                # Evaluate accuracy
                # Get predictions on the input
                accs, avg_acc, cnt, joints_preds, joints_target = \
                    self.ds_train.evaluate_accuracy(output, target)

                self.mean_loss_val += loss.item()
                self.mean_acc_val += avg_acc.item()
                if self.use_tensorboard:
                    self.summary_writer.add_scalar('val_loss', loss.item(),
                                                   global_step=step + self.epoch * self.len_dl_train)
                    self.summary_writer.add_scalar('val_acc', avg_acc.item(),
                                                   global_step=step + self.epoch * self.len_dl_train)
                    if step == 0:
                        save_images(image, target, joints_target, output, joints_preds,
                                    joints_data['joints_visibility'], self.summary_writer,
                                    step=step + self.epoch * self.len_dl_train, prefix='val_')

        self.mean_loss_val /= len(self.dl_val)
        self.mean_acc_val /= len(self.dl_val)

        print('\nValidation: Loss %f - Accuracy %f' % (self.mean_loss_val, self.mean_acc_val))

    def _checkpoint(self):
        save_checkpoint(path=os.path.join(self.log_path, 'checkpoint_last.pth'), epoch=self.epoch + 1,
                        model=self.model, optimizer=self.optim, params=self.parameters)

        if self.best_loss is None or self.best_loss > self.mean_loss_val:
            self.best_loss = self.mean_loss_val
            print('best_loss %f at epoch %d' % (self.best_loss, self.epoch + 1))
            save_checkpoint(path=os.path.join(self.log_path, 'checkpoint_best_loss.pth'), epoch=self.epoch + 1,
                            model=self.model, optimizer=self.optim, params=self.parameters)
        if self.best_acc is None or self.best_acc < self.mean_acc_val:
            self.best_acc = self.mean_acc_val
            print('best_acc %f at epoch %d' % (self.best_acc, self.epoch + 1))
            save_checkpoint(path=os.path.join(self.log_path, 'checkpoint_best_acc.pth'), epoch=self.epoch + 1,
                            model=self.model, optimizer=self.optim, params=self.parameters)
        if self.best_mAP is None or self.best_mAP < self.mean_mAP_val:
            self.best_mAP = self.mean_mAP_val
            print('best_mAP %f at epoch %d' % (self.best_mAP, self.epoch + 1))
            save_checkpoint(path=os.path.join(self.log_path, 'checkpoint_best_mAP.pth'), epoch=self.epoch + 1,
                            model=self.model, optimizer=self.optim, params=self.parameters)

    def run(self):
        """
        Runs the training.
        """
        print('\nTraining started @ %s' % datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # start training
        for self.epoch in range(self.starting_epoch, self.epochs):
            print('\nEpoch %d of %d @ %s' % (self.epoch + 1, self.epochs,
                                             datetime.now().strftime("%Y-%m-%d %H:%M:%S")))

            self.mean_loss_train = 0.
            self.mean_loss_val = 0.
            self.mean_acc_train = 0.
            self.mean_acc_val = 0.
            self.mean_mAP_val = 0.

            #
            # Train
            self._train()

            #
            # Val
            self._val()

            #
            # LR Update
            if self.lr_decay:
                self.lr_scheduler.step()

            #
            # Checkpoint
            self._checkpoint()

        print('\nTraining ended @ %s' % datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
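
# --- Hypothetical launch script for Train (illustrative sketch, not part of the original class) ---
# COCODataset stands in for any HumanPoseEstimationDataset implementation; its constructor
# arguments, the dataset paths, and the checkpoint path below are placeholders.
if __name__ == '__main__':
    ds_train = COCODataset(root_path='./datasets/COCO', data_version='train2017', is_train=True)
    ds_val = COCODataset(root_path='./datasets/COCO', data_version='val2017', is_train=False)
    trainer = Train(
        exp_name='hrnet_w48_coco',
        ds_train=ds_train,
        ds_val=ds_val,
        epochs=210,
        batch_size=16,
        loss='JointsMSELoss',
        lr=0.001,
        pretrained_weight_path='./weights/pose_hrnet_w48_384x288.pth',
        use_tensorboard=True)
    trainer.run()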
class SimpleHRNet(object):

    def __init__(self,
                 c,
                 nof_joints,
                 checkpoint_path='weights/mod_pose_hrnet_w32_256x192.pth',
                 resolution=(384, 288),
                 interpolation=cv2.INTER_CUBIC,
                 multiperson=True,
                 max_batch_size=32,
                 yolo_model_def="./models/detectors/yolo/config/yolov3.cfg",
                 yolo_class_path="./models/detectors/yolo/data/coco.names",
                 yolo_weights_path="./models/detectors/yolo/weights/yolov3.weights",
                 device=torch.device("cpu")):
        self.c = c
        self.nof_joints = nof_joints
        self.checkpoint_path = checkpoint_path
        self.resolution = resolution  # in the form (height, width) as in the original implementation
        self.interpolation = interpolation
        self.multiperson = multiperson
        self.max_batch_size = max_batch_size
        self.yolo_model_def = yolo_model_def
        self.yolo_class_path = yolo_class_path
        self.yolo_weights_path = yolo_weights_path
        self.device = device

        self.model = HRNet(c=c, nof_joints=nof_joints).to(device)
        self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))
        self.model.eval()

        if not self.multiperson:
            self.transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        else:
            self.detector = YOLOv3(model_def=yolo_model_def,
                                   class_path=yolo_class_path,
                                   weights_path=yolo_weights_path,
                                   classes=('person',),
                                   max_batch_size=self.max_batch_size,
                                   device=device)
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((self.resolution[0], self.resolution[1])),  # (height, width)
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])

    def predict(self, image):
        if len(image.shape) == 3:
            return self._predict_single(image)
        elif len(image.shape) == 4:
            return self._predict_batch(image)
        else:
            raise ValueError('Wrong image format.')

    def _predict_single(self, image):
        if not self.multiperson:
            old_res = image.shape
            if self.resolution is not None:
                image = cv2.resize(
                    image,
                    (self.resolution[1], self.resolution[0]),  # (width, height)
                    interpolation=self.interpolation
                )
            images = self.transform(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).unsqueeze(dim=0)
            boxes = np.asarray([[0, 0, old_res[1], old_res[0]]], dtype=np.float32)  # [x1, y1, x2, y2]
        else:
            detections = self.detector.predict_single(image)

            boxes = []
            if detections is not None:
                images = torch.empty((len(detections), 3, self.resolution[0], self.resolution[1]))  # (height, width)
                for i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
                    x1 = int(round(x1.item()))
                    x2 = int(round(x2.item()))
                    y1 = int(round(y1.item()))
                    y2 = int(round(y2.item()))
                    boxes.append([x1, y1, x2, y2])
                    images[i] = self.transform(image[y1:y2, x1:x2, ::-1])
            else:
                images = torch.empty((0, 3, self.resolution[0], self.resolution[1]))  # (height, width)

            boxes = np.asarray(boxes, dtype=np.int32)

        if images.shape[0] > 0:
            images = images.to(self.device)

            with torch.no_grad():
                if len(images) <= self.max_batch_size:
                    out = self.model(images)
                else:
                    out = torch.empty(
                        (images.shape[0], self.nof_joints,
                         self.resolution[0] // 4, self.resolution[1] // 4)).to(self.device)
                    for i in range(0, len(images), self.max_batch_size):
                        out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])

            out = out.detach().cpu().numpy()
            pts = np.empty((out.shape[0], out.shape[1], 3), dtype=np.float32)
            # For each human, for each joint: y, x, confidence
            for i, human in enumerate(out):
                for j, joint in enumerate(human):
                    pt = np.unravel_index(np.argmax(joint), (self.resolution[0] // 4, self.resolution[1] // 4))
                    pts[i, j, 0] = pt[0] * 1. / (self.resolution[0] // 4) * (boxes[i][3] - boxes[i][1]) + boxes[i][1]
                    pts[i, j, 1] = pt[1] * 1. / (self.resolution[1] // 4) * (boxes[i][2] - boxes[i][0]) + boxes[i][0]
                    pts[i, j, 2] = joint[pt]
        else:
            pts = np.empty((0, 0, 3), dtype=np.float32)

        return pts

    def _predict_batch(self, images):
        if not self.multiperson:
            old_res = images[0].shape
            if self.resolution is not None:
                images_tensor = torch.empty(images.shape[0], 3, self.resolution[0], self.resolution[1])
            else:
                images_tensor = torch.empty(images.shape[0], 3, images.shape[1], images.shape[2])

            for i, image in enumerate(images):
                if self.resolution is not None:
                    image = cv2.resize(
                        image,
                        (self.resolution[1], self.resolution[0]),  # (width, height)
                        interpolation=self.interpolation
                    )
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                images_tensor[i] = self.transform(image)

            images = images_tensor
            boxes = np.repeat(np.asarray([[0, 0, old_res[1], old_res[0]]], dtype=np.float32),
                              len(images), axis=0)  # [x1, y1, x2, y2]
        else:
            image_detections = self.detector.predict(images)

            boxes = []
            images_tensor = []
            for d, detections in enumerate(image_detections):
                image = images[d]
                boxes_image = []
                if detections is not None:
                    images_tensor_image = torch.empty(
                        (len(detections), 3, self.resolution[0], self.resolution[1]))  # (height, width)
                    for i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
                        x1 = int(round(x1.item()))
                        x2 = int(round(x2.item()))
                        y1 = int(round(y1.item()))
                        y2 = int(round(y2.item()))
                        boxes_image.append([x1, y1, x2, y2])
                        images_tensor_image[i] = self.transform(image[y1:y2, x1:x2, ::-1])
                else:
                    images_tensor_image = torch.empty(
                        (0, 3, self.resolution[0], self.resolution[1]))  # (height, width)

                # stack all images and boxes in single lists
                images_tensor.extend(images_tensor_image)
                boxes.extend(boxes_image)

            # convert lists into tensors/np.ndarrays
            images = torch.tensor(np.stack(images_tensor))
            boxes = np.asarray(boxes, dtype=np.int32)

        images = images.to(self.device)

        with torch.no_grad():
            if len(images) <= self.max_batch_size:
                out = self.model(images)
            else:
                out = torch.empty(
                    (images.shape[0], self.nof_joints,
                     self.resolution[0] // 4, self.resolution[1] // 4)).to(self.device)
                for i in range(0, len(images), self.max_batch_size):
                    out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])

        out = out.detach().cpu().numpy()
        pts = np.empty((out.shape[0], out.shape[1], 3), dtype=np.float32)
        # For each human, for each joint: y, x, confidence
        for i, human in enumerate(out):
            for j, joint in enumerate(human):
                pt = np.unravel_index(np.argmax(joint), (self.resolution[0] // 4, self.resolution[1] // 4))
                # 0: pt_y / (height // 4) * (bb_y2 - bb_y1) + bb_y1
                # 1: pt_x / (width // 4) * (bb_x2 - bb_x1) + bb_x1
                # 2: confidence
                pts[i, j, 0] = pt[0] * 1. / (self.resolution[0] // 4) * (boxes[i][3] - boxes[i][1]) + boxes[i][1]
                pts[i, j, 1] = pt[1] * 1. / (self.resolution[1] // 4) * (boxes[i][2] - boxes[i][0]) + boxes[i][0]
                pts[i, j, 2] = joint[pt]

        if self.multiperson:
            # re-add the removed batch axis (n)
            pts_batch = []
            index = 0
            for detections in image_detections:
                if detections is not None:
                    pts_batch.append(pts[index:index + len(detections)])
                    index += len(detections)
                else:
                    pts_batch.append(np.zeros((0, self.nof_joints, 3), dtype=np.float32))
            pts = pts_batch
        else:
            pts = np.expand_dims(pts, axis=1)

        return pts
class Train(object):
    # Training class.
    # Provides basic tools for training HRNet.

    def __init__(self,
                 exp_name,
                 ds_train,
                 ds_val,
                 epochs=210,
                 batch_size=16,
                 num_workers=4,
                 loss='JointsMSELoss',
                 lr=0.001,
                 lr_decay=True,
                 lr_decay_steps=(170, 200),
                 lr_decay_gamma=0.1,
                 optimizer='Adam',
                 weight_decay=0.00001,
                 momentum=0.9,
                 nesterov=False,
                 pretrained_weight_path=None,
                 checkpoint_path=None,
                 log_path='./logs',
                 use_tensorboard=True,
                 model_c=48,
                 model_nof_joints=17,
                 model_bn_momentum=0.1,
                 flip_test_images=True,
                 device=None):
        """
        Initializes a new Train object.
        The log folder is created, the HRNet model is initialized, and optional pre-trained weights
        or saved checkpoints are loaded.
        """
        super(Train, self).__init__()

        self.exp_name = exp_name
        self.ds_train = ds_train
        self.ds_val = ds_val
        self.epochs = epochs
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.loss = loss
        self.lr = lr
        self.lr_decay = lr_decay
        self.lr_decay_steps = lr_decay_steps
        self.lr_decay_gamma = lr_decay_gamma
        self.optimizer = optimizer
        self.weight_decay = weight_decay
        self.momentum = momentum
        self.nesterov = nesterov
        self.pretrained_weight_path = pretrained_weight_path
        self.checkpoint_path = checkpoint_path
        self.log_path = os.path.join(log_path, self.exp_name)
        self.use_tensorboard = use_tensorboard
        self.model_c = model_c
        self.model_nof_joints = model_nof_joints
        self.model_bn_momentum = model_bn_momentum
        self.flip_test_images = flip_test_images
        self.epoch = 0

        # torch device
        if device is not None:
            self.device = device
        else:
            if torch.cuda.is_available():
                self.device = torch.device('cuda:0')
            else:
                self.device = torch.device('cpu')

        print(self.device)

        os.makedirs(self.log_path, 0o755, exist_ok=False)
        if self.use_tensorboard:
            self.summary_writer = tb.SummaryWriter(self.log_path)

        # write all experiment parameters in parameters.txt and in the tensorboard text field
        self.parameters = [x + ': ' + str(y) + '\n' for x, y in locals().items()]
        with open(os.path.join(self.log_path, 'parameters.txt'), 'w') as fd:
            fd.writelines(self.parameters)
        if self.use_tensorboard:
            self.summary_writer.add_text('parameters', '\n'.join(self.parameters))

        #
        # load the model
        self.model = HRNet(c=self.model_c, nof_joints=self.model_nof_joints,
                           bn_momentum=self.model_bn_momentum).to(self.device)

        if self.loss == 'JointsMSELoss':
            self.loss_fn = JointsMSELoss().to(self.device)
        elif self.loss == 'JointsOHKMMSELoss':
            self.loss_fn = JointsOHKMMSELoss().to(self.device)
        else:
            raise NotImplementedError

        if optimizer == 'SGD':
            self.optim = SGD(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                             momentum=self.momentum, nesterov=self.nesterov)
        elif optimizer == 'Adam':
            self.optim = Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        else:
            raise NotImplementedError

        # load pre-trained weights (such as those pre-trained on imagenet)
        if self.pretrained_weight_path is not None:
            self.model.load_state_dict(torch.load(self.pretrained_weight_path, map_location=self.device),
                                       strict=False)

        #
        # load previous checkpoint
        if self.checkpoint_path is not None:
            print('Loading checkpoint %s...' % self.checkpoint_path)
            if os.path.isdir(self.checkpoint_path):
                path = os.path.join(self.checkpoint_path, 'checkpoint_last.pth')
            else:
                path = self.checkpoint_path
            self.starting_epoch, self.model, self.optim, self.params = load_checkpoint(
                path, self.model, self.optim, self.device)
        else:
            self.starting_epoch = 0

        if lr_decay:
            self.lr_scheduler = MultiStepLR(self.optim, list(self.lr_decay_steps), gamma=self.lr_decay_gamma,
                                            last_epoch=self.starting_epoch if self.starting_epoch else -1)

        # load train and val datasets
        self.dl_train = DataLoader(self.ds_train, batch_size=self.batch_size, shuffle=True,
                                   num_workers=self.num_workers, drop_last=True)
        self.len_dl_train = len(self.dl_train)

        self.dl_val = DataLoader(self.ds_val, batch_size=self.batch_size, shuffle=False,
                                 num_workers=self.num_workers)
        self.len_dl_val = len(self.dl_val)

        #
        # initialize variables
        self.mean_loss_train = 0.
        self.mean_acc_train = 0.
        self.mean_loss_val = 0.
        self.mean_acc_val = 0.
        self.mean_mAP_val = 0.

        self.best_loss = None
        self.best_acc = None
        self.best_mAP = None

    def _train(self):
        self.model.train()
        for step, (image, target, target_weight, joints_data) in enumerate(tqdm(self.dl_train, desc='Training')):
            image = image.to(self.device)
            target = target.to(self.device)
            target_weight = target_weight.to(self.device)

            self.optim.zero_grad()
            output = self.model(image)
            loss = self.loss_fn(output, target, target_weight)
            loss.backward()
            self.optim.step()

            # evaluate accuracy
            # get predictions on the input
            accs, avg_acc, cnt, joints_preds, joints_target = self.ds_train.evaluate_accuracy(output, target)

            self.mean_loss_train += loss.item()
            self.mean_acc_train += avg_acc.item()
            if self.use_tensorboard:
                self.summary_writer.add_scalar('train_loss', loss.item(),
                                               global_step=step + self.epoch * self.len_dl_train)
                self.summary_writer.add_scalar('train_acc', avg_acc.item(),
                                               global_step=step + self.epoch * self.len_dl_train)
                if step == 0:
                    save_images(image, target, joints_target, output, joints_preds,
                                joints_data['joints_visibility'], self.summary_writer,
                                step=step + self.epoch * self.len_dl_train, prefix='train_')

        self.mean_loss_train /= len(self.dl_train)
        self.mean_acc_train /= len(self.dl_train)

        print('\nTrain: Loss %f - Accuracy %f' % (self.mean_loss_train, self.mean_acc_train))

    def _val(self):
        self.model.eval()
        with torch.no_grad():
            for step, (image, target, target_weight, joints_data) in enumerate(tqdm(self.dl_val, desc='Validating')):
                image = image.to(self.device)
                target = target.to(self.device)
                target_weight = target_weight.to(self.device)

                output = self.model(image)

                if self.flip_test_images:
                    image_flipped = flip_tensor(image, dim=-1)
                    output_flipped = self.model(image_flipped)
                    output_flipped = flip_back(output_flipped, self.ds_val.flip_pairs)
                    output = (output + output_flipped) * 0.5

                loss = self.loss_fn(output, target, target_weight)

                # evaluate accuracy
                # get predictions on the input
                accs, avg_acc, cnt, joints_preds, joints_target = \
                    self.ds_train.evaluate_accuracy(output, target)

                self.mean_loss_val += loss.item()
                self.mean_acc_val += avg_acc.item()
                if self.use_tensorboard:
                    self.summary_writer.add_scalar(
                        'val_loss', loss.item(),
                        global_step=step + self.epoch * self.len_dl_train)
                    self.summary_writer.add_scalar(
                        'val_acc', avg_acc.item(),
                        global_step=step + self.epoch * self.len_dl_train)
                    if step == 0:
                        save_images(image, target, joints_target, output, joints_preds,
                                    joints_data['joints_visibility'], self.summary_writer,
                                    step=step + self.epoch * self.len_dl_train, prefix='val_')

        self.mean_loss_val /= len(self.dl_val)
        self.mean_acc_val /= len(self.dl_val)

        print('\nValidation: Loss %f - Accuracy %f' % (self.mean_loss_val, self.mean_acc_val))

    def _checkpoint(self):
        save_checkpoint(path=os.path.join(self.log_path, 'checkpoint_last.pth'), epoch=self.epoch + 1,
                        model=self.model, optimizer=self.optim, params=self.parameters)

        if self.best_loss is None or self.best_loss > self.mean_loss_val:
            self.best_loss = self.mean_loss_val
            print('best_loss %f at epoch %d' % (self.best_loss, self.epoch + 1))
            save_checkpoint(path=os.path.join(self.log_path, 'checkpoint_best_loss.pth'), epoch=self.epoch + 1,
                            model=self.model, optimizer=self.optim, params=self.parameters)
        if self.best_acc is None or self.best_acc < self.mean_acc_val:
            self.best_acc = self.mean_acc_val
            print('best_acc %f at epoch %d' % (self.best_acc, self.epoch + 1))
            save_checkpoint(path=os.path.join(self.log_path, 'checkpoint_best_acc.pth'), epoch=self.epoch + 1,
                            model=self.model, optimizer=self.optim, params=self.parameters)
        if self.best_mAP is None or self.best_mAP < self.mean_mAP_val:
            self.best_mAP = self.mean_mAP_val
            print('best_mAP %f at epoch %d' % (self.best_mAP, self.epoch + 1))
            save_checkpoint(path=os.path.join(self.log_path, 'checkpoint_best_mAP.pth'), epoch=self.epoch + 1,
                            model=self.model, optimizer=self.optim, params=self.parameters)

    def run(self):
        """
        Runs the training.
        """
        print('\nTraining started @ %s' % datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # start training
        for self.epoch in range(self.starting_epoch, self.epochs):
            print('\nEpoch %d of %d @ %s' % (self.epoch + 1, self.epochs,
                                             datetime.now().strftime("%Y-%m-%d %H:%M:%S")))

            self.mean_loss_train = 0.
            self.mean_loss_val = 0.
            self.mean_acc_train = 0.
            self.mean_acc_val = 0.
            self.mean_mAP_val = 0.

            #
            # Train
            self._train()

            #
            # Val
            self._val()

            #
            # LR update
            if self.lr_decay:
                self.lr_scheduler.step()

            #
            # Checkpoint
            self._checkpoint()

        print('\nTraining ended @ %s' % datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
import cv2
import torch
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
import numpy as np
import warnings

from vidgear.gears import CamGear

from models.hrnet import HRNet

warnings.filterwarnings("ignore", category=UserWarning)

if __name__ == "__main__":
    model = HRNet(32, 17, 0.1)
    model.load_state_dict(torch.load('weights/mod_pose_hrnet_w32_256x192.pth'))
    print('ok!!')

    if torch.cuda.is_available() and False:  # CUDA intentionally disabled here
        torch.backends.cudnn.deterministic = True
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    print(device)

    model = model.to(device)

    video = CamGear(0).start()
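
    # --- Possible continuation (hedged sketch; the original script stops after opening the stream) ---
    # Reuses the same ImageNet normalization as the SimpleHRNet transform; the loop below is an
    # assumption for illustration, not part of the original file.
    model.eval()
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((256, 192)),  # (height, width) matching the w32_256x192 checkpoint
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    while True:
        frame = video.read()  # CamGear returns BGR frames, or None when the stream ends
        if frame is None:
            break
        tensor = transform(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).unsqueeze(0).to(device)
        with torch.no_grad():
            heatmaps = model(tensor)  # shape (1, 17, 64, 48) for a 256x192 input
        print(heatmaps.shape)

    video.stop()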
class THRNet:
    """
    THRNet class.

    The class provides a simple and customizable way to load the HRNet network, load the official
    pre-trained weights, and predict the human pose on single images.
    """

    def __init__(self,
                 c,
                 nof_joints,
                 checkpoint_path,
                 resolution=(384, 288),
                 interpolation=cv2.INTER_CUBIC,
                 multiperson=True,
                 max_batch_size=32,
                 yolo_model_def="./models/detectors/yolo/config/yolov3.cfg",
                 yolo_class_path="./models/detectors/yolo/data/coco.names",
                 yolo_weights_path="./models/detectors/yolo/weights/yolov3.weights",
                 device=torch.device("cpu")):
        self.c = c
        self.nof_joints = nof_joints
        self.checkpoint_path = checkpoint_path
        self.resolution = resolution  # in the form (height, width) as in the original implementation
        self.interpolation = interpolation
        self.multiperson = multiperson
        self.max_batch_size = max_batch_size
        self.yolo_model_def = yolo_model_def
        self.yolo_class_path = yolo_class_path
        self.yolo_weights_path = yolo_weights_path
        self.device = device

        self.model = HRNet(c=c, nof_joints=nof_joints).to(device)

        checkpoint = torch.load(checkpoint_path, map_location=self.device)
        if 'model' in checkpoint:
            self.model.load_state_dict(checkpoint['model'])
        else:
            self.model.load_state_dict(checkpoint)
        self.model.eval()

        if not self.multiperson:
            self.transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        else:
            self.detector = YOLOv3(model_def=yolo_model_def,
                                   class_path=yolo_class_path,
                                   weights_path=yolo_weights_path,
                                   classes=('person',),
                                   max_batch_size=self.max_batch_size,
                                   device=device)
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((self.resolution[0], self.resolution[1])),  # (height, width)
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])

    def predict(self, image):
        """
        Predicts the human pose on a single image.
        """
        if len(image.shape) == 3:
            return self._predict_single(image)
        elif len(image.shape) == 4:
            return self._predict_batch(image)
        else:
            raise ValueError('Wrong image format.')

    def _predict_single(self, image):
        if not self.multiperson:
            old_res = image.shape
            if self.resolution is not None:
                image = cv2.resize(
                    image,
                    (self.resolution[1], self.resolution[0]),  # (width, height)
                    interpolation=self.interpolation)
            images = self.transform(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).unsqueeze(dim=0)
            boxes = np.asarray([[0, 0, old_res[1], old_res[0]]], dtype=np.float32)  # [x1, y1, x2, y2]
        else:
            detections = self.detector.predict_single(image)

            boxes = []
            if detections is not None:
                images = torch.empty((len(detections), 3, self.resolution[0], self.resolution[1]))  # (height, width)
                for i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
                    x1 = int(round(x1.item()))
                    x2 = int(round(x2.item()))
                    y1 = int(round(y1.item()))
                    y2 = int(round(y2.item()))

                    # Adapt detections to match HRNet input aspect ratio
                    correction_factor = self.resolution[0] / self.resolution[1] * (x2 - x1) / (y2 - y1)
                    if correction_factor > 1:
                        # increase y side
                        center = y1 + (y2 - y1) // 2
                        length = int(round((y2 - y1) * correction_factor))
                        y1 = max(0, center - length // 2)
                        y2 = min(image.shape[0], center + length // 2)
                    elif correction_factor < 1:
                        # increase x side
                        center = x1 + (x2 - x1) // 2
                        length = int(round((x2 - x1) * 1 / correction_factor))
                        x1 = max(0, center - length // 2)
                        x2 = min(image.shape[1], center + length // 2)

                    boxes.append([x1, y1, x2, y2])
                    images[i] = self.transform(image[y1:y2, x1:x2, ::-1])
            else:
                images = torch.empty((0, 3, self.resolution[0], self.resolution[1]))  # (height, width)

            boxes = np.asarray(boxes, dtype=np.int32)

        if images.shape[0] > 0:
            images = images.to(self.device)

            with torch.no_grad():
                if len(images) <= self.max_batch_size:
                    out = self.model(images)
                else:
                    out = torch.empty(
                        (images.shape[0], self.nof_joints,
                         self.resolution[0] // 4, self.resolution[1] // 4)).to(self.device)
                    for i in range(0, len(images), self.max_batch_size):
                        out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])

            out = out.detach().cpu().numpy()
            pts = np.empty((out.shape[0], out.shape[1], 3), dtype=np.float32)
            # For each human, for each joint: y, x, confidence
            for i, human in enumerate(out):
                for j, joint in enumerate(human):
                    pt = np.unravel_index(np.argmax(joint), (self.resolution[0] // 4, self.resolution[1] // 4))
                    pts[i, j, 0] = pt[0] * 1. / (self.resolution[0] // 4) * (boxes[i][3] - boxes[i][1]) + boxes[i][1]
                    pts[i, j, 1] = pt[1] * 1. / (self.resolution[1] // 4) * (boxes[i][2] - boxes[i][0]) + boxes[i][0]
                    pts[i, j, 2] = joint[pt]
        else:
            pts = np.empty((0, 0, 3), dtype=np.float32)

        return pts

    def _predict_batch(self, images):
        if not self.multiperson:
            old_res = images[0].shape
            if self.resolution is not None:
                images_tensor = torch.empty(images.shape[0], 3, self.resolution[0], self.resolution[1])
            else:
                images_tensor = torch.empty(images.shape[0], 3, images.shape[1], images.shape[2])

            for i, image in enumerate(images):
                if self.resolution is not None:
                    image = cv2.resize(
                        image,
                        (self.resolution[1], self.resolution[0]),  # (width, height)
                        interpolation=self.interpolation)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                images_tensor[i] = self.transform(image)

            images = images_tensor
            boxes = np.repeat(np.asarray([[0, 0, old_res[1], old_res[0]]], dtype=np.float32),
                              len(images), axis=0)  # [x1, y1, x2, y2]
        else:
            image_detections = self.detector.predict(images)

            boxes = []
            images_tensor = []
            for d, detections in enumerate(image_detections):
                image = images[d]
                boxes_image = []
                if detections is not None:
                    images_tensor_image = torch.empty(
                        (len(detections), 3, self.resolution[0], self.resolution[1]))  # (height, width)
                    for i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
                        x1 = int(round(x1.item()))
                        x2 = int(round(x2.item()))
                        y1 = int(round(y1.item()))
                        y2 = int(round(y2.item()))

                        # Adapt detections to match HRNet input aspect ratio
                        correction_factor = self.resolution[0] / self.resolution[1] * (x2 - x1) / (y2 - y1)
                        if correction_factor > 1:
                            # increase y side
                            center = y1 + (y2 - y1) // 2
                            length = int(round((y2 - y1) * correction_factor))
                            y1 = max(0, center - length // 2)
                            y2 = min(image.shape[0], center + length // 2)
                        elif correction_factor < 1:
                            # increase x side
                            center = x1 + (x2 - x1) // 2
                            length = int(round((x2 - x1) * 1 / correction_factor))
                            x1 = max(0, center - length // 2)
                            x2 = min(image.shape[1], center + length // 2)

                        boxes_image.append([x1, y1, x2, y2])
                        images_tensor_image[i] = self.transform(image[y1:y2, x1:x2, ::-1])
                else:
                    images_tensor_image = torch.empty(
                        (0, 3, self.resolution[0], self.resolution[1]))  # (height, width)

                # stack all images and boxes in single lists
                images_tensor.extend(images_tensor_image)
                boxes.extend(boxes_image)

            # convert lists into tensors/np.ndarrays
            images = torch.tensor(np.stack(images_tensor))
            boxes = np.asarray(boxes, dtype=np.int32)

        images = images.to(self.device)

        with torch.no_grad():
            if len(images) <= self.max_batch_size:
                out = self.model(images)
            else:
                out = torch.empty(
                    (images.shape[0], self.nof_joints,
                     self.resolution[0] // 4, self.resolution[1] // 4)).to(self.device)
                for i in range(0, len(images), self.max_batch_size):
                    out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])

        out = out.detach().cpu().numpy()
        pts = np.empty((out.shape[0], out.shape[1], 3), dtype=np.float32)
        # For each human, for each joint: y, x, confidence
        for i, human in enumerate(out):
            for j, joint in enumerate(human):
                pt = np.unravel_index(np.argmax(joint), (self.resolution[0] // 4, self.resolution[1] // 4))
                pts[i, j, 0] = pt[0] * 1. / (self.resolution[0] // 4) * (boxes[i][3] - boxes[i][1]) + boxes[i][1]
                pts[i, j, 1] = pt[1] * 1. / (self.resolution[1] // 4) * (boxes[i][2] - boxes[i][0]) + boxes[i][0]
                pts[i, j, 2] = joint[pt]

        if self.multiperson:
            # re-add the removed batch axis (n)
            pts_batch = []
            index = 0
            for detections in image_detections:
                if detections is not None:
                    pts_batch.append(pts[index:index + len(detections)])
                    index += len(detections)
                else:
                    pts_batch.append(np.zeros((0, self.nof_joints, 3), dtype=np.float32))
            pts = pts_batch
        else:
            pts = np.expand_dims(pts, axis=1)

        return pts