def getObjectPointFromImage(camera):
    """Get Object Pose from camera image"""
    max_pix_value = 1.0
    normalizer = 255.0 / max_pix_value
    cam_img = CvBridge().imgmsg_to_cv2(camera, 'rgb8') / normalizer
    cam_img = torch.tensor(cam_img).permute(2, 1, 0)

    targ = localize_target(cam_img)

    point = PointStamped(header=camera.header, point=Point(x=(1.581 - targ[0]/154.29), y=(-0.16 - targ[1]/154.29), z=0.964))
    return point
def find_object_1(t, camera, obj_pos_topic):
    underSmpl = 5
    if int(t * 50) % underSmpl == 0:
        import numpy as np
        from specs import localize_target
        from cv_bridge import CvBridge
        import torch
        max_pix_value = 1.0
        normalizer = 255.0 / max_pix_value
        cam_img = CvBridge().imgmsg_to_cv2(camera.value, 'rgb8') / normalizer
        cam_img = torch.tensor(cam_img).permute(2, 1, 0)

        targ = localize_target(cam_img)
        if targ[1] < 100:  # otherwise it is detecting something else.
            targ = (-1, -1)

        msg = Point()
        msg.x = targ[0]
        msg.y = targ[1]
        obj_pos_topic.send_message(msg)
Ejemplo n.º 3
0
def img_to_pred(t, camera, plot_topic, latent_topic, pred_pos_topic, pred_msg,
                model, model_path, model_inputs, optimizer, scheduler,
                run_step):

    # Imports
    import os
    import torch
    import torch.nn.functional as F
    import numpy as np
    from prednet import PredNet
    from cv_bridge import CvBridge
    from std_msgs.msg import Float32MultiArray, MultiArrayLayout, MultiArrayDimension
    from specs import localize_target, complete_target_positions, mark_target, exp_dir

    # Image and model parameters
    underSmpl = 5  # Avoiding too sharp time resolution (no change between frames)
    nt = 15  # Number of "past" frames given to the network
    t_extrap = 5  # After this frame, input is not used for future predictions
    n_feat = 1  # Factor for number of features used in the network
    max_pix_value = 1.0
    normalizer = 255.0 / max_pix_value
    C_channels = 3  # 1 or 3 (color channels)
    A_channels = (C_channels, n_feat * 4, n_feat * 8, n_feat * 16)
    R_channels = (C_channels, n_feat * 4, n_feat * 8, n_feat * 16)
    scale = 4  # 2 or 4 (how much layers down/upsample images)
    pad = 8 if scale == 4 else 0  # For up/downsampling to work
    model_name = 'model' + str(n_feat) + '.pt'
    new_model_path = os.getcwd() + '/resources/' + model_name
    trained_w_path = exp_dir + model_name  # exp_dir computed in specs.py
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'

    # Training parameters
    use_new_w = False  # If True, do not use weights that are saved in new_model_path
    use_trained_w = True  # If above is False, use trained_w_path as model weights
    do_train = False  # Train with present frames if True, predicts future if False
    initial_lr = 1e-4  # Then, the learning rate is scheduled with cosine annealing
    epoch_loop = 100  # Every epoch_loop, a prediction is made, to monitor progress
    n_batches = 1  # For now, not usable (could roll images for multiple batches)

    # Check that the simulation frame is far enough
    if camera.value is not None and int(t * 50) % underSmpl == 0:

        # Collect input image and initialize the network input
        cam_img = CvBridge().imgmsg_to_cv2(camera.value, 'rgb8') / normalizer
        if C_channels == 3:  # Below I messed up, it should be (2,0,1) but the model is already trained.
            cam_img = torch.tensor(cam_img, device=device).permute(
                2, 1, 0)  # --> channels last
        if C_channels == 1:
            cam_img = cam_img[:, :, 1]  # .mean(axis=2)
            cam_img = torch.tensor(cam_img,
                                   device=device).unsqueeze(dim=2).permute(
                                       2, 1, 0)
        img_shp = cam_img.shape
        cam_img = F.pad(cam_img, (pad, pad), 'constant',
                        0.0)  # width may need to be 256
        if model_inputs.value is None:
            model_inputs.value = torch.zeros((1, nt) + cam_img.shape,
                                             device=device)

        # Update the model or the mode, if needed
        run_step.value = run_step.value + 1
        if new_model_path != model_path.value:

            # Update the model path if new or changed and reset prediction plot
            model_path.value = new_model_path
            pred_msg.value = torch.ones(img_shp[0], img_shp[1] *
                                        (nt - t_extrap),
                                        img_shp[2] + 10) * 64.0

            # Load or reload the model
            model.value = PredNet(R_channels,
                                  A_channels,
                                  device=device,
                                  t_extrap=t_extrap,
                                  scale=scale)
            if device == 'cuda': model.value = model.value.to('cuda')
            if run_step.value == 1:
                try:
                    if use_new_w:
                        a = 1. / 0.
                    if use_trained_w:
                        model.value.load_state_dict(torch.load(trained_w_path))
                        clientLogger.info(
                            'Model initialized with pre-trained weights.')
                    else:
                        model.value.load_state_dict(
                            torch.load(model_path.value))
                        clientLogger.info(
                            'Learning weights loaded in the model.')
                except:
                    clientLogger.info(
                        'No existing weight file found. Model initialized randomly.'
                    )

        # Initialize some variables needed for training
        time_loss_w = [1.0 / (nt - 1) if s > 0 else 0.0 for s in range(nt)]
        if t_extrap < nt:
            time_loss_w = [
                w if n < t_extrap else 2.0 * w
                for n, w in enumerate(time_loss_w)
            ]

        if None in [optimizer.value, scheduler.value]:
            optimizer.value = torch.optim.Adam(model.value.parameters(),
                                               lr=initial_lr)
            scheduler.value = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
                optimizer.value, T_0=50)

        # Save the model at each epoch
        if run_step.value % epoch_loop == 1:
            torch.save(model.value.state_dict(), model_path.value)

        # Check that the model exists and initialize plot message
        if model.value is not None:

            # Feed network and train it or compute prediction
            model_inputs.value = model_inputs.value.roll(-1, dims=1)
            model_inputs.value[0, -1, :, :, :] = cam_img
            if run_step.value > nt:

                # Compute prediction along present frames and updates weights
                if do_train:

                    # Compute prediction loss for every frame
                    pred, latent = model.value(model_inputs.value, nt)
                    loss = torch.tensor([0.0], device=device)
                    for s in range(nt):
                        error = (pred[s][0] - model_inputs.value[0][s])**2
                        loss += torch.sum(error) * time_loss_w[s]

                    # Backward pass and weight updates
                    optimizer.value.zero_grad()
                    loss.backward()
                    optimizer.value.step()
                    scheduler.value.step()

                # Predicts future frames without weight updates
                else:
                    with torch.no_grad():
                        pred, latent = model.value(
                            model_inputs.value[:, -t_extrap:, :, :, :], nt)

                # Collect prediction frames
                displays = []
                targ_pos = []
                for s in range(nt - t_extrap):
                    disp = torch.detach(pred[t_extrap + s].clamp(
                        0.0, 1.0)[0, :, :, pad:-pad]).cpu()
                    # disp = model_inputs.value[0,-(s+1),:,:,pad:-pad].cpu()  # for tests
                    targ_pos.append(localize_target(disp))
                    displays.append(disp)

                # Complete for missing target positions, highlight target and set the display message
                if 0 < np.sum(
                    [any([np.isnan(p) for p in pos])
                     for pos in targ_pos]) < len(targ_pos) - 2:
                    targ_pos = complete_target_positions(targ_pos)
                for s, (disp, pos) in enumerate(zip(displays, targ_pos)):
                    pred_msg.value[:, s * img_shp[1]:(s + 1) *
                                   img_shp[1], :img_shp[2]] = mark_target(
                                       disp, pos)

                # Print loss or prediction messages
                if do_train:
                    clientLogger.info('Epoch: %2i - step: %2i - error: %5.4f - lr: %5.4f' % \
                        (int(run_step.value/epoch_loop), run_step.value%epoch_loop, loss.item(), \
                         scheduler.value.get_lr()[0]))
                else:
                    clientLogger.info(
                        'Prediction for future target locations: ' +
                        str(targ_pos))

                # Send latent state message (latent[0] to remove batch dimension)
                latent_msg = list(latent[0].cpu().numpy().flatten())
                layout_msg = MultiArrayLayout(
                    dim=[MultiArrayDimension(size=d) for d in latent[0].shape])
                latent_topic.send_message(
                    Float32MultiArray(layout=layout_msg, data=latent_msg))

                # Send predicted position according to the index of the frame that has to be reported
                pos_3d_msg = [[
                    1.562 - p[0] / 156.274, -0.14 - p[1] / 152.691,
                    0.964 + p[0] - p[0]
                ] for p in targ_pos]
                pos_3d_msg = [p for pos in pos_3d_msg
                              for p in pos]  # flatten the list
                layout_msg = MultiArrayLayout(dim=[
                    MultiArrayDimension(size=d) for d in [len(targ_pos), 3]
                ])
                pred_pos_topic.send_message(
                    Float32MultiArray(layout=layout_msg, data=pos_3d_msg))

            # Collect input frames
            inpt_msg = torch.zeros(img_shp[0], img_shp[1] * (nt - t_extrap),
                                   img_shp[2])
            for s in range(nt - t_extrap):
                inpt_msg[:, s * img_shp[1]:(s + 1) *
                         img_shp[1], :] = model_inputs.value[0, t_extrap +
                                                             s, :, :, pad:-pad]

            # Build and send the display message
            plot_msg = torch.cat(
                (pred_msg.value, inpt_msg), 2).numpy().transpose(
                    2, 1, 0) * int(normalizer)
            if C_channels == 1:
                plot_msg = np.dstack((plot_msg, plot_msg, plot_msg))
            plot_topic.send_message(CvBridge().cv2_to_imgmsg(
                plot_msg.astype(np.uint8), 'rgb8'))
Ejemplo n.º 4
0
    def run_step(self):

        # Check that the camera device is on and that it is the right time-step
        if self.camera is not None:
            t = self.camera.header.stamp.to_secs() * 1000.0  # in milliseconds
            if t > self.last_cam_time + 20 * self.underSmpl:  # one ros time-step is 20 ms
                self.last_cam_time = t

                # Collect input image and initialize the network input
                cam_img = CvBridge().imgmsg_to_cv2(self.camera,
                                                   'rgb8') / self.normalizer
                if self.C_channels == 3:  # Below I messed up, it should be (2,0,1) but the model is already trained.
                    cam_img = torch.tensor(cam_img,
                                           device=self.device).permute(
                                               2, 1, 0)  # --> channels last
                if self.C_channels == 1:
                    cam_img = cam_img[:, :, 1]  # .mean(axis=2)
                    cam_img = torch.tensor(
                        cam_img,
                        device=self.device).unsqueeze(dim=2).permute(2, 1, 0)
                img_shp = cam_img.shape
                cam_inp = F.pad(cam_img, (self.pad, self.pad), 'constant',
                                0.0)  # width may need to be 256
                if self.model_inputs is None:
                    self.model_inputs = torch.zeros(
                        (1, self.nt) + cam_inp.shape, device=self.device)

                # Update the model or the mode, if needed
                self.running_step = self.running_step + 1
                if self.new_model_path != self.model_path:

                    # Update the model path if new or changed and reset prediction plot
                    self.model_path = self.new_model_path
                    self.pred_msg = torch.ones(
                        img_shp[0], img_shp[1] *
                        (self.nt - self.t_extrap + 1), img_shp[2] + 10) * 64.0

                    # Load or reload the model
                    self.model = PredNet(self.R_channels,
                                         self.A_channels,
                                         device=self.device,
                                         t_extrap=self.t_extrap,
                                         scale=self.scale)
                    if self.device == 'cuda':
                        self.model = self.model.to('cuda')
                    if self.running_step == 1:
                        try:
                            if self.use_new_w:
                                a = 1. / 0.
                            if self.use_trained_w:
                                self.model.load_state_dict(
                                    torch.load(self.trained_w_path))
                                rospy.loginfo(
                                    'Model initialized with pre-trained weights.'
                                )
                            else:
                                self.model.load_state_dict(
                                    torch.load(self.model_path))
                                rospy.loginfo(
                                    'Learning weights loaded in the model.')
                        except:
                            rospy.loginfo(
                                'No existing weight file found. Model initialized randomly.'
                            )

                # Initialize some variables needed for training
                time_loss_w = [
                    1.0 / (self.nt - 1) if s > 0 else 0.0
                    for s in range(self.nt)
                ]
                if self.t_extrap < self.nt:
                    time_loss_w = [
                        w if n < self.t_extrap else 2.0 * w
                        for n, w in enumerate(time_loss_w)
                    ]

                # Initialize the optimizer and the scheduler if needed
                if None in [self.optimizer, self.scheduler]:
                    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                                      lr=self.initial_lr)
                    self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
                        self.optimizer, T_0=50)

                # Save the model at each epoch
                if self.running_step % self.epoch_loop == 1:
                    torch.save(self.model.state_dict(), self.model_path)

                # Check that the model exists and initialize plot message
                if self.model is not None:

                    # Feed network and train it or compute prediction
                    self.model_inputs = self.model_inputs.roll(-1, dims=1)
                    self.model_inputs[0, -1, :, :, :] = cam_inp
                    if self.running_step > self.nt:

                        # Compute prediction along present frames and updates weights
                        if self.do_train:

                            # Compute prediction loss for every frame
                            pred, latent = self.model(self.model_inputs,
                                                      self.nt)
                            loss = torch.tensor([0.0], device=self.device)
                            for s in range(self.nt):
                                error = (pred[s][0] -
                                         self.model_inputs[0][s])**2
                                loss += torch.sum(error) * time_loss_w[s]

                            # Backward pass and weight updates
                            self.optimizer.zero_grad()
                            loss.backward()
                            self.optimizer.step()
                            self.scheduler.step()

                        # Predicts future frames without weight updates
                        else:
                            with torch.no_grad():
                                pred, latent = self.model(
                                    self.
                                    model_inputs[:, -self.t_extrap:, :, :, :],
                                    self.nt)

                        # Collect prediction frames
                        displays = [
                            cam_img
                        ]  # First frame to be displayed is the present frame
                        targ_pos = [
                            localize_target(cam_img)
                        ]  # Localize the target on the present camera frame
                        t_stamps = [
                            t
                        ]  # Time of the present frame is the camera rostime
                        for s in range(self.nt - self.t_extrap):
                            disp = torch.detach(pred[self.t_extrap + s].clamp(
                                0.0, 1.0)[0, :, :, self.pad:-self.pad]).cpu()
                            targ_pos.append(localize_target(disp))
                            displays.append(disp)
                            t_stamps.append(
                                t + (s + 1) * 0.02 *
                                self.underSmpl)  # Not sure about this

                        # Complete for missing target positions, highlight target and set the display message
                        if 0 < np.sum([
                                any([np.isnan(p) for p in pos])
                                for pos in targ_pos
                        ]) < len(targ_pos) - 2:
                            targ_pos = complete_target_positions(targ_pos)
                        for s, (disp,
                                pos) in enumerate(zip(displays, targ_pos)):
                            self.pred_msg[:, s * img_shp[1]:(
                                s +
                                1) * img_shp[1], :img_shp[2]] = mark_target(
                                    disp, pos)

                        # Print loss or prediction messages
                        if self.do_train:
                            rospy.loginfo(
                                'Epoch: %2i - step: %2i - error: %5.4f - lr: %5.4f'
                                % (int(self.running_step / self.epoch_loop),
                                   self.running_step % self.epoch_loop,
                                   loss.item(), self.scheduler.get_lr()[0]))
                        else:
                            rospy.loginfo(
                                'Prediction for future target locations: ' +
                                str(targ_pos))

                        # Send latent state message (latent[0] to remove batch dimension)
                        latent_msg = list(latent[0].cpu().numpy().flatten())
                        layout_msg = MultiArrayLayout(dim=[
                            MultiArrayDimension(size=d)
                            for d in latent[0].shape
                        ])
                        self.latent_pub.publish(
                            Float32MultiArray(layout=layout_msg,
                                              data=latent_msg))

                        # Send predicted position according to the index of the frame that has to be reported
                        pos_3d_msg = [[
                            1.562 - p[0] / 156.274, -0.14 - p[1] / 152.691,
                            0.964 + p[0] - p[0]
                        ] for p in targ_pos]
                        pos_4d_msg = [[p[0], p[1], p[2], s]
                                      for (s, p) in zip(t_stamps, pos_3d_msg)
                                      ]  # Add time stamps
                        pos_4d_msg = [p for pos in pos_3d_msg
                                      for p in pos]  # Flatten the list
                        layout_msg = MultiArrayLayout(dim=[
                            MultiArrayDimension(size=d)
                            for d in [len(targ_pos), 4]
                        ])
                        self.pred_pos_pub.publish(
                            Float32MultiArray(layout=layout_msg,
                                              data=pos_3d_msg))

                    # Collect input frames
                    inpt_msg = torch.zeros(
                        img_shp[0], img_shp[1] * (self.nt - self.t_extrap + 1),
                        img_shp[2])
                    for s in range(self.nt - self.t_extrap):
                        inpt_msg[:, (s + 1) * img_shp[1]:(s + 2) *
                                 img_shp[1], :] = self.model_inputs[
                                     0, self.t_extrap + s, :, :,
                                     self.pad:-self.pad]

                    # Build and send the display message
                    plot_msg = torch.cat(
                        (self.pred_msg, inpt_msg), 2).numpy().transpose(
                            2, 1, 0) * int(self.normalizer)
                    if self.C_channels == 1:
                        plot_msg = np.dstack((plot_msg, plot_msg, plot_msg))
                    self.plot_pub.publish(CvBridge().cv2_to_imgmsg(
                        plot_msg.astype(np.uint8), 'rgb8'))