Ejemplo n.º 1
0
        def get_input_color_and_depth_data(self):
            color_img, depth_img = self.get_camera_data()
            color_img = get_prepared_img(color_img, 'rgb')
            depth_img = get_prepared_img(depth_img, 'depth')
            color_heightmap, depth_heightmap = get_heightmap(color_img, depth_img, robot.cam_intrinsics, robot.cam_pose, robot.workspace_limits, robot.heightmap_resolution)
            valid_depth_heightmap = depth_heightmap.copy()
            valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
            input_color_data, input_depth_data = get_input_tensors(color_heightmap, valid_depth_heightmap)

            return input_color_data, input_depth_data
Ejemplo n.º 2
0
        def get_action(self, iteration):

            self.execute_action = True

            for i in range(2):
                color_img, depth_img = self.get_camera_data()
                color_img = get_prepared_img(color_img, 'rgb')
                depth_img = get_prepared_img(depth_img, 'depth')
                color_heightmap, depth_heightmap = get_heightmap(
                    color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
                    robot.workspace_limits, robot.heightmap_resolution)
                valid_depth_heightmap = depth_heightmap.copy()
                valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0

                # Save RGB-D images and RGB-D heightmaps # trainer.iteration
                self.logger.save_images(1, color_img, depth_img,
                                        '0')  # trainer.iteration
                self.logger.save_heightmaps(1, color_heightmap,
                                            valid_depth_heightmap,
                                            '0')  # trainer.iteration

                grasp_predictions, state_feat = self.trainer_forward(
                    color_heightmap, valid_depth_heightmap, is_volatile=True)

                ############################################ EXECUTING THREAD ############################################
                ############################################ EXECUTING THREAD ############################################
                ############################################ EXECUTING THREAD ############################################

                if self.execute_action:

                    explore_actions = np.random.uniform() < self.explore_prob
                    if explore_actions:  # Exploitation (do best action) vs exploration (do other action)
                        print(
                            'Strategy: explore (exploration probability: %f)' %
                            (self.explore_prob))
                        # nonlocal_variables['primitive_action'] = 'push' if np.random.randint(0,2) == 0 else 'grasp'
                    else:
                        print(
                            'Strategy: exploit (exploration probability: %f)' %
                            (self.explore_prob))

                    print('grasp_predictions.shape: {}'.format(
                        grasp_predictions.shape))
                    # Get pixel location and rotation with highest affordance prediction from heuristic algorithms (rotation, y, x)
                    best_pix_ind = np.unravel_index(
                        np.argmax(grasp_predictions), grasp_predictions.shape)
                    predicted_value = np.max(grasp_predictions)

                    # Save predicted confidence value
                    self.predicted_value_log.append([predicted_value])
                    self.logger.write_to_log('predicted-value',
                                             self.predicted_value_log)

                    print(
                        'best_pix_ind[0]: {}, best_pix_ind[1]: {}, best_pix_ind[2]: {}'
                        .format(best_pix_ind[0], best_pix_ind[1],
                                best_pix_ind[2]))

                    # Compute 3D position of pixel
                    print('Action: %s at (%d, %d, %d)' %
                          ('Grasp', best_pix_ind[0], best_pix_ind[1],
                           best_pix_ind[2]))
                    best_rotation_angle = np.deg2rad(
                        best_pix_ind[0] * (360.0 / robot.model.num_rotations))
                    best_pix_x = best_pix_ind[2]  # 118
                    best_pix_y = best_pix_ind[1]  # 115
                    primitive_position = [
                        best_pix_x * self.heightmap_resolution +
                        self.workspace_limits[0][0],
                        best_pix_y * self.heightmap_resolution +
                        self.workspace_limits[1][0],
                        valid_depth_heightmap[best_pix_y][best_pix_x] +
                        self.workspace_limits[2][0]
                    ]
                    position = primitive_position

                    # Save executed primitive
                    self.executed_action_log.append(
                        [1, best_pix_ind[0], best_pix_ind[1],
                         best_pix_ind[2]])  # 1 - grasp
                    self.logger.write_to_log('executed-action',
                                             self.executed_action_log)

                    # Execute Primitive
                    grasp_success = self.grasp(position, best_rotation_angle)

                    self.execute_action = False

                ########################## TRAINING ##########################
                ########################## TRAINING ##########################
                ########################## TRAINING ##########################

                # Run training iteration in current thread (aka training thread)
                if 'prev_color_img' in locals():

                    # Detect changes
                    depth_diff = abs(depth_heightmap - prev_depth_heightmap)
                    depth_diff[np.isnan(depth_diff)] = 0
                    depth_diff[depth_diff > 0.3] = 0
                    depth_diff[depth_diff < 0.01] = 0
                    depth_diff[depth_diff > 0] = 1
                    change_threshold = 300
                    change_value = np.sum(depth_diff)
                    change_detected = change_value > change_threshold or prev_grasp_success
                    print('Change detected: %r (value: %d)' %
                          (change_detected, change_value))

                    # if change_detected:
                    #     if prev_primitive_action == 'push':
                    #         no_change_count[0] = 0
                    #     elif prev_primitive_action == 'grasp':
                    #         no_change_count[1] = 0
                    # else:
                    #     if prev_primitive_action == 'push':
                    #         no_change_count[0] += 1
                    #     elif prev_primitive_action == 'grasp':
                    #         no_change_count[1] += 1

                    # Compute training labels
                    label_value, prev_reward_value = self.get_label_value(
                        prev_primitive_action,
                        prev_grasp_success,
                        change_detected,
                        prev_grasp_predictions,
                        color_heightmap,  # Fix get_label since it's using the local_network call, instead of the trainer call like in the original code, which goes through the preprocessing step.
                        valid_depth_heightmap)

                    self.label_value_log.append([label_value])
                    self.logger.write_to_log('label-value',
                                             self.label_value_log)
                    self.reward_value_log.append([prev_reward_value])
                    self.logger.write_to_log('reward-value',
                                             self.reward_value_log)

                    # Backpropagate
                    self.backprop(prev_color_heightmap,
                                  prev_valid_depth_heightmap,
                                  prev_primitive_action, prev_best_pix_ind,
                                  label_value)

                    self.explore_prob = max(
                        0.5 * np.power(0.9998, iteration),
                        0.1) if self.explore_rate_decay else 0.5

                    # Do sampling for experience replay
                    if self.experience_replay:
                        sample_primitive_action = prev_primitive_action
                        sample_primitive_action_id = 1
                        sample_reward_value = 0 if prev_reward_value == 1.0 else 1.0

                        # Get samples of the same primitive but with different results
                        # Indices where the primitive is the prev_prev, and also have different results. This has the same shape as trainer.reward_value_log as well as trainer.executed_action_log.
                        # argwhere returns the indices of the True booleans from the preceding operation.
                        sample_ind = np.argwhere(
                            np.logical_and(
                                np.asarray(self.reward_value_log)[
                                    1:iteration, 0] == sample_reward_value,
                                np.asarray(self.executed_action_log)
                                [1:iteration,
                                 0] == sample_primitive_action_id))

                        print('sample_reward_value: {}'.format(
                            sample_reward_value))
                        print()
                        print('self.reward_value_log: {}'.format(
                            self.reward_value_log))
                        print()
                        print('self.executed_action_log: {}'.format(
                            self.executed_action_log))

                        if sample_ind.size > 0:
                            # Find sample with highest surprise value
                            sample_surprise_values = np.abs(
                                np.asarray(self.predicted_value_log)
                                [sample_ind[:, 0]] - np.asarray(
                                    self.label_value_log)[sample_ind[:, 0]])
                            sorted_surprise_ind = np.argsort(
                                sample_surprise_values[:, 0])
                            sorted_sample_ind = sample_ind[sorted_surprise_ind,
                                                           0]
                            pow_law_exp = 2
                            rand_sample_ind = int(
                                np.round(
                                    np.random.power(pow_law_exp, 1) *
                                    (sample_ind.size - 1)))
                            sample_iteration = sorted_sample_ind[
                                rand_sample_ind]
                            print(
                                'Experience replay: iteration %d (surprise value: %f)'
                                % (sample_iteration, sample_surprise_values[
                                    sorted_surprise_ind[rand_sample_ind]]))

                            # Load sample RGB-D heightmap
                            sample_color_heightmap = cv2.imread(
                                os.path.join(
                                    self.logger.color_heightmaps_directory,
                                    '%06d.0.color.png' % (sample_iteration)))
                            sample_color_heightmap = cv2.cvtColor(
                                sample_color_heightmap, cv2.COLOR_BGR2RGB)
                            sample_depth_heightmap = cv2.imread(
                                os.path.join(
                                    self.logger.depth_heightmaps_directory,
                                    '%06d.0.depth.png' % (sample_iteration)),
                                -1)
                            sample_depth_heightmap = sample_depth_heightmap.astype(
                                np.float32) / 100000

                            # Compute forward pass with sample
                            with torch.no_grad():
                                sample_grasp_predictions, sample_state_feat = trainer.forward(
                                    sample_color_heightmap,
                                    sample_depth_heightmap,
                                    is_volatile=True)

                            # Load next sample RGB-D heightmap
                            next_sample_color_heightmap = cv2.imread(
                                os.path.join(
                                    self.logger.color_heightmaps_directory,
                                    '%06d.0.color.png' %
                                    (sample_iteration + 1)))
                            next_sample_color_heightmap = cv2.cvtColor(
                                next_sample_color_heightmap, cv2.COLOR_BGR2RGB)
                            next_sample_depth_heightmap = cv2.imread(
                                os.path.join(
                                    self.logger.depth_heightmaps_directory,
                                    '%06d.0.depth.png' %
                                    (sample_iteration + 1)), -1)
                            next_sample_depth_heightmap = next_sample_depth_heightmap.astype(
                                np.float32) / 100000

                            sample_grasp_success = sample_reward_value == 1
                            sample_change_detected = sample_push_success
                            # new_sample_label_value, _ = trainer.get_label_value(sample_primitive_action, sample_push_success, sample_grasp_success, sample_change_detected, sample_push_predictions, sample_grasp_predictions, next_sample_color_heightmap, next_sample_depth_heightmap)

                            # Get labels for sample and backpropagate
                            sample_best_pix_ind = (np.asarray(
                                self.executed_action_log)[sample_iteration,
                                                          1:4]).astype(int)
                            self.backprop(
                                sample_color_heightmap, sample_depth_heightmap,
                                sample_primitive_action, sample_best_pix_ind,
                                self.label_value_log[sample_iteration])

                            # Recompute prediction value and label for replay buffer
                            self.predicted_value_log[sample_iteration] = [
                                np.max(sample_grasp_predictions)
                            ]
                            # trainer.label_value_log[sample_iteration] = [new_sample_label_value]

                        else:
                            print(
                                'Not enough prior training samples. Skipping experience replay.'
                            )

                    self.reset_cube()

                # Save information for next training step
                prev_color_img = color_img.copy()
                prev_depth_img = depth_img.copy()
                prev_color_heightmap = color_heightmap.copy()
                prev_depth_heightmap = depth_heightmap.copy()
                prev_valid_depth_heightmap = valid_depth_heightmap.copy()
                prev_grasp_predictions = grasp_predictions.copy()
                prev_grasp_success = grasp_success
                prev_primitive_action = 'grasp'
                prev_best_pix_ind = best_pix_ind
Ejemplo n.º 3
0
        def get_action(self):

            color_img, depth_img = self.get_camera_data()
            color_img = get_prepared_img(color_img, 'rgb')
            depth_img = get_prepared_img(depth_img, 'depth')
            color_heightmap, depth_heightmap = get_heightmap(
                color_img, depth_img, robot.cam_intrinsics, robot.cam_pose,
                robot.workspace_limits, robot.heightmap_resolution)
            valid_depth_heightmap = depth_heightmap.copy()
            valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0
            depth_heightmap = valid_depth_heightmap
            # Apply 2x scale to input heightmaps
            color_heightmap_2x = ndimage.zoom(color_heightmap,
                                              zoom=[2, 2, 1],
                                              order=0)
            depth_heightmap_2x = ndimage.zoom(depth_heightmap,
                                              zoom=[2, 2],
                                              order=0)
            assert (
                color_heightmap_2x.shape[0:2] == depth_heightmap_2x.shape[0:2])

            # Add extra padding (to handle rotations inside network)
            diag_length = float(color_heightmap_2x.shape[0]) * np.sqrt(2)
            diag_length = np.ceil(diag_length / 32) * 32
            padding_width = int(
                (diag_length - color_heightmap_2x.shape[0]) / 2)
            color_heightmap_2x_r = np.pad(color_heightmap_2x[:, :, 0],
                                          padding_width,
                                          'constant',
                                          constant_values=0)
            color_heightmap_2x_r.shape = (color_heightmap_2x_r.shape[0],
                                          color_heightmap_2x_r.shape[1], 1)
            color_heightmap_2x_g = np.pad(color_heightmap_2x[:, :, 1],
                                          padding_width,
                                          'constant',
                                          constant_values=0)
            color_heightmap_2x_g.shape = (color_heightmap_2x_g.shape[0],
                                          color_heightmap_2x_g.shape[1], 1)
            color_heightmap_2x_b = np.pad(color_heightmap_2x[:, :, 2],
                                          padding_width,
                                          'constant',
                                          constant_values=0)
            color_heightmap_2x_b.shape = (color_heightmap_2x_b.shape[0],
                                          color_heightmap_2x_b.shape[1], 1)
            color_heightmap_2x = np.concatenate(
                (color_heightmap_2x_r, color_heightmap_2x_g,
                 color_heightmap_2x_b),
                axis=2)
            depth_heightmap_2x = np.pad(depth_heightmap_2x,
                                        padding_width,
                                        'constant',
                                        constant_values=0)

            # Pre-process color image (scale and normalize)
            image_mean = [0.485, 0.456, 0.406]
            image_std = [0.229, 0.224, 0.225]
            input_color_image = color_heightmap_2x.astype(float) / 255
            for c in range(3):
                input_color_image[:, :, c] = (input_color_image[:, :, c] -
                                              image_mean[c]) / image_std[c]

            # Pre-process depth image (normalize)
            image_mean = [0.01, 0.01, 0.01]
            image_std = [0.03, 0.03, 0.03]
            depth_heightmap_2x.shape = (depth_heightmap_2x.shape[0],
                                        depth_heightmap_2x.shape[1], 1)
            input_depth_image = np.concatenate(
                (depth_heightmap_2x, depth_heightmap_2x, depth_heightmap_2x),
                axis=2)
            for c in range(3):
                input_depth_image[:, :, c] = (input_depth_image[:, :, c] -
                                              image_mean[c]) / image_std[c]

            # Construct minibatch of size 1 (b,c,h,w)
            input_color_image.shape = (input_color_image.shape[0],
                                       input_color_image.shape[1],
                                       input_color_image.shape[2], 1)
            input_depth_image.shape = (input_depth_image.shape[0],
                                       input_depth_image.shape[1],
                                       input_depth_image.shape[2], 1)
            input_color_data = torch.from_numpy(
                input_color_image.astype(np.float32)).permute(3, 2, 0, 1)
            input_depth_data = torch.from_numpy(
                input_depth_image.astype(np.float32)).permute(3, 2, 0, 1)

            # Pass input data through model
            output_prob, state_feat = self.model.forward(
                input_color_data,
                input_depth_data)  # is_volatile, specific_rotation)

            # Return Q values (and remove extra padding)
            for rotate_idx in range(len(output_prob)):
                if rotate_idx == 0:
                    grasp_predictions = output_prob[rotate_idx][0].cpu(
                    ).data.numpy()[:, 0,
                                   int(padding_width /
                                       2):int(color_heightmap_2x.shape[0] / 2 -
                                              padding_width / 2),
                                   int(padding_width /
                                       2):int(color_heightmap_2x.shape[0] / 2 -
                                              padding_width / 2)]
                else:
                    grasp_predictions = np.concatenate(
                        (grasp_predictions,
                         output_prob[rotate_idx][0].cpu().data.numpy()
                         [:, 0,
                          int(padding_width /
                              2):int(color_heightmap_2x.shape[0] / 2 -
                                     padding_width / 2),
                          int(padding_width /
                              2):int(color_heightmap_2x.shape[0] / 2 -
                                     padding_width / 2)]),
                        axis=0)

            # Get pixel location and rotation with highest affordance prediction from heuristic algorithms (rotation, y, x)
            best_pix_ind = np.unravel_index(np.argmax(grasp_predictions),
                                            grasp_predictions.shape)
            predicted_value = np.max(grasp_predictions)

            # Compute 3D position of pixel
            print('Action: %s at (%d, %d, %d)' %
                  ('Grasp', best_pix_ind[0], best_pix_ind[1], best_pix_ind[2]))
            best_rotation_angle = np.deg2rad(
                best_pix_ind[0] * (360.0 / robot.model.num_rotations))
            best_pix_x = best_pix_ind[2]
            best_pix_y = best_pix_ind[1]
            primitive_position = [
                best_pix_x * self.heightmap_resolution +
                self.workspace_limits[0][0],
                best_pix_y * self.heightmap_resolution +
                self.workspace_limits[1][0],
                valid_depth_heightmap[best_pix_y][best_pix_x] +
                self.workspace_limits[2][0]
            ]

            return primitive_position  # grasp_predictions, state_feat