def test_indexer(self):
    indexer = GraspIndexer(gripper_classes=[0.04, 0.06, 0.08])

    def define_action(d=0.0, index=0):
        a = Action()
        a.pose.d = d
        a.index = index
        return a

    self.assertEqual(indexer.from_action(define_action(d=0.04)), 0)
    self.assertEqual(indexer.from_action(define_action(d=0.059)), 1)
    self.assertEqual(indexer.from_action(define_action(d=0.06)), 1)
    self.assertEqual(indexer.from_action(define_action(d=0.07)), 2)
    self.assertEqual(indexer.from_action(define_action(d=0.08)), 2)
    self.assertEqual(indexer.from_action(define_action(d=0.1)), 2)

    a = define_action(index=2)
    indexer.to_action(a)
    self.assertEqual(indexer.from_action(a), 2)
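# Hedged sketch (not from the repository): the rule implied by the assertions
# above appears to be "smallest gripper class that still covers the requested
# width d, clamped to the widest class". `index_from_width` is a hypothetical
# helper for illustration only, not the actual GraspIndexer implementation.
import numpy as np

def index_from_width(d, gripper_classes):
    classes = np.asarray(gripper_classes)
    candidates = np.nonzero(classes >= d)[0]
    return int(candidates[0]) if candidates.size else len(classes) - 1

assert index_from_width(0.059, [0.04, 0.06, 0.08]) == 1  # smallest covering class
assert index_from_width(0.1, [0.04, 0.06, 0.08]) == 2    # clamped to widest class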
def __init__(self):
    # Parameters
    self.grasp_model = rospy.get_param('graspro/grasp_model', 'graspro-v1')
    self.gripper_classes = rospy.get_param('graspro/gripper_classes')
    self.z_offset = rospy.get_param('graspro/z_offset', 0.0)
    self.ensenso_depth = rospy.get_param('graspro/camera/ensenso_depth')
    self.realsense_depth = rospy.get_param('graspro/camera/realsense_depth')
    self.realsense_color = rospy.get_param('graspro/camera/realsense_color')

    self.lower_random_pose = rospy.get_param('graspro/lower_random_pose', [-0.1, -0.1, 0.0])
    self.upper_random_pose = rospy.get_param('graspro/upper_random_pose', [0.1, 0.1, 0.0])

    self.box_center = rospy.get_param('graspro/bin_center', [0, 0, 0])
    self.box_size = rospy.get_param('graspro/bin_size', False)
    self.box = {'center': self.box_center, 'size': self.box_size}

    self.publish_heatmap = rospy.get_param('graspro/publish_heatmap', False)

    # Inference
    self.inference = PlanarInference(
        model=Loader.get_model(self.grasp_model, output_layer='prob'),
        box=self.box,
        lower_random_pose=self.lower_random_pose,
        upper_random_pose=self.upper_random_pose,
    )
    self.indexer = GraspIndexer(gripper_classes=self.gripper_classes)
    self.converter = Converter(grasp_z_offset=self.z_offset, box=self.box)  # [m]

    if self.publish_heatmap:
        self.heatmapper = Heatmap(self.inference, self.inference.model, box=self.box)
        # rospy.Publisher requires a message class; the heatmap is published as an image
        self.heatmap_publisher = rospy.Publisher('graspro/heatmap', Image, queue_size=10)

    self.bridge = CvBridge()
    self.image_publisher = rospy.Publisher('graspro/pose_on_image', Image, queue_size=10)

    s1 = rospy.Service('graspro/infer_grasp', InferGrasp, self.infer_grasp)
    s2 = rospy.Service('graspro/estimate_reward_for_grasp', EstimateRewardForGrasp, self.estimate_reward_for_grasp)

    rospy.spin()
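# Client-side sketch for the services advertised above. Assumes the node is
# running; the srv import path `graspro.srv` is an assumption, as the package
# layout is not visible from this file.
import rospy
from graspro.srv import InferGrasp  # assumed import path

rospy.wait_for_service('graspro/infer_grasp')
infer_grasp = rospy.ServiceProxy('graspro/infer_grasp', InferGrasp)
response = infer_grasp()  # request fields depend on the InferGrasp.srv definition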
def __init__(self, databases: Union[str, List[str]], validation_databases: Union[str, List[str]] = None, indexer=None):
    validation_databases = validation_databases or []

    self.databases = [databases] if isinstance(databases, str) else databases
    self.validation_databases = [validation_databases] if isinstance(validation_databases, str) else validation_databases

    self.output_path = Loader.get_database_path(self.databases[0])
    self.image_output_path = self.output_path / 'input'
    self.model_path = self.output_path / 'models'
    self.result_path = self.output_path / 'results'

    self.indexer = indexer if indexer else GraspIndexer(gripper_classes=Config.gripper_classes)
    self.box = Config.box

    self.percent_validation_set = 0.2
# )
inference = InferencePlanarPose(
    Loader.get_model('cylinder-cube-1', 'model-6-arch-more-layer', output_layer='prob'),
    box=Config.box,
)
# inference = InferencePlanarPose(
#     Loader.get_model('shifting', 'model-3'),
#     box=Config.box,
# )

_, image = Loader.get_action('cylinder-cube-mc-1', '2019-07-02-13-27-22-246', 'ed-v')

indexer = GraspIndexer(gripper_classes=Config.gripper_classes)
converter = Converter(grasp_z_offset=Config.grasp_z_offset, box=Config.box)

times = []
for i in range(1):
    start = time.time()
    action = inference.infer([image], SelectionMethod.Top5, verbose=1)
    indexer.to_action(action)
    end = time.time()
    times.append(end - start)

converter.calculate_pose(action, [image])
print(action)
print(
class Agent:
    def __init__(self, **params):
        self.model = Config.grasp_model

        self.watch_for_model_modification = True
        self.model_last_modified = Loader.get_model_path(self.model).stat().st_mtime

        self.monte_carlo = 40 if 'mc' in self.model[1] else None
        self.with_types = 'types' in self.model[1]
        self.output_layer = 'prob' if not self.with_types else ['prob', 'type']

        self.inference = InferencePlanarPose(
            model=Loader.get_model(self.model, output_layer=self.output_layer),
            box=Config.box,
            lower_random_pose=Config.lower_random_pose,
            upper_random_pose=Config.upper_random_pose,
            monte_carlo=self.monte_carlo,
            with_types=self.with_types,
        )
        self.inference.keep_indixes = None
        self.indexer = GraspIndexer(gripper_classes=Config.gripper_classes)
        self.converter = Converter(grasp_z_offset=Config.grasp_z_offset, box=Config.box)

        # self.indexer = GraspFinalDIndexer(gripper_classes=Config.gripper_classes, final_d_classes=[0.0, 0.035])
        # self.indexer = LateralIndexer(
        #     angles=[(0, 0), (0.3, 0)],
        #     gripper_classes=[0.05, 0.07, 0.084],
        # )
        # self.converter = Converter(grasp_z_offset=Config.grasp_z_offset, box=Config.box)

        self.reinfer_next_time = True  # Always true in contrast to AgentPredict

    def check_for_model_reload(self):
        current_model_st_mtime = Loader.get_model_path(self.model).stat().st_mtime
        if self.watch_for_model_modification and current_model_st_mtime > self.model_last_modified + 0.5:  # [s]
            logger.warning(f'Reload model {self.model}.')
            try:
                self.inference.model = Loader.get_model(self.model, output_layer=self.output_layer)
                self.model_last_modified = Loader.get_model_path(self.model).stat().st_mtime
            except OSError:
                logger.info('Could not load model, probably file locked.')

    def infer(self, images: List[OrthographicImage], method: SelectionMethod, **params) -> List[Action]:
        if self.monte_carlo:  # Adapt Monte Carlo progress parameter s
            epoch_in_collection = Loader.get_episode_count(Config.collection)
            s_not_bounded = (epoch_in_collection - 3500) * 1 / (4500 - 3500)
            self.inference.current_s = max(min(s_not_bounded, 1.0), 0.0)

        self.check_for_model_reload()

        if len(images) == 3:
            images[2].mat = images[2].mat[:, :, ::-1]  # BGR to RGB

        action = self.inference.infer(images, method)
        self.indexer.to_action(action)

        print(action, method)

        estimated_reward_lower_than_threshold = action.estimated_reward < Config.bin_empty_at_max_probability
        bin_empty = estimated_reward_lower_than_threshold and Epoch.selection_method_should_be_high(method)

        if bin_empty:
            return [Action('bin_empty', safe=1)]

        self.converter.calculate_pose(action, images)
        return [action]

    def reward_for_action(self, images: List[OrthographicImage], action: Action) -> float:
        estimated_rewards = self.inference.infer_at_pose(images, action.pose)
        if isinstance(estimated_rewards, tuple):
            estimated_rewards, _ = estimated_rewards
        index = self.indexer.from_action(action)
        return estimated_rewards[0][0][index]
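# Usage sketch (assumes a fully configured Config; `take_images` is a
# hypothetical camera helper, not part of this file).
if __name__ == '__main__':
    agent = Agent()
    actions = agent.infer(take_images(), SelectionMethod.Top5)
    if actions[0].type == 'bin_empty':
        print('Estimated reward below threshold, bin seems empty.')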
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

#%%

data = Dataset([
    'small-cubes-2',
    'cylinder-cube-mc-1',
    'cylinder-cube-1',
    # 'cylinder-1',
    'cylinder-2',
    'cube-1',
    'cube-3',
], indexer=GraspIndexer(gripper_classes=Config.gripper_classes))

train_set, validation_set = data.load_data(
    force_images=False,
    suffixes=[
        ['ed-v'],
    ],
)

#%%

def define_model(number_actions: int, number_types: int):
    image = tk.Input(shape=(None, None, 1), name='image')

    conv_block = conv_block_gen(l2_reg=0.02, dropout_rate=0.4)
class Agent:
    def __init__(self, prediction_model):
        self.grasp_model = Config.grasp_model
        self.shift_model = Config.shift_model

        self.with_types = 'types' in self.grasp_model[1]
        self.output_layer = 'prob' if not self.with_types else ['prob', 'type']

        self.grasp_inference = InferencePlanarPose(
            Loader.get_model(self.grasp_model, output_layer=self.output_layer),
            box=Config.box,
            lower_random_pose=Config.lower_random_pose,
            upper_random_pose=Config.upper_random_pose,
            with_types=self.with_types,
            input_uncertainty=True,
        )
        self.grasp_inference.keep_indixes = [0, 1]
        self.grasp_indexer = GraspIndexer(gripper_classes=Config.gripper_classes)

        self.shift_inference = InferencePlanarPose(
            Loader.get_model(self.shift_model, output_layer='prob'),
            box=Config.box,
            lower_random_pose=Config.lower_random_pose,
            upper_random_pose=Config.upper_random_pose,
            with_types=False,
        )
        self.shift_inference.a_space = np.linspace(-3.0, 3.0, 26)  # [rad] # Don't use a=0.0
        self.shift_inference.size_original_cropped = (240, 240)
        self.shift_indexer = ShiftIndexer(shift_distance=Config.shift_distance)

        self.grasp_shift_indexer = GraspShiftIndexer(
            gripper_classes=Config.gripper_classes,
            shift_distance=Config.shift_distance,
        )

        self.converter = Converter(grasp_z_offset=Config.grasp_z_offset, shift_z_offset=0.007, box=Config.box)  # [m]

        self.prediction_model = prediction_model
        self.monte_carlo = 20

        self.actions_since_last_inference = 0
        self.actions: List[Action] = []

        self.output_path = Path.home() / 'Desktop'
        self.reinfer_next_time = True

        # First inference is slower
        self.prediction_model.predict([
            np.zeros((1, 64, 64, 1)),
            np.zeros((1, 1, 1, 1)),
            np.zeros((1, 1, 1)),
            np.zeros((1, 1, 1, 8)),
        ])

    def predict_images_after_action(
            self,
            images: List[OrthographicImage],
            action: Action,
            reward: float,
            action_type: int,
            uncertainty_images=None,
    ) -> List[OrthographicImage]:
        image = images[0]
        uncertainty_image = uncertainty_images[0]

        start = time.time()

        draw_around_box(image, box=Config.box)
        area = get_area_of_interest(image, action.pose, size_cropped=(256, 256), size_result=(64, 64))

        area_input = np.expand_dims(area.mat, axis=2).astype(np.float32) / np.iinfo(np.uint16).max * 2 - 1
        reward = np.expand_dims(np.expand_dims(np.expand_dims(reward, axis=1), axis=1), axis=1).astype(np.float32)
        action_type = np.expand_dims(np.expand_dims(action_type, axis=1), axis=1)

        use_monte_carlo = self.monte_carlo and self.monte_carlo > 1

        if not use_monte_carlo:
            area_result = self.prediction_model.predict([[area_input], [reward], [action_type], np.zeros((1, 1, 1, 8))])[0]
            area_result = np.array(np.iinfo(np.uint16).max * (area_result + 1) / 2, dtype=np.uint16)
        else:
            latent = np.random.normal(scale=0.05, size=(self.monte_carlo, 1, 1, 8))
            if self.monte_carlo > 3:
                latent[0, :, :, :] = 0.0

            predictions = self.prediction_model.predict([
                [area_input for _ in range(self.monte_carlo)],
                [reward for _ in range(self.monte_carlo)],
                [action_type for _ in range(self.monte_carlo)],
                latent,
            ])
            predictions = (predictions + 1) / 2
            predictions = np.array(predictions, dtype=np.float32)

            area_result = predictions[0]
            area_result = np.array(np.iinfo(np.uint16).max * area_result, dtype=np.uint16)

            predictions[predictions < 0.1] = np.nan
            area_uncertainty = np.nanvar(predictions, axis=0)
            area_uncertainty *= 7e3
            area_uncertainty[area_uncertainty > 1] = 1
            area_uncertainty = np.array(np.iinfo(np.uint16).max * area_uncertainty, dtype=np.uint16)

            uncertainty_image = patch_image_at(
                uncertainty_image,
                area_uncertainty,
                action.pose,
                size_cropped=(256, 256),
                operation='add',
            )

        result = patch_image_at(image, area_result, action.pose, size_cropped=(256, 256))

        logger.info(f'Predicted image [s]: {time.time() - start:0.3f}')

        if use_monte_carlo:
            return [result], [uncertainty_image]
        return [result]

    def plan_actions(
            self,
            images: List[OrthographicImage],
            method: SelectionMethod,
            depth=1,
            leaves=1,
            verbose=False,
    ) -> List[Action]:
        uncertainty_images = [
            OrthographicImage(np.zeros(i.mat.shape, dtype=np.uint16), i.pixel_size, i.min_depth, i.max_depth, i.camera, i.pose)
            for i in images
        ]

        tree = PlanningTree(images, uncertainty_images)

        for node, i in tree.fill_nodes(leaves=leaves, depth=depth):
            # Visited actions: node.actions
            for image in node.images:
                draw_around_box(image, box=Config.box)

            grasp = self.grasp_inference.infer(node.images, method, uncertainty_images=node.uncertainty_images)
            self.grasp_indexer.to_action(grasp)

            # Shift actions
            if Config.shift_objects and grasp.estimated_reward < Config.grasp_shift_threshold:
                shift = self.shift_inference.infer(node.images, method)
                self.shift_indexer.to_action(shift)

                bin_empty = shift.estimated_reward < Config.shift_empty_threshold

                if bin_empty:
                    action = Action('bin_empty', safe=1)
                else:
                    self.converter.calculate_pose(shift, node.images)
                    action = shift

            # Grasp actions
            else:
                estimated_reward_lower_than_threshold = grasp.estimated_reward < Config.bin_empty_at_max_probability
                bin_empty = estimated_reward_lower_than_threshold and Epoch.selection_method_should_be_high(method)
                new_image = False

                if bin_empty:
                    action = Action('bin_empty', safe=1)
                elif grasp.estimated_reward_std > 0.9:  # default=0.25
                    action = Action('new_image', safe=1)
                else:
                    self.converter.calculate_pose(grasp, node.images)
                    action = grasp

            logger.info(f'{i}: {action}')

            if verbose:
                image_copy = clone(images[0])
                uncertainty_image_copy = clone(uncertainty_images[0])
                draw_pose(image_copy, action.pose, convert_to_rgb=True)
                draw_pose(uncertainty_image_copy, action.pose, convert_to_rgb=True)
                cv2.imwrite(str(self.output_path / f'result-{i}.png'), image_copy.mat)
                cv2.imwrite(str(self.output_path / f'uncertainty-{i}.png'), uncertainty_image_copy.mat)

            if action.type == 'bin_empty' or action.type == 'new_image':
                break

            # Predict next image
            reward = action.estimated_reward > Config.bin_empty_at_max_probability if action.type == 'grasp' else action.estimated_reward
            action_type = self.grasp_shift_indexer.from_action(action)

            images = self.predict_images_after_action(
                node.images,
                action,
                reward=reward,
                action_type=action_type,
                uncertainty_images=node.uncertainty_images,
            )
            if isinstance(images, tuple):
                images, uncertainty_images = images
            else:
                uncertainty_images = None

            node.add_action(action, images, uncertainty_images)

            if verbose:
                cv2.imwrite(str(self.output_path / f'result-{i+1}.png'), node.images[0].mat)
                cv2.imwrite(str(self.output_path / f'uncertainty-{i+1}.png'), node.uncertainty_images[0].mat)

        actions, max_reward, mean_reward = tree.get_actions_maximize_reward(max_depth=depth)
        print(f'Max reward: {max_reward:0.3f}, Mean reward: {mean_reward:0.3f}, Length: {len(actions)}')
        # actions, max_steps, mean_steps = tree.get_actions_minimize_steps()
        return actions

    def predict_actions(
            self,
            images: List[OrthographicImage],
            method: SelectionMethod,
            N=1,
            verbose=True,
    ) -> List[Action]:
        actions: List[Action] = []

        uncertainty_images = [
            OrthographicImage(np.zeros(i.mat.shape, dtype=np.uint16), i.pixel_size, i.min_depth, i.max_depth, i.camera, i.pose)
            for i in images
        ]

        for i in range(N):
            for image in images:
                draw_around_box(image, box=Config.box)

            grasp = self.grasp_inference.infer(images, method, uncertainty_images=uncertainty_images)
            self.grasp_indexer.to_action(grasp)

            # Shift actions
            if Config.shift_objects and grasp.estimated_reward < Config.grasp_shift_threshold:
                shift = self.shift_inference.infer(images, method)
                self.shift_indexer.to_action(shift)

                bin_empty = shift.estimated_reward < Config.shift_empty_threshold

                if bin_empty:
                    actions.append(Action('bin_empty', safe=1))
                else:
                    self.converter.calculate_pose(shift, images)
                    actions.append(shift)

            # Grasp actions
            else:
                estimated_reward_lower_than_threshold = grasp.estimated_reward < Config.bin_empty_at_max_probability
                bin_empty = estimated_reward_lower_than_threshold and Epoch.selection_method_should_be_high(method)
                new_image = False

                if bin_empty:
                    actions.append(Action('bin_empty', safe=1))
                elif grasp.estimated_reward_std > 0.9:  # default=0.25
                    actions.append(Action('new_image', safe=1))
                else:
                    self.converter.calculate_pose(grasp, images)
                    actions.append(grasp)

            actions[-1].step = i
            action = actions[-1]
            logger.info(f'{i}: {action}')

            if verbose:
                image_copy = clone(images[0])
                uncertainty_image_copy = clone(uncertainty_images[0])
                draw_pose(image_copy, action.pose, convert_to_rgb=True)
                draw_pose(uncertainty_image_copy, action.pose, convert_to_rgb=True)
                cv2.imwrite(str(self.output_path / f'result-{i}.png'), image_copy.mat)
                cv2.imwrite(str(self.output_path / f'uncertainty-{i}.png'), uncertainty_image_copy.mat)

            if action.type == 'bin_empty' or action.type == 'new_image':
                break

            # Predict next image
            reward = action.estimated_reward > Config.bin_empty_at_max_probability if action.type == 'grasp' else action.estimated_reward
            action_type = self.grasp_shift_indexer.from_action(action)

            images = self.predict_images_after_action(
                images,
                action,
                reward=reward,
                action_type=action_type,
                uncertainty_images=uncertainty_images,
            )
            if isinstance(images, tuple):
                images, uncertainty_images = images
            else:
                uncertainty_images = None

            if verbose:
                cv2.imwrite(str(self.output_path / f'result-{i+1}.png'), images[0].mat)
                cv2.imwrite(str(self.output_path / f'uncertainty-{i+1}.png'), uncertainty_images[0].mat)

        return actions

    def infer(self, images: List[OrthographicImage], method: SelectionMethod, N=5, reinfer=False):
        if self.actions_since_last_inference == 0 or self.actions_since_last_inference >= N or self.reinfer_next_time or reinfer:
            logger.warning(f'Calculate {N} predictions.')
            # self.actions = self.predict_actions(images, method, N=(N+1))
            self.actions = self.plan_actions(images, method, depth=N, leaves=1)
            self.actions_since_last_inference = 0
            self.reinfer_next_time = False
        else:
            logger.warning(f'Saved action, last inference {self.actions_since_last_inference} actions ago.')

        if self.actions_since_last_inference == len(self.actions) - 2:
            self.reinfer_next_time = True

        self.actions_since_last_inference += 1
        return self.actions[self.actions_since_last_inference - 1]
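# Usage sketch of the action caching in infer(): plan_actions() computes a
# sequence of N actions once, later calls replay from the cache until it runs
# low. `prediction_model`, `take_images`, and `execute` are hypothetical.
agent = Agent(prediction_model)
for _ in range(10):
    action = agent.infer(take_images(), SelectionMethod.Max, N=5)
    execute(action)  # only every ~N-th call triggers a new planning pass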
class PlacingDataset:
    def __init__(self, episodes, seed=None):
        self.episodes = episodes
        self.episodes_place_success_index = list(map(
            lambda e: e[0],
            filter(lambda e: e[1]['actions'][1]['reward'] > 0, enumerate(episodes)),
        ))

        self.episodes_different_objects_ids = {
            'wooden': ('2019-12-16-17-01-18-409', '2020-01-22-09-34-02-952'),
            'baby': ('2020-01-22-09-34-02-953', '2020-01-29-23-17-15-032'),
        }

        # Get indexes of episodes between the ids (from above) which have a positive place action
        self.episodes_different_objects_index = {
            k: list(map(
                lambda e: e[0],
                filter(lambda e: v[0] <= e[1]['id'] <= v[1] and e[1]['actions'][1]['reward'] > 0, enumerate(episodes)),
            ))
            for k, v in self.episodes_different_objects_ids.items()
        }

        self.size_input = (752, 480)
        self.size_memory_scale = 4
        self.size_cropped = (200, 200)
        self.size_result = (32, 32)

        self.size_cropped_area = (self.size_cropped[0] // self.size_memory_scale, self.size_cropped[1] // self.size_memory_scale)

        self.use_hindsight = True
        self.use_further_hindsight = False
        self.use_negative_foresight = True
        self.use_own_goal = True
        self.use_different_episodes_as_goals = True

        self.jittered_hindsight_images = 1
        self.jittered_hindsight_x_images = 2  # Only if place reward > 0
        self.jittered_goal_images = 1
        self.different_episodes_images = 1
        self.different_episodes_images_success = 4  # Only if place reward > 0
        self.different_object_images = 4  # Only if place reward > 0
        self.different_jittered_object_images = 0  # Only if place reward > 0

        self.box_distance = 0.281  # [m]

        self.indexer = GraspIndexer([0.05, 0.07, 0.086])  # [m]
        # self.indexer = GraspIndexer([0.025, 0.05, 0.07, 0.086])  # [m]

        self.cameras = ('ed',)
        # self.cameras = ('ed', 'rd', 'rc')

        self.seed = seed
        self.random_gen = np.random.RandomState(seed)

    @lru_cache(maxsize=None)
    def load_image(self, collection, episode_id, action_id, suffix):
        image = Loader.get_image(collection, episode_id, action_id, suffix, as_float=True)
        draw_around_box(image, box=Config.box)

        image.mat = cv2.resize(image.mat, (self.size_input[0] // self.size_memory_scale, self.size_input[1] // self.size_memory_scale))
        image.pixel_size /= self.size_memory_scale
        return image

    def area_of_interest(self, image, pose):
        area = get_area_of_interest_new(
            image,
            RobotPose(all_data=pose),
            size_cropped=self.size_cropped_area,
            size_result=self.size_result,
            border_color=image.value_from_depth(self.box_distance) / (255 * 255),
        )
        if len(area.mat.shape) == 2:
            return np.expand_dims(area.mat, 2)
        return area.mat

    def jitter_pose(self, pose, scale_x=0.05, scale_y=0.05, scale_a=1.5, around=True):
        new_pose = copy.deepcopy(pose)

        if around:
            low = [np.minimum(0.001, scale_x), np.minimum(0.001, scale_y), np.minimum(0.06, scale_a)]
            mode = [np.minimum(0.006, scale_x), np.minimum(0.006, scale_y), np.minimum(0.32, scale_a)]
            high = [scale_x + 1e-6, scale_y + 1e-6, scale_a + 1e-6]
            dx, dy, da = self.random_gen.choice([-1, 1], size=3) * self.random_gen.triangular(low, mode, high, size=3)
        else:
            low = [-scale_x - 1e-6, -scale_y - 1e-6, -scale_a - 1e-6]
            mode = [0.0, 0.0, 0.0]
            high = [scale_x + 1e-6, scale_y + 1e-6, scale_a + 1e-6]
            dx, dy, da = self.random_gen.triangular(low, mode, high, size=3)

        new_pose['x'] += np.cos(pose['a']) * dx - np.sin(pose['a']) * dy
        new_pose['y'] += np.sin(pose['a']) * dx + np.cos(pose['a']) * dy
        new_pose['a'] += da
        return new_pose

    def generator(self, index):
        e = self.episodes[index]
        result = []

        collection = e['collection']
        episode_id = e['id']

        grasp = e['actions'][0]
        grasp_before = self.load_image(collection, episode_id, 0, 'ed-v')
        grasp_before_area = self.area_of_interest(grasp_before, grasp['pose'])
        grasp_index = self.indexer.from_pose(grasp['pose'])

        # Only single grasp (note: falls through; the lines below assume a place action at index 1)
        if len(e['actions']) == 1:
            pass

        place = e['actions'][1]
        place_before = self.load_image(collection, episode_id, 1, 'ed-v')
        place_after = self.load_image(collection, episode_id, 1, 'ed-after')

        # Generate goal has no action_id
        def generate_goal(g_collection, g_episode_id, g_suffix, g_pose, g_suffix_before='v', g_reward=0, g_index=None, g_place_weight=1.0, g_merge_weight=1.0, jitter=None):
            if g_collection == collection and g_episode_id == episode_id and g_suffix == 'v' and g_suffix_before == 'v':
                place_goal_before = place_before
                place_goal = place_before
            elif g_collection == collection and g_episode_id == episode_id and g_suffix == 'v' and g_suffix_before == 'after':
                place_goal_before = place_after
                place_goal = place_before
            elif g_collection == collection and g_episode_id == episode_id and g_suffix == 'after' and g_suffix_before == 'v':
                place_goal_before = place_before
                place_goal = place_after
            elif g_collection == collection and g_episode_id == episode_id and g_suffix == 'after' and g_suffix_before == 'after':
                place_goal_before = place_after
                place_goal = place_after
            else:
                goal_e = self.episodes[g_index]
                g_collection = g_collection if g_collection else goal_e['collection']
                g_episode_id = g_episode_id if g_episode_id else goal_e['id']
                g_pose = g_pose if g_pose else goal_e['actions'][1]['pose']
                place_goal_before = self.load_image(g_collection, g_episode_id, 1, 'ed-' + g_suffix_before)
                place_goal = self.load_image(g_collection, g_episode_id, 1, 'ed-' + g_suffix)

            if isinstance(jitter, dict):
                g_pose = self.jitter_pose(g_pose, **jitter)

            place_before_area = self.area_of_interest(place_goal_before, g_pose)
            place_goal_area = self.area_of_interest(place_goal, g_pose)

            reward_grasp = grasp['reward']
            reward_place = g_reward * grasp['reward'] * place['reward']
            reward_merge = reward_place

            grasp_weight = g_reward
            place_weight = (1.0 + 3.0 * reward_place) * reward_grasp * g_place_weight
            merge_weight = (1.0 + 3.0 * reward_merge) * reward_grasp * g_merge_weight

            return (
                grasp_before_area,
                place_before_area,
                place_goal_area,
                (reward_grasp, grasp_index, grasp_weight),
                (reward_place, 0, place_weight),
                (reward_merge, 0, merge_weight),
            )

        if self.use_hindsight:
            result.append(generate_goal(collection, episode_id, 'after', place['pose'], g_reward=1))

            result += [
                generate_goal(collection, episode_id, 'after', place['pose'], jitter={})
                for _ in range(self.jittered_hindsight_images)
            ]

            if place['reward'] > 0:
                result += [
                    generate_goal(collection, episode_id, 'after', place['pose'], jitter={'scale_x': 0.02, 'scale_y': 0.01, 'scale_a': 0.2})
                    for _ in range(self.jittered_hindsight_x_images)
                ]

        if self.use_further_hindsight and 'bin_episode' in place:
            for i in range(index + 1, len(self.episodes)):
                place_later = self.episodes[i]['actions'][1]
                if place_later['bin_episode'] != place['bin_episode']:
                    break

                if place_later['reward'] > 0:
                    result.append(generate_goal(None, None, 'after', place['pose'], g_index=i, g_reward=1))

        if self.use_negative_foresight:
            g_suffix, g_suffix_before = random.choice([('v', 'v'), ('after', 'after'), ('v', 'after')])
            result.append(generate_goal(collection, episode_id, g_suffix, place['pose'], g_suffix_before=g_suffix_before, jitter={'around': False}))

        if self.use_own_goal and 'ed-goal' in place['images']:
            result.append(generate_goal(collection, episode_id, 'goal', place['pose'], g_place_weight=0.2, g_merge_weight=0.7, g_index=index))

            result += [
                generate_goal(collection, episode_id, 'goal', place['pose'], g_index=index, jitter={})
                for _ in range(self.jittered_goal_images)
            ]

        if self.use_different_episodes_as_goals:
            result += [
                generate_goal(None, None, 'after', None, g_index=goal_index, g_place_weight=0.0)
                for goal_index in self.random_gen.choice(self.episodes_place_success_index, size=self.different_episodes_images)
            ]

            if place['reward'] > 0:
                result += [
                    generate_goal(None, None, 'after', None, g_index=goal_index, g_place_weight=0.0)
                    for goal_index in self.random_gen.choice(self.episodes_place_success_index, size=self.different_episodes_images_success)
                ]

                for k, v in self.episodes_different_objects_ids.items():
                    if v[0] <= e['id'] <= v[1]:
                        result += [
                            generate_goal(None, None, 'after', None, g_index=goal_index, g_place_weight=0.0)
                            for goal_index in self.random_gen.choice(self.episodes_different_objects_index[k], size=self.different_object_images)
                        ]

                        # result += [
                        #     generate_goal(None, None, 'after', None, g_index=goal_index, jitter={})
                        #     for goal_index in self.random_gen.choice(self.episodes_different_objects_index[k], size=self.different_jittered_object_images)
                        # ]

        return [np.array(t, dtype=np.float32) for t in zip(*result)]

    def tf_generator(self, index):
        r = tf.py_function(
            self.generator,
            [index],
            (tf.float32,) * 6,
        )
        r[0].set_shape((None, 32, 32, 1))
        r[1].set_shape((None, 32, 32, 1))
        r[2].set_shape((None, 32, 32, 1))
        r[3].set_shape((None, 3))
        r[4].set_shape((None, 3))
        r[5].set_shape((None, 3))
        return (r[0], r[1], r[2]), (r[3], r[4], r[5])

    def get_data(self, shuffle=None):
        data = tf.data.Dataset.range(0, len(self.episodes))
        if shuffle:
            shuffle_number = len(self.episodes) if shuffle == 'all' else int(shuffle)
            data = data.shuffle(shuffle_number, seed=self.seed)
        data = data.map(self.tf_generator, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        # data = data.map(self.tf_generator)
        return data.interleave(lambda *x: tf.data.Dataset.from_tensor_slices(x), cycle_length=1)
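# Inspection sketch (hypothetical `episodes` list): generator() returns six
# stacked arrays per episode, three image stacks and three (reward, index,
# weight) label triples.
dataset = PlacingDataset(episodes, seed=42)
grasp_x, place_x, goal_x, y_grasp, y_place, y_merge = dataset.generator(0)
print(grasp_x.shape)  # (samples, 32, 32, 1)
print(y_grasp.shape)  # (samples, 3): reward, grasp index, sample weight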
class Agent:
    def __init__(self):
        self.grasp_inference = InferencePlanarPose(
            model=Loader.get_model(Config.grasp_model, output_layer='prob'),
            box=Config.box,
            lower_random_pose=Config.lower_random_pose,
            upper_random_pose=Config.upper_random_pose,
        )
        self.grasp_indexer = GraspIndexer(gripper_classes=Config.gripper_classes)
        self.converter = Converter(grasp_z_offset=Config.grasp_z_offset, shift_z_offset=0.007, box=Config.box)  # [m]

        if Config.shift_objects:
            self.shift_inference = InferencePlanarPose(
                model=Loader.get_model(Config.shift_model, output_layer='prob'),
                box=Config.box,
                lower_random_pose=Config.lower_random_pose,
                upper_random_pose=Config.upper_random_pose,
            )
            self.shift_inference.a_space = np.linspace(-3.0, 3.0, 26)  # [rad] # Don't use a=0.0
            self.shift_inference.size_original_cropped = (240, 240)
            self.shift_indexer = ShiftIndexer(shift_distance=Config.shift_distance)

        self.reinfer_next_time = True  # Always true in contrast to AgentPredict

    def infer(self, images: List[OrthographicImage], method: SelectionMethod) -> Action:
        if len(images) == 3:
            images[2].mat = images[2].mat[:, :, ::-1]  # BGR to RGB

        grasp = self.grasp_inference.infer(images, method)
        self.grasp_indexer.to_action(grasp)

        estimated_reward_lower_than_threshold = grasp.estimated_reward < Config.bin_empty_at_max_probability
        bin_empty = estimated_reward_lower_than_threshold and Epoch.selection_method_should_be_high(method)

        if Config.shift_objects and grasp.estimated_reward < Config.grasp_shift_threshold:
            shift = self.shift_inference.infer(images, method)
            self.shift_indexer.to_action(shift)

            if shift.estimated_reward > Config.shift_empty_threshold:
                self.converter.calculate_pose(shift, images)
                return shift
            return Action('bin_empty', safe=1)

        if bin_empty:
            return Action('bin_empty', safe=1)

        self.converter.calculate_pose(grasp, images)
        return grasp

    def infer_shift(self, images: List[OrthographicImage], method: SelectionMethod) -> Action:
        shift = self.shift_inference.infer(images, method)
        self.shift_indexer.to_action(shift)
        return shift

    def infer_max_grasp_reward(self, images: List[OrthographicImage]) -> float:
        return self.grasp_inference.infer(images, SelectionMethod.Max).estimated_reward

    def infer_max_grasp_reward_around_action(self, images: List[OrthographicImage], action: Action, window=(0.13, 0.13)) -> float:  # [m]
        input_images = [self.grasp_inference.get_images(d) for d in images]
        estimated_reward = self.grasp_inference.model.predict(input_images)

        for index_raveled in range(estimated_reward.size):
            index = np.unravel_index(index_raveled, estimated_reward.shape)
            pose = self.grasp_inference.pose_from_index(index, estimated_reward.shape, images[0])
            if not (
                (action.pose.x - window[0] / 2 < pose.x < action.pose.x + window[0] / 2)
                and (action.pose.y - window[1] / 2 < pose.y < action.pose.y + window[1] / 2)
            ):
                estimated_reward[index] = 0.0

        return np.max(estimated_reward)
class PlacingDataset:
    def __init__(self, episodes, seed=None):
        self.episodes = episodes
        self.episodes_place_success_index = list(map(
            lambda e: e[0],
            filter(lambda e: len(e[1]['actions']) > 1 and e[1]['actions'][1]['reward'] > 0, enumerate(episodes)),
        ))
        # self.episodes_different_objects_index = list(map(lambda e: e[0], filter(lambda e: '2019-12-16-17-01-18-409' <= e[1]['id'] <= '2020-01-16-17-09-47-989' and e[1]['actions'][1]['reward'] > 0, enumerate(episodes))))

        self.size_input = (752, 480)
        self.size_memory_scale = 4
        self.size_cropped = (200, 200)
        self.size_result = (32, 32)

        self.size_cropped_area = (self.size_cropped[0] // self.size_memory_scale, self.size_cropped[1] // self.size_memory_scale)

        self.use_hindsight = True
        self.use_further_hindsight = False
        self.use_negative_foresight = True
        self.use_own_goal = True
        self.use_different_episodes_as_goals = True

        self.jittered_hindsight_images = 3
        self.jittered_hindsight_x_images = 3
        self.jittered_goal_images = 2
        self.different_episodes_images = 2
        self.different_object_images = 5
        # self.different_jittered_object_images = 2

        self.box_distance = 0.281  # [m]

        # self.indexer = GraspIndexer([0.05, 0.07, 0.086])  # [m]
        self.indexer = GraspIndexer([0.025, 0.05, 0.07, 0.086])  # [m]

        self.cameras = ('ed', 'rd', 'rc')
        self.random_gen = np.random.RandomState(seed)

    @lru_cache(maxsize=None)
    def load_image(self, collection, episode_id, action_id, suffix):
        image = Loader.get_image(collection, episode_id, action_id, suffix, as_float=True)
        draw_around_box(image, box=Config.box)

        image.mat = cv2.resize(image.mat, (self.size_input[0] // self.size_memory_scale, self.size_input[1] // self.size_memory_scale))
        image.pixel_size /= self.size_memory_scale
        return image

    def area_of_interest(self, image, pose):
        area = get_area_of_interest_new(
            image,
            RobotPose(all_data=pose),
            size_cropped=self.size_cropped_area,
            size_result=self.size_result,
            border_color=image.value_from_depth(self.box_distance) / (255 * 255),
        )
        if len(area.mat.shape) == 2:
            return np.expand_dims(area.mat, 2)
        return area.mat

    def jitter_pose(self, pose, scale_x=0.05, scale_y=0.05, scale_a=1.5):
        new_pose = copy.deepcopy(pose)

        low = [np.minimum(0.002, scale_x), np.minimum(0.002, scale_y), np.minimum(0.05, scale_a)]
        high = [scale_x, scale_y, scale_a]
        dx, dy, da = self.random_gen.choice([-1, 1], size=3) * self.random_gen.uniform(low, high, size=3)

        new_pose['x'] += np.cos(pose['a']) * dx - np.sin(pose['a']) * dy
        new_pose['y'] += np.sin(pose['a']) * dx + np.cos(pose['a']) * dy
        new_pose['a'] += da
        return new_pose

    def generator(self, index):
        e = self.episodes[index]
        result = []

        collection = e['collection']
        episode_id = e['id']

        grasp = e['actions'][0]
        grasp_before = tuple(self.load_image(collection, episode_id, 0, camera + '-v') for camera in self.cameras)
        grasp_before_area = tuple(self.area_of_interest(image, grasp['pose']) for image in grasp_before)
        grasp_index = self.indexer.from_pose(grasp['pose'])

        # Only grasp
        if len(e['actions']) == 1:
            zeros = (
                np.zeros(self.size_result + (1,)),
                np.zeros(self.size_result + (1,)),
                np.zeros(self.size_result + (3,)),
            )
            result = [
                grasp_before_area + zeros + zeros + (
                    (grasp['reward'], grasp_index, 0.4),
                    (0, 0, 0),
                    (0, 0, 0),
                )
            ]
            return [np.array(t, dtype=np.float32) for t in zip(*result)]

        place = e['actions'][1]
        place_before = tuple(self.load_image(collection, episode_id, 1, camera + '-v') for camera in self.cameras)
        place_after = tuple(self.load_image(collection, episode_id, 1, camera + '-after') for camera in self.cameras)

        # Generate goal has no action_id
        def generate_goal(g_collection, g_episode_id, g_suffix, g_pose, g_reward=0, g_index=None, g_merge_weight=1.0, jitter=None):
            if g_collection == collection and g_episode_id == episode_id and g_suffix == 'v':
                place_goal_before = place_before
                place_goal = place_before
            elif g_collection == collection and g_episode_id == episode_id and g_suffix == 'after':
                place_goal_before = place_before
                place_goal = place_after
            else:
                goal_e = self.episodes[g_index]
                g_collection = g_collection if g_collection else goal_e['collection']
                g_episode_id = g_episode_id if g_episode_id else goal_e['id']
                g_pose = g_pose if g_pose else goal_e['actions'][1]['pose']
                place_goal_before = tuple(self.load_image(g_collection, g_episode_id, 1, camera + '-v') for camera in self.cameras)
                place_goal = tuple(self.load_image(g_collection, g_episode_id, 1, camera + '-' + g_suffix) for camera in self.cameras)

            if isinstance(jitter, dict):
                g_pose = self.jitter_pose(g_pose, **jitter)

            place_before_area = tuple(self.area_of_interest(image, g_pose) for image in place_goal_before)
            place_goal_area = tuple(self.area_of_interest(image, g_pose) for image in place_goal)

            goal_reward = g_reward
            reward_grasp = grasp['reward']
            reward_place = goal_reward * place['reward']
            reward_merge = reward_place

            grasp_weight = g_reward
            place_weight = (1.0 + 5.0 * reward_place) * reward_grasp
            merge_weight = (1.0 + 5.0 * reward_merge) * g_merge_weight

            return grasp_before_area + place_before_area + place_goal_area + (
                (reward_grasp, grasp_index, grasp_weight),
                (reward_place, 0, place_weight),
                (reward_merge, 0, merge_weight),
            )

        if self.use_hindsight:
            result.append(generate_goal(collection, episode_id, 'after', place['pose'], g_reward=1))

            result += [
                generate_goal(collection, episode_id, 'after', place['pose'], jitter={})
                for _ in range(self.jittered_hindsight_images)
            ]

            result += [
                generate_goal(collection, episode_id, 'after', place['pose'], jitter={'scale_x': 0.025, 'scale_y': 0, 'scale_a': 0})
                for _ in range(self.jittered_hindsight_x_images)
            ]

        if self.use_further_hindsight and 'bin_episode' in place:
            for i in range(index + 1, len(self.episodes)):
                place_later = self.episodes[i]['actions'][1]
                if place_later['bin_episode'] != place['bin_episode']:
                    break

                if place_later['reward'] > 0:
                    result.append(generate_goal(None, None, 'after', place['pose'], g_index=i, g_reward=1))

        if self.use_negative_foresight:
            result.append(generate_goal(collection, episode_id, 'v', place['pose']))

        if self.use_own_goal and 'ed-goal' in place['images']:
            result.append(generate_goal(collection, episode_id, 'goal', place['pose'], g_index=index))

            result += [
                generate_goal(collection, episode_id, 'goal', place['pose'], g_index=index, g_merge_weight=0.5, jitter={})
                for _ in range(self.jittered_goal_images)
            ]

        if self.use_different_episodes_as_goals:
            result += [
                generate_goal(None, None, 'after', None, g_index=goal_index, g_merge_weight=0.3)
                for goal_index in self.random_gen.choice(self.episodes_place_success_index, size=self.different_episodes_images)
            ]

            # result += [
            #     generate_goal(None, None, 'after', None, g_index=goal_index, g_merge_weight=0.3)
            #     for goal_index in self.random_gen.choice(self.episodes_different_objects_index, size=self.different_object_images)
            # ]

            # result += [
            #     generate_goal(None, None, 'after', None, g_index=goal_index, g_merge_weight=0.3, jitter={})
            #     for goal_index in self.random_gen.choice(self.episodes_different_objects_index, size=self.different_jittered_object_images)
            # ]

        return [np.array(t, dtype=np.float32) for t in zip(*result)]

    def tf_generator(self, index):
        r = tf.py_function(
            self.generator,
            [index],
            (tf.float32,) * (3 * len(self.cameras) + 3),
        )
        r[0].set_shape((None, 32, 32, 1))
        r[1].set_shape((None, 32, 32, 1))
        r[2].set_shape((None, 32, 32, 3))
        r[3].set_shape((None, 32, 32, 1))
        r[4].set_shape((None, 32, 32, 1))
        r[5].set_shape((None, 32, 32, 3))
        r[6].set_shape((None, 32, 32, 1))
        r[7].set_shape((None, 32, 32, 1))
        r[8].set_shape((None, 32, 32, 3))
        r[9].set_shape((None, 3))
        r[10].set_shape((None, 3))
        r[11].set_shape((None, 3))
        return (r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]), (r[9], r[10], r[11])

    def get_data(self, shuffle=None):
        data = tf.data.Dataset.range(0, len(self.episodes))
        if shuffle:
            shuffle_number = len(self.episodes) if shuffle == 'all' else int(shuffle)
            data = data.shuffle(shuffle_number)
        data = data.map(self.tf_generator, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        return data.interleave(lambda *x: tf.data.Dataset.from_tensor_slices(x), cycle_length=1)
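# Pipeline sketch (hypothetical `episodes` and `model`; batch size and epoch
# count are placeholders): get_data() yields one training sample per
# interleaved tensor slice, ready for Keras.
dataset = PlacingDataset(episodes, seed=42)
data = dataset.get_data(shuffle='all').batch(64).prefetch(tf.data.experimental.AUTOTUNE)
model.fit(data, epochs=10)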