def main():
    """Evaluate a trained DQN policy and render its Q-value maps.

    Loads a checkpointed model (selected via --policy or --out), runs it on
    the fixed-cubes scene while recording the Q values of every action at
    each step, then plots one Q-map image per recorded step.

    Raises:
        IOError: If the selected directory has no checkpoints or config.
    """
    parser = argparse.ArgumentParser(description="yumi_push_qmap")
    parser.add_argument("--out", type=str, default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--policy", type=str, default="",
                        help="policy directory that should be evaluated")
    args = parser.parse_args()
    # Load and check directory.
    directory, path = parse_path(args)
    if not os.path.isdir(os.path.join(path, "checkpoints")):
        raise IOError("No model to load in {} !".format(path))
    if not os.path.isfile(os.path.join(path, "config.txt")):
        raise IOError("No config found in {} !".format(path))
    config = load_yaml(os.path.join(path, "config.txt"))
    config["visualize"] = False
    config["verbose"] = True
    config["debug"] = False
    config["scene"] = "cubes_fixed"
    config["rew_schedule"] = False
    config["train_steps"] = config["eval_steps"] = 1
    config["working_dir"] = path
    cam_config = load_yaml(
        os.environ["YUMI_PUSH_CONFIG"] + "/" + config['camera_config'])
    ckp = Checkpoint(config, tag=directory + "_qmap")
    # Task definition.
    task, sensor, actuator = init_task(config, cam_config, ckp)
    # Get Q values from agent.
    act = deepq(task, config, eval=True)
    obs, Qvalues, states = task.reset(), [], []
    for _ in range(config["task_max_trials_eps"]):
        sample_probs = task.get_action_probabilities()
        act_out = act(obs, sample_probs)
        Qvalues.append(act_out[1][0])
        _, segmented, _ = sensor.get_state()
        states.append(segmented)
        obs = task.step(act_out[0][0])[0]
    print("Start creating Q-maps ...")
    # Plot Q map by iterating over the whole array of actions, determine
    # the pixel by "unnormalizing" and assign the Q value.
    # These quantities depend only on the (fixed) action space, so compute
    # them once instead of once per recorded step.
    n_actions = task.action_space.n
    directions = actuator.directions()
    n_directions = len(directions)
    n_steps = int(np.sqrt(n_actions / n_directions))
    for iq, q_values in enumerate(Qvalues):
        # -1000 marks grid cells that never get a Q value assigned.
        q_map = np.ones((n_steps, n_steps, n_directions)) * (-1000)
        for x in range(n_actions):
            action = actuator.undiscretize(np.asarray(x, dtype=int))
            q_map[action[0], action[1], action[2]] = q_values[x]
            progress_bar(x, n_actions)
        # Plotting output graphs.
        draw_graphs(q_map, states[iq], directions, ckp,
                    name="{}_qvalues".format(iq))
    # Cleaning up directory: remove training logs that are meaningless
    # for a pure evaluation run.
    os.remove(os.path.join(ckp.dir, "logging.txt"))
    os.remove(os.path.join(ckp.dir, "log.txt"))
    os.remove(os.path.join(ckp.dir, "progress.csv"))
    print("... finished creating Q-maps.")
def train(paramset): config = load_yaml(os.environ["YUMI_PUSH_CONFIG"]+"/simulation.yaml") cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"]+"/"+config['camera_config']) config["visualize"] = config["debug"] = config["verbose"] = False config["train_steps"] = 180000 tag = "!" for param in paramset: assert len(param.split("#")) == 3 p,v,t = param.split("#") if t == "str": config[p] = v elif t == "int": config[p] = int(v) elif t == "float": config[p] = float(v) elif t == "bool": config[p] = bool(int(v)) else: raise ValueError("unknown type") tag = tag + param + "!" ckp = Checkpoint(config, tag=tag) config["working_dir"] = ckp.get_path("checkpoints/") # Execute training in given configuration. world = sim_world.World(config, ckp) camera = sim_camera.RGBDCamera(world.physics_client, cam_config) robot = sim_robot.Robot2DHand(config, ckp, world) actuator = task_actuators.actuator_factory(config, ckp, robot) sensor = task_sensors.Sensor(config, ckp, camera=camera) reward = task_rewards.RewardFn(config,ckp,camera) task = task_task.Task(sensor,actuator,reward,world,config,ckp) learn_algo = config.get("train_algo", "ppo2") if learn_algo == "ppo2": ppo2(task, config) elif learn_algo == "deepq": deepq(task, config) else: raise ValueError("Invalid training algorithm {}!".format(learn_algo)) task.close()
def main():
    """Evaluate a trained policy checkpoint for a number of steps.

    Exactly one of --policy / --out selects the run directory; the stored
    config is loaded, evaluation settings applied, and the configured
    training algorithm is run in evaluation mode.
    """
    parser = argparse.ArgumentParser(description="yumi_push_eval")
    parser.add_argument("--policy", type=str, default="",
                        help="policies directory that should be evaluated")
    parser.add_argument("--out", type=str, default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--visualize", action="store_true",
                        help="enable visualization (default=False)")
    parser.add_argument("--debug", action="store_true",
                        help="enable debug mode (default=False)")
    parser.add_argument("--activations", action="store_true",
                        help="enable cnn layer visualization (default=False)")
    parser.add_argument("--eval_steps", type=int, default=10,
                        help="number of evaluation steps")
    args = parser.parse_args()
    # Exactly one of --policy / --out must be given (XOR check).
    if (args.policy != "") == (args.out != ""):
        raise IOError("Usage: python3 eval.py --policy=... or --out=...")
    if args.policy != "":
        run_dir = args.policy
        base_dir = os.environ["YUMI_PUSH_POLICIES"]
    else:
        run_dir = args.out
        base_dir = os.environ["YUMI_PUSH_OUTS"]
    run_path = os.path.join(base_dir, run_dir)
    # Validate the run directory before touching its contents.
    if not os.path.isdir(run_path):
        raise IOError("Directory {} not existing !".format(run_path))
    if not os.path.isdir(os.path.join(run_path, "checkpoints")):
        raise IOError("No model to load in {} !".format(run_path))
    if not os.path.isfile(os.path.join(run_path, "config.txt")):
        raise IOError("No config found in {} !".format(run_path))
    config = load_yaml(os.path.join(run_path, "config.txt"))
    config["visualize"] = args.visualize
    config["debug"] = args.debug
    config["working_dir"] = run_path
    config["eval_steps"] = config["seg_rew_list_len"] = args.eval_steps
    cam_config = load_yaml(
        os.environ["YUMI_PUSH_CONFIG"] + "/" + config['camera_config'])
    ckp = Checkpoint(config, tag=run_dir + "_eval")
    # Load task and components, then evaluate with the stored algorithm.
    task, _, _ = init_task(config, cam_config, ckp)
    algo = config.get("train_algo", "ppo2")
    if algo == "ppo2":
        ppo2(task, config, eval=True)
    elif algo == "deepq":
        deepq(task, config, eval=True)
    else:
        raise ValueError("Invalid training algorithm {}!".format(algo))
    task.close()
def main(): config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/simulation.yaml") cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" + config['camera_config']) ckp = Checkpoint(config) config["working_dir"] = ckp.get_path("checkpoints/") task, _, _ = init_task(config, cam_config, ckp) learn_algo = config.get("train_algo", "ppo2") if learn_algo == "ppo2": ppo2(task, config) elif learn_algo == "deepq": deepq(task, config) else: raise ValueError("Invalid training algorithm {}!".format(learn_algo)) task.close()
def main(): parser = argparse.ArgumentParser(description="yumi_push_qmap") parser.add_argument("--out", type=str, default="", help="outs directory that should be evaluated") parser.add_argument("--policy", type=str, default="", help="policy directory that should be evaluated") args = parser.parse_args() # Load and check directory. if not ((args.policy == "" and args.out != "") \ or (args.policy != "" and args.out == "")): raise IOError("Usage: python3 eval.py --policy=... or --out=...") directory = args.policy if args.policy != "" else args.out parent_dir = os.environ["YUMI_PUSH_POLICIES"] if args.policy != "" \ else os.environ["YUMI_PUSH_OUTS"] path = os.path.join(parent_dir, directory) if not os.path.isdir(path): raise IOError("Directory {} not existing !".format(path)) if not os.path.isdir(os.path.join(path, "checkpoints")): raise IOError("No model to load in {} !".format(path)) if not os.path.isfile(os.path.join(path, "config.txt")): raise IOError("No config found in {} !".format(path)) config = load_yaml(os.path.join(path, "config.txt")) config["visualize"] = config["verbose"] = True config["debug"] = False config["rew_schedule"] = False config["working_dir"] = path cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" + config['camera_config']) ckp = Checkpoint(config, tag=directory + "_multistep") # Task definition. task, sensor, actuator = new_init_task(config, cam_config, ckp) learn_algo = config.get("train_algo", "ppo2") if learn_algo == "ppo2": ppo2(task, config, eval=True) elif learn_algo == "deepq": deepq(task, config, eval=True) else: raise ValueError("Invalid training algorithm {}!".format(learn_algo)) task.close()
def from_yaml(file_path): """Read a transform from a yaml file. Example of the content of such a file: transform: translation: [1., 2., 3.] rotation: [0., 0., 0., 1.] Args: file_path: The path to the YAML file. Returns: A 4x4 homogeneous transformation matrix. """ cfg = io_utils.load_yaml(file_path) return from_dict(cfg["transform"])
def main():
    """Exhaustively evaluate the reward of every action and plot a map.

    The config comes from exactly one of --policy, --out or --config. For
    a discrete action space every action index is stepped once; for a
    continuous space a [start_x, start_y, direction] grid is sampled at
    0.1 m resolution. Rewards are plotted over the segmented scene image.

    Raises:
        IOError: If not exactly one source option is given, or the
            resolved config file does not exist.
    """
    parser = argparse.ArgumentParser(description="yumi_push_reward_map")
    parser.add_argument("--out", type=str, default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--policy", type=str, default="",
                        help="policy directory that should be evaluated")
    parser.add_argument("--config", type=str, default="",
                        help="config file that should be evaluated")
    args = parser.parse_args()
    # Load and check directory: exactly one of the three sources must be
    # set, i.e. exactly two of them must be empty.
    if (int(args.policy == "") + int(args.out == "")
            + int(args.config == "")) != 2:
        raise IOError(
            "Usage: python3 reward_map.py --policy=... or --out=... or --config=..."
        )
    if args.policy != "":
        tag = args.policy
        config_file = os.path.join(args.policy, "config.txt")
        config_file = os.path.join(
            os.environ["YUMI_PUSH_POLICIES"], config_file)
    elif args.out != "":
        tag = args.out
        config_file = os.path.join(args.out, "config.txt")
        config_file = os.path.join(os.environ["YUMI_PUSH_OUTS"], config_file)
    else:
        tag = args.config.replace(".yaml", "")
        config_file = os.path.join(os.environ["YUMI_PUSH_CONFIG"], args.config)
    if not os.path.isfile(config_file):
        raise IOError("File {} not existing !".format(config_file))
    # Load configuration and create output file (checkpoint).
    config = load_yaml(config_file)
    config["visualize"] = False
    config["verbose"] = True
    config["debug"] = False
    config["scene"] = "cubes_fixed"
    config["rew_schedule"] = False
    config["train_algo"] = ""
    cam_config = load_yaml(
        os.environ["YUMI_PUSH_CONFIG"] + "/" + config['camera_config'])
    ckp = Checkpoint(config, tag=tag + "_reward_map")
    # Task definition.
    task, sensor, actuator = init_task(config, cam_config, ckp)
    # Get number of pushing actions (not implemented for all actuators, so
    # will throw error for "too continuous" actuators).
    directions = actuator.directions()
    n_directions = len(directions)
    # Iterate over all actions and determine reward (assume start_point are
    # 1&2 action and 3 pushing direction).
    print("Start creating reward map ...")
    rewards = None
    # For discrete action space the actions simply are a integer number.
    if config["act_discrete"]:
        n_actions = task.action_space.n
        n_action_steps = int(np.sqrt(task.action_space.n / n_directions))
        # -1000 marks actions whose reward was never assigned.
        rewards = np.ones(
            (n_action_steps, n_action_steps, n_directions)) * (-1000)
        for x in range(n_actions):
            task.reset()
            action = actuator.undiscretize(np.asarray(x, dtype=int))
            _, reward, done, _ = task.step(np.asarray(x, dtype=int))
            rewards[action[0], action[1], action[2]] = reward
            progress_bar(x, n_actions)
    # For continuous action space discretize the actions (assumes action to
    # be [starting point, pushing_direction]).
    else:
        res_cont = 0.1  # [m]
        n_action_steps = int(2.0 / res_cont)
        n_actions = n_directions * (n_action_steps**2)
        rewards = np.ones(
            (n_action_steps, n_action_steps, n_directions)) * (-1000)
        i = 0
        # `i_dir` instead of `id` to avoid shadowing the builtin.
        for ix in range(n_action_steps):
            for iy in range(n_action_steps):
                for i_dir in range(n_directions):
                    x, y = -1.0 + ix * res_cont, -1.0 + iy * res_cont
                    d = -0.9 + i_dir * (2.0 / n_directions)
                    task.reset()
                    _, reward, done, _ = task.step(np.asarray([x, y, d]))
                    rewards[ix, iy, i_dir] = reward
                    i = i + 1
                    progress_bar(i, n_actions)
    # Plotting output graphs.
    _, segmented, _ = sensor.get_state()
    draw_graphs(rewards, segmented, directions, ckp, name="rewards")
    # Cleaning up directory: remove artifacts that do not belong to the
    # reward-map output.
    os.remove(os.path.join(ckp.dir, "log_num_steps.pdf"))
    os.remove(os.path.join(ckp.dir, "log_reward.pdf"))
    os.remove(os.path.join(ckp.dir, "log_success_rate.pdf"))
    os.remove(os.path.join(ckp.dir, "logging.txt"))
    print("... finished creating reward map.")
def main():
    """Evaluate a trained policy and optionally plot CNN activations.

    Loads a checkpointed run (via --policy or --out), evaluates it with
    the configured algorithm, and — when --activations is set — plots the
    activations of the policy network's first two convolution layers.

    Raises:
        IOError: If the run directory has no checkpoints or config.
        ValueError: If the configured training algorithm is unknown.
    """
    parser = argparse.ArgumentParser(description="yumi_push_eval")
    parser.add_argument("--policy", type=str, default="",
                        help="policies directory that should be evaluated")
    parser.add_argument("--out", type=str, default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--visualize", action="store_true",
                        help="enable visualization (default=False)")
    parser.add_argument("--debug", action="store_true",
                        help="enable debug mode (default=False)")
    parser.add_argument("--activations", action="store_true",
                        help="enable cnn layer visualization (default=False)")
    args = parser.parse_args()
    # Load and check directory.
    directory, path = parse_path(args)
    if not os.path.isdir(os.path.join(path, "checkpoints")):
        raise IOError("No model to load in {} !".format(path))
    if not os.path.isfile(os.path.join(path, "config.txt")):
        raise IOError("No config found in {} !".format(path))
    config = load_yaml(os.path.join(path, "config.txt"))
    config["visualize"] = args.visualize
    config["debug"] = args.debug
    config["working_dir"] = path
    cam_config = load_yaml(
        os.environ["YUMI_PUSH_CONFIG"] + "/" + config['camera_config'])
    ckp = Checkpoint(config, tag=directory + "_eval")
    # Load task and components.
    world = sim_world.World(config, ckp)
    camera = sim_camera.RGBDCamera(world.physics_client, cam_config)
    robot = sim_robot.Robot2DHand(config, ckp, world)
    actuator = task_actuators.actuator_factory(config, ckp, robot)
    sensor = task_sensors.Sensor(config, ckp, camera=camera)
    reward = task_rewards.RewardFn(config, ckp, camera)
    task = task_task.Task(sensor, actuator, reward, world, config, ckp)
    learn_algo = config.get("train_algo", "ppo2")
    # Capture the trained model so its tensorflow session is reachable for
    # the activation plots below — the previous code referenced an
    # undefined name `model`, which raised NameError with --activations.
    if learn_algo == "ppo2":
        model = ppo2(task, config, eval=True)
    elif learn_algo == "deepq":
        model = deepq(task, config, eval=True)
    else:
        raise ValueError("Invalid training algorithm {}!".format(learn_algo))
    # Plot activations (if activated).
    if args.activations:
        # Access layer activation of last evaluation by accessing the
        # tensorflow namespace, in which the network and all its parameters
        # are saved. (Unused placeholder figures that were created here
        # have been removed.)
        session = model.sess
        # NOTE(review): elsewhere in this file sensor.get_state() returns
        # a 3-tuple — confirm getActivations expects the full tuple.
        imageToUse = sensor.get_state()
        graph = session.graph
        # The observation placeholder is shared by both layer queries.
        tf_placeholder = graph.get_tensor_by_name("ppo2_model/Ob:0")
        # First convolution.
        layer = graph.get_tensor_by_name("ppo2_model/pi/c1/Conv2D:0")
        getActivations(session, layer, tf_placeholder, imageToUse)
        # Second convolution.
        layer = graph.get_tensor_by_name("ppo2_model/pi/c2/Conv2D:0")
        getActivations(session, layer, tf_placeholder, imageToUse)
    task.close()