def main():
    parser = argparse.ArgumentParser(description="yumi_push_qmap")
    parser.add_argument("--out", type=str, default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--policy", type=str, default="",
                        help="policy directory that should be evaluated")
    args = parser.parse_args()

    # Load and check directory.
    directory, path = parse_path(args)
    if not os.path.isdir(os.path.join(path, "checkpoints")):
        raise IOError("No model to load in {}!".format(path))
    if not os.path.isfile(os.path.join(path, "config.txt")):
        raise IOError("No config found in {}!".format(path))
    config = load_yaml(os.path.join(path, "config.txt"))
    config["visualize"] = False
    config["verbose"] = True
    config["debug"] = False
    config["scene"] = "cubes_fixed"
    config["rew_schedule"] = False
    config["train_steps"] = config["eval_steps"] = 1
    config["working_dir"] = path
    cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" +
                           config['camera_config'])
    ckp = Checkpoint(config, tag=directory+"_qmap")

    # Task definition.
    task, sensor, actuator = init_task(config, cam_config, ckp)

    # Get Q values from agent.
    act = deepq(task, config, eval=True)
    obs, Qvalues, states = task.reset(), [], []
    for _ in range(config["task_max_trials_eps"]):
        sample_probs = task.get_action_probabilities()
        act_out = act(obs, sample_probs)
        Qvalues.append(act_out[1][0])
        _, segmented, _ = sensor.get_state()
        states.append(segmented)
        obs = task.step(act_out[0][0])[0]

    print("Start creating Q-maps ...")
    # Plot the Q-map by iterating over the whole array of actions,
    # determining each pixel by "unnormalizing" the action and assigning
    # its Q value.
    for iq, q_values in enumerate(Qvalues):
        n_actions    = task.action_space.n
        directions   = actuator.directions()
        n_directions = len(directions)
        n_steps      = int(np.sqrt(n_actions / n_directions))
        q_map = np.ones((n_steps, n_steps, n_directions)) * (-1000)
        for x in range(n_actions):
            action = actuator.undiscretize(np.asarray(x, dtype=int))
            q_map[action[0],action[1],action[2]] = q_values[x]
            progress_bar(x, n_actions)
        # Plotting output graphs.
        draw_graphs(q_map, states[iq], directions, ckp,
                    name="{}_qvalues".format(iq))

    # Cleaning up directory.
    os.remove(os.path.join(ckp.dir, "logging.txt"))
    os.remove(os.path.join(ckp.dir, "log.txt"))
    os.remove(os.path.join(ckp.dir, "progress.csv"))
    print("... finished creating Q-maps.")
def train(paramset):
    config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/simulation.yaml")
    cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" +
                           config['camera_config'])
    config["visualize"] = config["debug"] = config["verbose"] = False
    config["train_steps"] = 180000
    tag = "!"
    for param in paramset:
        assert len(param.split("#")) == 3
        p,v,t = param.split("#")
        if t == "str": config[p] = v
        elif t == "int": config[p] = int(v)
        elif t == "float": config[p] = float(v)
        elif t == "bool": config[p] = bool(int(v))
        else: raise ValueError("unknown type")
        tag = tag + param + "!"
    ckp = Checkpoint(config, tag=tag)
    config["working_dir"] = ckp.get_path("checkpoints/")
    # Execute training in given configuration.
    world = sim_world.World(config, ckp)
    camera = sim_camera.RGBDCamera(world.physics_client, cam_config)
    robot  = sim_robot.Robot2DHand(config, ckp, world)
    actuator = task_actuators.actuator_factory(config, ckp, robot)
    sensor = task_sensors.Sensor(config, ckp, camera=camera)
    reward = task_rewards.RewardFn(config, ckp, camera)
    task = task_task.Task(sensor, actuator, reward, world, config, ckp)
    learn_algo = config.get("train_algo", "ppo2")
    if learn_algo == "ppo2": ppo2(task, config)
    elif learn_algo == "deepq": deepq(task, config)
    else: raise ValueError("Invalid training algorithm {}!".format(learn_algo))
    task.close()
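
# For reference, train() expects each paramset entry in "name#value#type"
# form, which the loop above parses into config overrides. A hedged usage
# sketch (parameter names are illustrative and may not exist in
# simulation.yaml):
#
#   train(["train_algo#deepq#str",
#          "train_steps#200000#int",
#          "rew_schedule#0#bool"])
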
def main():
    parser = argparse.ArgumentParser(description="yumi_push_eval")
    parser.add_argument("--policy",
                        type=str,
                        default="",
                        help="policies directory that should be evaluated")
    parser.add_argument("--out",
                        type=str,
                        default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--visualize",
                        action="store_true",
                        help="enable visualization (default=False)")
    parser.add_argument("--debug",
                        action="store_true",
                        help="enable debug mode (default=False)")
    parser.add_argument("--activations",
                        action="store_true",
                        help="enable cnn layer visualization (default=False)")
    parser.add_argument("--eval_steps",
                        type=int,
                        default=10,
                        help="number of evaluation steps")
    args = parser.parse_args()
    # Load and check directory.
    if not ((args.policy == "" and args.out != "") \
    or (args.policy != "" and args.out == "")):
        raise IOError("Usage: python3 eval.py --policy=... or --out=...")
    directory = args.policy if args.policy != "" else args.out
    parent_dir = os.environ["YUMI_PUSH_POLICIES"] if args.policy != "" \
                 else os.environ["YUMI_PUSH_OUTS"]
    path = os.path.join(parent_dir, directory)
    if not os.path.isdir(path):
        raise IOError("Directory {} does not exist!".format(path))
    if not os.path.isdir(os.path.join(path, "checkpoints")):
        raise IOError("No model to load in {}!".format(path))
    if not os.path.isfile(os.path.join(path, "config.txt")):
        raise IOError("No config found in {}!".format(path))
    config = load_yaml(os.path.join(path, "config.txt"))
    config["visualize"] = args.visualize
    config["debug"] = args.debug
    config["working_dir"] = path
    config["eval_steps"] = config["seg_rew_list_len"] = args.eval_steps
    cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" +
                           config['camera_config'])
    ckp = Checkpoint(config, tag=directory + "_eval")

    # Load task and components.
    task, _, _ = init_task(config, cam_config, ckp)
    learn_algo = config.get("train_algo", "ppo2")
    if learn_algo == "ppo2": ppo2(task, config, eval=True)
    elif learn_algo == "deepq": deepq(task, config, eval=True)
    else: raise ValueError("Invalid training algorithm {}!".format(learn_algo))
    task.close()
def main():
    config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/simulation.yaml")
    cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" +
                           config['camera_config'])
    ckp = Checkpoint(config)
    config["working_dir"] = ckp.get_path("checkpoints/")
    task, _, _ = init_task(config, cam_config, ckp)
    learn_algo = config.get("train_algo", "ppo2")
    if learn_algo == "ppo2": ppo2(task, config)
    elif learn_algo == "deepq": deepq(task, config)
    else: raise ValueError("Invalid training algorithm {}!".format(learn_algo))

    task.close()
def main():
    parser = argparse.ArgumentParser(description="yumi_push_qmap")
    parser.add_argument("--out",
                        type=str,
                        default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--policy",
                        type=str,
                        default="",
                        help="policy directory that should be evaluated")
    args = parser.parse_args()

    # Load and check directory.
    if not ((args.policy == "" and args.out != "") \
    or (args.policy != "" and args.out == "")):
        raise IOError("Usage: python3 eval.py --policy=... or --out=...")
    directory = args.policy if args.policy != "" else args.out
    parent_dir = os.environ["YUMI_PUSH_POLICIES"] if args.policy != "" \
                 else os.environ["YUMI_PUSH_OUTS"]
    path = os.path.join(parent_dir, directory)
    if not os.path.isdir(path):
        raise IOError("Directory {} does not exist!".format(path))
    if not os.path.isdir(os.path.join(path, "checkpoints")):
        raise IOError("No model to load in {}!".format(path))
    if not os.path.isfile(os.path.join(path, "config.txt")):
        raise IOError("No config found in {}!".format(path))
    config = load_yaml(os.path.join(path, "config.txt"))
    config["visualize"] = config["verbose"] = True
    config["debug"] = False
    config["rew_schedule"] = False
    config["working_dir"] = path
    cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" +
                           config['camera_config'])
    ckp = Checkpoint(config, tag=directory + "_multistep")

    # Task definition.
    task, sensor, actuator = new_init_task(config, cam_config, ckp)
    learn_algo = config.get("train_algo", "ppo2")
    if learn_algo == "ppo2": ppo2(task, config, eval=True)
    elif learn_algo == "deepq": deepq(task, config, eval=True)
    else: raise ValueError("Invalid training algorithm {}!".format(learn_algo))
    task.close()
def from_yaml(file_path):
    """Read a transform from a yaml file.

    Example of the content of such a file:

        transform:
            translation: [1., 2., 3.]
            rotation: [0., 0., 0., 1.]

    Args:
        file_path: The path to the YAML file.

    Returns:
        A 4x4 homogeneous transformation matrix.
    """
    cfg = io_utils.load_yaml(file_path)
    return from_dict(cfg["transform"])
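
# from_dict is defined elsewhere in the project; a minimal sketch of what it
# plausibly does, assuming the rotation is an (x, y, z, w) quaternion as in
# the docstring example (scipy is used here for the conversion):
def from_dict_sketch(cfg):
    """Build a 4x4 homogeneous transform from a translation + quaternion."""
    from scipy.spatial.transform import Rotation
    T = np.eye(4)
    T[:3, :3] = Rotation.from_quat(cfg["rotation"]).as_matrix()
    T[:3, 3] = cfg["translation"]
    return T
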
def main():
    parser = argparse.ArgumentParser(description="yumi_push_reward_map")
    parser.add_argument("--out",
                        type=str,
                        default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--policy",
                        type=str,
                        default="",
                        help="policy directory that should be evaluated")
    parser.add_argument("--config",
                        type=str,
                        default="",
                        help="config file that should be evaluated")
    args = parser.parse_args()
    # Load and check directory.
    if (int(args.policy == "") + int(args.out == "") +
            int(args.config == "")) != 2:
        raise IOError(
            "Usage: python3 reward_map.py --policy=... or --out=... or --config=..."
        )
    if args.policy != "":
        tag = args.policy
        file = os.path.join(args.policy, "config.txt")
        file = os.path.join(os.environ["YUMI_PUSH_POLICIES"], file)
    elif args.out != "":
        tag = args.out
        file = os.path.join(args.out, "config.txt")
        file = os.path.join(os.environ["YUMI_PUSH_OUTS"], file)
    else:
        tag = args.config.replace(".yaml", "")
        file = os.path.join(os.environ["YUMI_PUSH_CONFIG"], args.config)
    if not os.path.isfile(file):
        raise IOError("File {} does not exist!".format(file))
    # Load configuration and create output file (checkpoint).
    config = load_yaml(file)
    config["visualize"] = False
    config["verbose"] = True
    config["debug"] = False
    config["scene"] = "cubes_fixed"
    config["rew_schedule"] = False
    config["train_algo"] = ""
    cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" +
                           config['camera_config'])
    ckp = Checkpoint(config, tag=tag + "_reward_map")
    # Task definition.
    task, sensor, actuator = init_task(config, cam_config, ckp)
    # Get the number of pushing directions (not implemented for all
    # actuators, so this raises an error for fully continuous actuators).
    directions = actuator.directions()
    n_directions = len(directions)
    # Iterate over all actions and determine the reward (assumes action
    # dimensions 1 and 2 encode the start point and dimension 3 the
    # pushing direction).
    print("Start creating reward map ...")
    rewards = None
    # For a discrete action space, each action is simply an integer index.
    if config["act_discrete"]:
        n_actions = task.action_space.n
        n_action_steps = int(np.sqrt(n_actions / n_directions))
        rewards = np.ones(
            (n_action_steps, n_action_steps, n_directions)) * (-1000)
        for x in range(n_actions):
            task.reset()
            action = actuator.undiscretize(np.asarray(x, dtype=int))
            _, reward, done, _ = task.step(np.asarray(x, dtype=int))
            rewards[action[0], action[1], action[2]] = reward
            progress_bar(x, n_actions)
    # For a continuous action space, discretize the actions (assumes an
    # action of the form [start_x, start_y, pushing_direction]).
    else:
        res_cont = 0.1  # [m]
        n_action_steps = int(2.0 / res_cont)
        n_actions = n_directions * (n_action_steps**2)
        rewards = np.ones(
            (n_action_steps, n_action_steps, n_directions)) * (-1000)
        i = 0
        for ix in range(n_action_steps):
            for iy in range(n_action_steps):
                for i_dir in range(n_directions):
                    x, y = -1.0 + ix * res_cont, -1.0 + iy * res_cont
                    d = -0.9 + i_dir * (2.0 / n_directions)
                    task.reset()
                    _, reward, done, _ = task.step(np.asarray([x, y, d]))
                    rewards[ix, iy, i_dir] = reward
                    i += 1
                    progress_bar(i, n_actions)
    # Plotting output graphs.
    _, segmented, _ = sensor.get_state()
    draw_graphs(rewards, segmented, directions, ckp, name="rewards")
    # Cleaning up directory.
    os.remove(os.path.join(ckp.dir, "log_num_steps.pdf"))
    os.remove(os.path.join(ckp.dir, "log_reward.pdf"))
    os.remove(os.path.join(ckp.dir, "log_success_rate.pdf"))
    os.remove(os.path.join(ckp.dir, "logging.txt"))
    print("... finished creating reward map.")
def main():
    parser = argparse.ArgumentParser(description="yumi_push_eval")
    parser.add_argument("--policy",
                        type=str,
                        default="",
                        help="policies directory that should be evaluated")
    parser.add_argument("--out",
                        type=str,
                        default="",
                        help="outs directory that should be evaluated")
    parser.add_argument("--visualize",
                        action="store_true",
                        help="enable visualization (default=False)")
    parser.add_argument("--debug",
                        action="store_true",
                        help="enable debug mode (default=False)")
    parser.add_argument("--activations",
                        action="store_true",
                        help="enable cnn layer visualization (default=False)")
    args = parser.parse_args()
    # Load and check directory.
    directory, path = parse_path(args)
    if not os.path.isdir(os.path.join(path, "checkpoints")):
        raise IOError("No model to load in {}!".format(path))
    if not os.path.isfile(os.path.join(path, "config.txt")):
        raise IOError("No config found in {}!".format(path))
    config = load_yaml(os.path.join(path, "config.txt"))
    config["visualize"] = args.visualize
    config["debug"] = args.debug
    config["working_dir"] = path
    cam_config = load_yaml(os.environ["YUMI_PUSH_CONFIG"] + "/" +
                           config['camera_config'])
    ckp = Checkpoint(config, tag=directory + "_eval")

    # Load task and components.
    world = sim_world.World(config, ckp)
    camera = sim_camera.RGBDCamera(world.physics_client, cam_config)
    robot = sim_robot.Robot2DHand(config, ckp, world)
    actuator = task_actuators.actuator_factory(config, ckp, robot)
    sensor = task_sensors.Sensor(config, ckp, camera=camera)
    reward = task_rewards.RewardFn(config, ckp, camera)

    task = task_task.Task(sensor, actuator, reward, world, config, ckp)
    learn_algo = config.get("train_algo", "ppo2")
    # Keep the returned model so its tensorflow session can be inspected
    # below when plotting activations.
    if learn_algo == "ppo2": model = ppo2(task, config, eval=True)
    elif learn_algo == "deepq": model = deepq(task, config, eval=True)
    else: raise ValueError("Invalid training algorithm {}!".format(learn_algo))

    # Plot CNN layer activations (if requested).
    if args.activations:
        # Access the layer activations of the last evaluation through the
        # tensorflow graph, in which the network and all its parameters
        # are stored.
        session = model.sess
        _, imageToUse, _ = sensor.get_state()  # use the segmented image
        graph = session.graph
        # First convolution.
        layer = graph.get_tensor_by_name("ppo2_model/pi/c1/Conv2D:0")
        tf_placeholder = session.graph.get_tensor_by_name("ppo2_model/Ob:0")
        getActivations(session, layer, tf_placeholder, imageToUse)
        # Second convolution.
        layer = graph.get_tensor_by_name("ppo2_model/pi/c2/Conv2D:0")
        tf_placeholder = session.graph.get_tensor_by_name("ppo2_model/Ob:0")
        getActivations(session, layer, tf_placeholder, imageToUse)

    task.close()
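
# getActivations is not defined in this snippet. A minimal sketch of what it
# plausibly does, assuming the layer tensor evaluates to activations of shape
# (1, H, W, C) when the observation is fed into the "Ob" placeholder:
def getActivations_sketch(session, layer, placeholder, image):
    """Run one forward pass and plot every channel's feature map."""
    units = session.run(
        layer, feed_dict={placeholder: np.reshape(image, [1, *image.shape])})
    n_channels = units.shape[3]
    n_cols = int(np.ceil(np.sqrt(n_channels)))
    n_rows = int(np.ceil(n_channels / n_cols))
    plt.figure(figsize=(2 * n_cols, 2 * n_rows))
    for i in range(n_channels):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.axis("off")
        plt.imshow(units[0, :, :, i], cmap="gray")
    plt.show()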