예제 #1
0
def main(_):
  """Launch one `train_policy.py` subprocess per seed and wait for them all.

  The x-MAGICAL environment is looked up from `FLAGS.embodiment`, and all seeds
  share a single experiment name tagged with `reward="sparse_env"`. Seeds whose
  subprocess exits with a non-zero return code are reported through the logger
  once the subprocess has terminated.

  Args:
    _: Unused.
  """
  # Map the embodiment to the x-MAGICAL env name.
  env_name = XMAGICAL_EMBODIMENT_TO_ENV_NAME[FLAGS.embodiment]

  # Generate a unique experiment name.
  experiment_name = string_from_kwargs(
      env_name=env_name,
      reward="sparse_env",
      uid=unique_id(),
  )
  logging.info("Experiment name: %s", experiment_name)

  # Execute each seed in parallel. FLAGS.seeds is unpacked into range(), so it
  # holds the range arguments rather than the individual seed values.
  procs = []
  for seed in range(*map(int, FLAGS.seeds)):
    proc = subprocess.Popen([  # pylint: disable=consider-using-with
        "python",
        "train_policy.py",
        "--experiment_name",
        experiment_name,
        "--env_name",
        f"{env_name}",
        "--config",
        f"{CONFIG_PATH}:{FLAGS.embodiment}",
        "--seed",
        f"{seed}",
        "--device",
        f"{FLAGS.device}",
    ])
    procs.append((seed, proc))

  # Wait for each seed to terminate. A failing seed is logged instead of being
  # silently ignored; the remaining seeds are still awaited.
  for seed, proc in procs:
    return_code = proc.wait()
    if return_code != 0:
      logging.error(
          "Seed %d exited with return code %d.", seed, return_code)
def main(_):
    """Pretrain one model per held-out embodiment and save its metadata.

    For every embodiment under evaluation (all of `EMBODIMENTS`, or only
    `FLAGS.embodiment` when that flag is set), this:

    1. runs `pretrain.py` on all the other embodiments,
    2. runs `compute_goal_embedding.py` on the resulting experiment directory,
       unless the algorithm is `goal_classifier`,
    3. dumps the experiment kwargs to `metadata.yaml` in that directory.

    Args:
        _: Unused.

    Raises:
        subprocess.CalledProcessError: If a child script exits with a non-zero
            return code (both calls use `check=True`).
    """
    if FLAGS.embodiment is None:
        embodiments = EMBODIMENTS
    else:
        embodiments = [FLAGS.embodiment]

    for embodiment in embodiments:
        # Generate a unique experiment name.
        kwargs = {
            "dataset": "xmagical",
            "mode": "cross",
            "algo": FLAGS.algo,
            "embodiment": embodiment,
        }
        if FLAGS.unique_name:
            kwargs["uid"] = unique_id()
        experiment_name = string_from_kwargs(**kwargs)
        logging.info("Experiment name: %s", experiment_name)

        # Train on all classes but the given embodiment. The set difference
        # has no defined order, so sort it to pass the same class list on
        # every run (assumes embodiment names are mutually comparable,
        # e.g. strings).
        trainable_embs = tuple(sorted(EMBODIMENTS - {embodiment}))

        pretrain_cmd = [
            "python",
            "pretrain.py",
            "--experiment_name",
            experiment_name,
        ]
        # Only add the flag when it applies, rather than passing an empty
        # string argument to the child process.
        if FLAGS.algo == "raw_imagenet":
            pretrain_cmd.append("--raw_imagenet")
        pretrain_cmd.extend([
            "--config",
            f"{ALGO_TO_CONFIG[FLAGS.algo]}",
            "--config.data.pretrain_action_class",
            repr(trainable_embs),
            "--config.data.downstream_action_class",
            repr(trainable_embs),
            "--config.data.max_vids_per_class",
            f"{MAX_DEMONSTRATIONS}",
        ])
        subprocess.run(pretrain_cmd, check=True)

        # Note: This assumes that the config.root_dir value has not been
        # changed from its default value of '/tmp/xirl/pretrain_runs/'.
        exp_path = osp.join("/tmp/xirl/pretrain_runs/", experiment_name)

        # The 'goal_classifier' baseline does not need to compute a goal
        # embedding.
        if FLAGS.algo != "goal_classifier":
            subprocess.run(
                [
                    "python",
                    "compute_goal_embedding.py",
                    "--experiment_path",
                    exp_path,
                ],
                check=True,
            )

        # Dump experiment metadata as yaml file.
        with open(osp.join(exp_path, "metadata.yaml"), "w") as fp:
            yaml.dump(kwargs, fp)
def main(_):
  """Launch one learned-reward `train_policy.py` subprocess per seed.

  Reads `metadata.yaml` from `FLAGS.pretrained_path` to recover the algorithm,
  mode and embodiment of the pretrained model, derives the reward type from
  the algorithm, then starts all seeds in parallel and waits for them. Seeds
  whose subprocess exits with a non-zero return code are reported through the
  logger once the subprocess has terminated.

  Args:
    _: Unused.
  """
  # NOTE(review): yaml.FullLoader is not intended for untrusted input. If
  # pretrained_path can point at files from an untrusted source, consider
  # yaml.safe_load here instead.
  with open(os.path.join(FLAGS.pretrained_path, "metadata.yaml"), "r") as fp:
    kwargs = yaml.load(fp, Loader=yaml.FullLoader)

  if kwargs["algo"] == "goal_classifier":
    reward_type = "goal_classifier"
  else:
    reward_type = "distance_to_goal"

  # Map the embodiment to the x-MAGICAL env name.
  env_name = XMAGICAL_EMBODIMENT_TO_ENV_NAME[kwargs["embodiment"]]

  # Generate a unique experiment name.
  experiment_name = string_from_kwargs(
      env_name=env_name,
      reward="learned",
      reward_type=reward_type,
      mode=kwargs["mode"],
      algo=kwargs["algo"],
      uid=unique_id(),
  )
  logging.info("Experiment name: %s", experiment_name)

  # Execute each seed in parallel. FLAGS.seeds is unpacked into range(), so it
  # holds the range arguments rather than the individual seed values.
  procs = []
  for seed in range(*map(int, FLAGS.seeds)):
    proc = subprocess.Popen([  # pylint: disable=consider-using-with
        "python",
        "train_policy.py",
        "--experiment_name",
        experiment_name,
        "--env_name",
        f"{env_name}",
        "--config",
        f"configs/xmagical/rl/env_reward.py:{kwargs['embodiment']}",
        "--config.reward_wrapper.pretrained_path",
        f"{FLAGS.pretrained_path}",
        "--config.reward_wrapper.type",
        f"{reward_type}",
        "--seed",
        f"{seed}",
        "--device",
        f"{FLAGS.device}",
    ])
    procs.append((seed, proc))

  # Wait for each seed to terminate. A failing seed is logged instead of being
  # silently ignored; the remaining seeds are still awaited.
  for seed, proc in procs:
    return_code = proc.wait()
    if return_code != 0:
      logging.error(
          "Seed %d exited with return code %d.", seed, return_code)