Code Example #1
def calc_rollout_metrics(rollouts):
    ev = DataEvalNL("", save_images=False, entire_trajectory=False)
    metrics = {}
    total_task_success = 0
    total_stop_success = 0
    coverages = []
    reward_keys = [k for k in rollouts[0][0].keys() if k.endswith("_reward")]
    rewards = {k: [] for k in reward_keys}

    for rollout in rollouts:
        success = ev.rollout_success(rollout[0]["env_id"],
                                     rollout[0]["set_idx"],
                                     rollout[0]["seg_idx"], rollout)

        # Take sum of rewards for each type of reward
        for k in reward_keys:
            v = sum([s[k] for s in rollout])
            rewards[k].append(v)

        visit_success = stop_success(rollout)
        total_stop_success += 1 if visit_success else 0
        total_task_success += 1 if success else 0

    task_success_rate = total_task_success / len(rollouts)
    visit_success_rate = total_stop_success / len(rollouts)

    metrics["task_success_rate"] = task_success_rate
    metrics["visit_success_rate"] = visit_success_rate

    rollout_lens = [len(rollout) for rollout in rollouts]
    metrics["mean_rollout_len"] = np.asarray(rollout_lens).mean()

    # Average each reward across rollouts
    rewards = {k: np.mean(np.asarray(l)) for k, l in rewards.items()}
    metrics = dict_merge(metrics, rewards)
    return metrics
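The metric aggregation above follows a simple pattern: sum each "_reward" field within a rollout, then average the sums across rollouts. A runnable toy version of just that pattern, with invented reward values:

import numpy as np

# Rewards are summed within each rollout, then averaged across rollouts.
# The "_reward" key convention matches calc_rollout_metrics; the values
# here are invented for illustration.
rollouts = [
    [{"visit_reward": 1.0}, {"visit_reward": 0.5}],
    [{"visit_reward": 0.0}, {"visit_reward": 2.0}],
]
reward_keys = [k for k in rollouts[0][0] if k.endswith("_reward")]
sums = {k: [sum(s[k] for s in r) for r in rollouts] for k in reward_keys}
means = {k: float(np.mean(v)) for k, v in sums.items()}
print(means)  # {'visit_reward': 1.75}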
Code Example #2
def collect_iteration_data(roller, iteration, train_envs, test_envs,
                           latest_model_filename):
    setup = P.get_current_parameters()["Setup"]

    # Collect data with current policy
    num_train_samples = PARAMS["train_envs_per_iteration"]
    train_envs_i = sample_n_from_list(train_envs, num_train_samples)
    if PARAMS["test_on_train"]:
        test_envs_i = train_envs_i
    else:
        test_envs_i = sample_n_from_list(test_envs,
                                         PARAMS["test_envs_per_iteration"])

    train_data_i = rollout_on_env_set(roller,
                                      train_envs_i,
                                      iteration,
                                      latest_model_filename,
                                      test=False)
    test_data_i = rollout_on_env_set(roller,
                                     test_envs_i,
                                     iteration,
                                     latest_model_filename,
                                     test=True)

    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaler.evaluate_dataset(test_data_i)
        results = evaler.get_results()
        print("Results:", results)
        evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            entire_trajectory=not PARAMS["segment_level"],
                            save_images=False)
        evaler.evaluate_dataset(test_data_i)
        results = evaler.get_results()
        print("Results:", results)
        evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

    # Kill the simulators after each rollout to free CPU cycles and avoid slowdowns
    os.system("killall -9 MyProject5-Linux-Shipping")
    os.system("killall -9 MyProject5")

    return train_data_i, test_data_i
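collect_iteration_data assumes a sample_n_from_list helper. A plausible implementation is a capped sample without replacement; the actual drif version may differ:

import random

# Hypothetical stand-in for the sample_n_from_list helper used above:
# sample n items without replacement, capped at the list length.
def sample_n_from_list(lst, n):
    return random.sample(lst, min(n, len(lst)))

print(sample_n_from_list([101, 102, 103, 104], 2))  # e.g. [103, 101]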
Code Example #3
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(model, setup["restore_weights_name"],
                                       setup["fix_restored_weights"])
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    roll_out_params = RollOutParams() \
                        .setModelName(setup["model"]) \
                        .setModelFile(setup["model_file"]) \
                        .setRunName(setup["run_name"]) \
                        .setSetupName(P.get_setup_name()) \
                        .setEnvList(eval_envs) \
                        .setMaxDeviation(400) \
                        .setHorizon(100) \
                        .setStepsToForceStop(10) \
                        .setPlot(False) \
                        .setShowAction(False) \
                        .setIgnorePolicyStop(False) \
                        .setPlotDir("evaluate/" + setup["run_name"]) \
                        .setSavePlots(True) \
                        .setRealtimeFirstPerson(False) \
                        .setSaveSamples(False) \
                        .setBuildTrainData(False) \
                        .setSegmentReset("always") \
                        .setSegmentLevel(True) \
                        .setFirstSegmentOnly(False) \
                        .setDebug(setup["debug"]) \
                        .setCuda(setup["cuda"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None
    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = \
            map(list, zip(*examples))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
Code Example #4
def evaluate_saved_rollouts():
    params = P.get_current_parameters()
    setup = params["Setup"]
    model_name = setup["model"]
    run_name = setup["run_name"]
    eval_dname = get_eval_tmp_dataset_name(model_name, run_name)

    eval_envs = set(get_correct_eval_env_id_list())
    rollouts = load_multiple_env_data(eval_dname)
    present_envs = {rollout[0]["env_id"]
                    for rollout in rollouts if len(rollout) > 0}
    missing_envs = eval_envs - present_envs

    logdir = get_results_dir(run_name)

    if len(missing_envs) > 0:
        print(f"Warning! {len(missing_envs)} envs missing: {missing_envs}")
        #sys.exit(1)

    log("", logdir)
    log(
        "--------------------------------------------------------------------------------------------",
        logdir)
    log(f"Evaluating rollouts for run {run_name}", logdir)
    log(f"   using dataset {eval_dname}", logdir)
    log(f"   missing envs {missing_envs}", logdir)
    log(
        "--------------------------------------------------------------------------------------------",
        logdir)

    evaler1 = DataEvalNL(setup["run_name"] + "1-1",
                         save_images=False,
                         entire_trajectory=False,
                         aug_len=1)
    evaler1.evaluate_dataset(rollouts)
    results1 = evaler1.get_results()

    evaler2 = DataEvalNL(setup["run_name"] + "2-2",
                         save_images=False,
                         entire_trajectory=False,
                         aug_len=2)
    evaler2.evaluate_dataset(rollouts)
    results2 = evaler2.get_results()

    evalerf = DataEvalNL(setup["run_name"] + "1-2",
                         save_images=True,
                         entire_trajectory=False)
    evalerf.evaluate_dataset(rollouts)
    resultsf = evalerf.get_results()

    log(f"Results 1-1:{results1}", logdir)
    log(f"Results 2-2:{results2}", logdir)
    log(f"Results 1-2:{resultsf}", logdir)

    log(f" -- END EVALUATION FOR {run_name}-- ", logdir)
    log(
        "--------------------------------------------------------------------------------------------",
        logdir)
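The missing-environment check above is plain set arithmetic: every expected env id minus those seen in non-empty rollouts. A toy version with invented ids:

# Invented env ids; same set difference as in evaluate_saved_rollouts.
eval_envs = {101, 102, 103, 104}
rollouts = [[{"env_id": 101}], [], [{"env_id": 103}]]
present_envs = {r[0]["env_id"] for r in rollouts if len(r) > 0}
print(eval_envs - present_envs)  # {102, 104}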
Code Example #5
File: evaluate.py  Project: pianpwk/drif
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        models.append(model)

    eval_envs = list(sorted(get_correct_eval_env_id_list()))

    round_size = P.get_current_parameters()["Data"].get("collect_n_at_a_time")

    # TODO: Scrap RollOutParams and use parameter server JSON params instead
    roll_out_params = RollOutParams() \
                        .setModelName(setup["model"]) \
                        .setModelFile(setup["model_file"]) \
                        .setRunName(setup["run_name"]) \
                        .setSetupName(P.get_setup_name()) \
                        .setEnvList(eval_envs) \
                        .setMaxDeviation(800) \
                        .setHorizon(setup["trajectory_length"]) \
                        .setStepsToForceStop(20) \
                        .setPlot(False) \
                        .setShowAction(False) \
                        .setIgnorePolicyStop(False) \
                        .setPlotDir("evaluate/" + setup["run_name"]) \
                        .setSavePlots(False) \
                        .setRealtimeFirstPerson(False) \
                        .setSaveSamples(False) \
                        .setBuildTrainData(False) \
                        .setSegmentReset("always") \
                        .setSegmentLevel(False) \
                        .setFirstSegmentOnly(False) \
                        .setDebug(setup["debug"]) \
                        .setCuda(setup["cuda"]) \
                        .setRealDrone(setup["real_drone"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None
    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = \
            map(list, zip(*examples))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    if round_size:
        eval_dataset_name = data_io.paths.get_eval_tmp_dataset_name(
            setup["model"], setup["run_name"])
        eval_dataset_path = data_io.paths.get_dataset_dir(eval_dataset_name)

        cumulative_dataset = []
        if os.path.exists(eval_dataset_path):
            result = query_user_load_discard(eval_dataset_path)
            if result == "load":
                print("Loading dataset and continuing evaluation")
                cumulative_dataset = load_multiple_env_data_from_dir(
                    eval_dataset_path)
            elif result == "discard":
                print("Discarding existing evaluation data")
                shutil.rmtree(eval_dataset_path)
            elif result == "cancel":
                print("Cancelling evaluation")
                return

        os.makedirs(eval_dataset_path, exist_ok=True)

        collected_envs = {rollout[0]["env_id"]
                          for rollout in cumulative_dataset
                          if len(rollout) > 0}
        eval_envs = [e for e in eval_envs if e not in collected_envs]
        if setup.get("compute_results_no_rollout", False):
            eval_envs = []

        for i in range(0, len(eval_envs), round_size):
            j = min(len(eval_envs), i + round_size)
            round_envs = eval_envs[i:j]
            roll_out_params.setEnvList(round_envs)
            dataset = roller.roll_out_policy(roll_out_params)

            # Save this data
            for rollout in dataset:
                if len(rollout) == 0:
                    print(
                        "WARNING! DROPPING EMPTY ROLLOUTS! SHOULDN'T DO THIS")
                    continue
                # rollout is a list of samples; save it under its env_id
                env_id = rollout[0]["env_id"]
                save_dataset_to_path(
                    os.path.join(eval_dataset_path, str(env_id)), rollout)

            cumulative_dataset += dataset
            print(f"Saved cumulative dataset to: {eval_dataset_path}")

        dataset = cumulative_dataset
    else:
        dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"],
                                      save_images=True,
                                      world_size=setup["world_size_m"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
Code Example #6
File: train_dagger_simple.py  Project: pianpwk/drif
def train_dagger_simple():
    # ----------------------------------------------------------------------------------------------------------------
    # Load params and configure stuff

    P.initialize_experiment()
    params = P.get_current_parameters()["SimpleDagger"]
    setup = P.get_current_parameters()["Setup"]
    num_iterations = params["num_iterations"]
    sim_seed_dataset = params.get("sim_seed_dataset")
    run_name = setup["run_name"]
    device = params.get("device", "cuda:1")
    dataset_limit = params.get("dataset_size_limit_envs")
    seed_count = params.get("seed_count")

    # Trigger rebuild if necessary before going into all the threads and processes
    _ = get_restricted_env_id_lists(full=True)

    # Initialize the dataset
    if sim_seed_dataset:
        copy_seed_dataset(from_dataset=sim_seed_dataset,
                          to_dataset=dagger_dataset_name(run_name),
                          seed_count=seed_count or dataset_limit)
        gap = 0
    else:
        # TODO: Refactor this into a prompt function
        data_path = get_dataset_dir(dagger_dataset_name(run_name))
        if os.path.exists(data_path):
            print("DATASET EXISTS! Continue where left off?")
            c = input(" (y/n) >>> ")
            if c != "y":
                raise ValueError(
                    f"Not continuing: Dataset {data_path} exists. Delete it if you like and try again"
                )
        else:
            os.makedirs(data_path, exist_ok=True)
        gap = dataset_limit - len(os.listdir(data_path))

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # ----------------------------------------------------------------------------------------------------------------
    # Load / initialize model

    model = load_model(setup["model"], setup["model_file"],
                       domain="sim")[0].to(device)
    oracle = load_model("oracle")[0]

    # ----------------------------------------------------------------------------------------------------------------
    # Continue where we left off - load the model and set the iteration/epoch number

    for start_iteration in range(10000):
        epfname = epoch_dag_filename(run_name, start_iteration)
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_iteration > 0:
        print(
            f"DAG: CONTINUING DAGGER TRAINING FROM ITERATION: {start_iteration}"
        )
        load_pytorch_model(model,
                           epoch_dag_filename(run_name, start_iteration - 1))

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize trainer

    trainer = Trainer(model,
                      epoch=start_iteration,
                      name=setup["model"],
                      run_name=setup["run_name"])
    trainer.set_dataset_names([dagger_dataset_name(run_name)])

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize policy roller

    roller = SimpleParallelPolicyRoller(
        num_workers=params["num_workers"],
        device=params["device"],
        policy_name=setup["model"],
        policy_file=setup["model_file"],
        oracle=oracle,
        dataset_save_name=dagger_dataset_name(run_name),
        no_reward=True)
    rollout_sampler = RolloutSampler(roller)

    # ----------------------------------------------------------------------------------------------------------------
    # Train DAgger - loop over iterations; in each, prune, roll out, and train an epoch

    print("SUPP: Beginning training...")
    for iteration in range(start_iteration, num_iterations):
        print(f"DAG: Starting iteration {iteration}")

        # Remove extra rollouts to keep within DAggerFM limit
        prune_dataset(run_name, dataset_limit)

        # Rollout and collect more data for training and evaluation
        policy_state = model.get_policy_state()
        rollout_sampler.sample_n_rollouts(
            n=gap if iteration == 0 else params["train_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="train",
            dagger_beta=dagger_beta(params, iteration))

        eval_rollouts = rollout_sampler.sample_n_rollouts(
            n=params["eval_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="dev",
            dagger_beta=0)

        # Kill AirSim instances so they don't hold GPU memory or slow down training
        roller.kill_airsim()

        # Evaluate success / metrics and save to tensorboard
        if setup["eval_nl"]:
            evaler = DataEvalNL(run_name,
                                entire_trajectory=False,
                                save_images=False)
            evaler.evaluate_dataset(eval_rollouts)
            results = evaler.get_results()
            print("Results:", results)
            evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

        # Do one epoch of supervised training
        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)
        #test_loss = trainer.train_epoch(env_list_common=dev_envs_common, env_list_sim=dev_envs_sim, eval=True)

        # Save the model to file
        print("SUPP: Epoch", iteration, "train_loss:", train_loss)
        save_pytorch_model(model, epoch_dag_filename(run_name, iteration))
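dagger_beta(params, iteration) is not shown here. DAgger commonly decays beta, the probability of following the oracle rather than the learned policy, across iterations; an exponential schedule is one standard choice. The drif helper may use a different rule, so treat this sketch as an assumption:

# Assumed schedule, not the drif implementation: beta decays from 1.0
# so early rollouts follow the oracle, later ones the learned policy.
def dagger_beta_sketch(iteration, decay=0.9):
    return decay ** iteration

print([round(dagger_beta_sketch(i), 3) for i in range(4)])
# [1.0, 0.9, 0.81, 0.729]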