Example #1
def evaluate():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    # At this point "test" and "dev" have been swapped:
    # whatever we have been developing against as "test" is hereafter called "dev",
    # and "test" is the data that hasn't been touched at all.
    eval_envs = get_correct_eval_env_id_list()

    dataset = faux_dataset_random_pt(eval_envs)
    #dataset = faux_dataset_random_landmark(eval_envs)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    results["all_dist"] = []
    print("Results:", results)
Example #2
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(model, setup["restore_weights_name"],
                                       setup["fix_restored_weights"])
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    roll_out_params = RollOutParams() \
                        .setModelName(setup["model"]) \
                        .setModelFile(setup["model_file"]) \
                        .setRunName(setup["run_name"]) \
                        .setSetupName(P.get_setup_name()) \
                        .setEnvList(eval_envs) \
                        .setMaxDeviation(400) \
                        .setHorizon(100) \
                        .setStepsToForceStop(10) \
                        .setPlot(False) \
                        .setShowAction(False) \
                        .setIgnorePolicyStop(False) \
                        .setPlotDir("evaluate/" + setup["run_name"]) \
                        .setSavePlots(True) \
                        .setRealtimeFirstPerson(False) \
                        .setSaveSamples(False) \
                        .setBuildTrainData(False) \
                        .setSegmentReset("always") \
                        .setSegmentLevel(True) \
                        .setFirstSegmentOnly(False) \
                        .setDebug(setup["debug"]) \
                        .setCuda(setup["cuda"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None
    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = map(
            list, zip(*examples))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
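    # Note: if both eval flags are set, the NL results below overwrite the landmark results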
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
Example #3
def collect_iteration_data(roller, iteration, train_envs, test_envs, latest_model_filename, dagger_data_dir, dataset_name):
    setup = P.get_current_parameters()["Setup"]
    # Collect data with current policy
    num_train_samples = PARAMS["train_envs_per_iteration_override"].get(
        iteration, PARAMS["train_envs_per_iteration"])

    train_envs_i = sample_n_from_list(train_envs, num_train_samples)
    if PARAMS["test_on_train"]:
        test_envs_i = train_envs_i
    else:
        test_envs_i = sample_n_from_list(test_envs, PARAMS["test_envs_per_iteration"])

    train_data_i = rollout_on_env_set(roller, train_envs_i, iteration, latest_model_filename, test=False)
    test_data_i = rollout_on_env_set(roller, test_envs_i, iteration, latest_model_filename, test=True)

    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"], save_images=False)
        evaler.evaluate_dataset(test_data_i)
        results = evaler.get_results()
        print("Results:", results)
        evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"], entire_trajectory=not PARAMS["segment_level"], save_images=False)
        evaler.evaluate_dataset(test_data_i)
        results = evaler.get_results()
        print("Results:", results)
        evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)
    #TODO: Complete

    # Kill the simulators after each rollout to save CPU cycles and avoid the slowdown
    os.system("killall -9 MyProject5-Linux-Shipping")
    os.system("killall -9 MyProject5")

    #save_json(train_summary, dagger_data_dir + "dagger_train_summary_" + str(iteration) + ".json")
    #save_json(test_summary, dagger_data_dir + "dagger_test_summary_" + str(iteration) + ".json")
    data_io.train_data.save_dataset(dataset_name, train_data_i, dagger_data_dir + "train_" + str(iteration))
    data_io.train_data.save_dataset(dataset_name, test_data_i, dagger_data_dir + "test_" + str(iteration))

    return train_data_i, test_data_i
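
sample_n_from_list subsamples the environment pool for each DAgger iteration. A plausible implementation, assuming uniform sampling without replacement and capping n at the list length (hypothetical; the real helper may differ):

import random

def sample_n_from_list(lst, n):
    # Uniform sample without replacement, never asking for more than exists
    return random.sample(lst, min(n, len(lst)))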
Example #4
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        models.append(model)

    eval_envs = list(sorted(get_correct_eval_env_id_list()))

    round_size = P.get_current_parameters()["Data"].get("collect_n_at_a_time")

    # TODO: Scrap RollOutParams and use parameter server JSON params instead
    roll_out_params = RollOutParams() \
                        .setModelName(setup["model"]) \
                        .setModelFile(setup["model_file"]) \
                        .setRunName(setup["run_name"]) \
                        .setSetupName(P.get_setup_name()) \
                        .setEnvList(eval_envs) \
                        .setMaxDeviation(800) \
                        .setHorizon(setup["trajectory_length"]) \
                        .setStepsToForceStop(20) \
                        .setPlot(False) \
                        .setShowAction(False) \
                        .setIgnorePolicyStop(False) \
                        .setPlotDir("evaluate/" + setup["run_name"]) \
                        .setSavePlots(False) \
                        .setRealtimeFirstPerson(False) \
                        .setSaveSamples(False) \
                        .setBuildTrainData(False) \
                        .setSegmentReset("always") \
                        .setSegmentLevel(False) \
                        .setFirstSegmentOnly(False) \
                        .setDebug(setup["debug"]) \
                        .setCuda(setup["cuda"]) \
                        .setRealDrone(setup["real_drone"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None
    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = map(
            list, zip(*examples))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    if round_size:
        eval_dataset_name = data_io.paths.get_eval_tmp_dataset_name(
            setup["model"], setup["run_name"])
        eval_dataset_path = data_io.paths.get_dataset_dir(eval_dataset_name)

        cumulative_dataset = []
        if os.path.exists(eval_dataset_path):
            result = query_user_load_discard(eval_dataset_path)
            if result == "load":
                print("Loading dataset and continuing evaluation")
                cumulative_dataset = load_multiple_env_data_from_dir(
                    eval_dataset_path)
            elif result == "discard":
                print("Discarding existing evaluation data")
                shutil.rmtree(eval_dataset_path)
            elif result == "cancel":
                print("Cancelling evaluation")
                return

        os.makedirs(eval_dataset_path, exist_ok=True)

        collected_envs = {
            rollout[0]["env_id"] for rollout in cumulative_dataset
            if len(rollout) > 0
        }
        eval_envs = [e for e in eval_envs if e not in collected_envs]
        if setup.get("compute_results_no_rollout", False):
            eval_envs = []

        for i in range(0, len(eval_envs), round_size):
            # Slicing clamps at the end of the list, so no explicit min() is needed
            round_envs = eval_envs[i:i + round_size]
            roll_out_params.setEnvList(round_envs)
            dataset = roller.roll_out_policy(roll_out_params)

            # Save this round's rollouts, one file per environment.
            # Each rollout is a list of samples; empty rollouts are skipped
            # here, so no further length check is needed before saving.
            for rollout in dataset:
                if len(rollout) == 0:
                    print("WARNING! DROPPING EMPTY ROLLOUTS! SHOULDN'T DO THIS")
                    continue
                env_id = rollout[0]["env_id"]
                save_dataset_to_path(
                    os.path.join(eval_dataset_path, str(env_id)), rollout)

            cumulative_dataset += dataset
            print(f"Saved cumulative dataset to: {eval_dataset_path}")

        dataset = cumulative_dataset
    else:
        dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"],
                                      save_images=True,
                                      world_size=setup["world_size_m"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)