Example #1
def train_sureal():
    P.initialize_experiment()
    ctx = mp.get_context("spawn")

    pipe_rl_end, pipe_sup_end = ctx.Pipe()

    rlsup_params = P.get_current_parameters()["RLSUP"]
    sim_seed_dataset = rlsup_params.get("sim_seed_dataset")
    run_name = P.get_current_parameters()["Setup"]["run_name"]

    # Trigger rebuild if necessary before going into all the threads and processes
    _ = get_restricted_env_id_lists()
    _ = get_restricted_env_id_lists(full=True)

    if sim_seed_dataset:
        copy_seed_dataset(from_dataset=sim_seed_dataset,
                          to_dataset=rl_dataset_name(run_name))

    if DEBUG_SUP:
        train_supervised_worker(pipe_sup_end)
    elif DEBUG_RL:
        # Start supervised learning in another process. Keep RL in main process.
        sup_process = ctx.Process(target=train_supervised_worker,
                                  args=[pipe_sup_end])
        sup_process.start()
        train_rl_worker(pipe_rl_end)
    else:
        rl_process = ctx.Process(target=train_rl_worker, args=[pipe_rl_end])
        sup_process = ctx.Process(target=train_supervised_worker,
                                  args=[pipe_sup_end])

        rl_process.start()
        sup_process.start()
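The two pipe ends let the workers exchange messages across processes. A minimal sketch (not part of the source) of how the RL worker might consume Stage 1 checkpoints, assuming the message format used by train_supervised_worker in Example #17; the polling loop itself is hypothetical:

def _poll_stage1_updates(pipe_rl_end, stage1_model):
    # Drain any pending messages without blocking the RL loop
    while pipe_rl_end.poll():
        msg_type, payload = pipe_rl_end.recv()
        if msg_type == "stage1_model_state_dict":
            # Hot-swap in the latest supervised Stage 1 weights
            stage1_model.load_state_dict(payload)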
Example #2
def generate_rollout_amt_visualizations():

    setup = P.get_current_parameters()["Setup"]

    dataset_name = setup.get("viz_dataset_name") or get_eval_tmp_dataset_name(
        setup["model"], setup["run_name"])
    print(f"Generating AMT animations for dataset: {dataset_name}")
    pic_domain = "sim"
    data_domain = "real"
    # Some quick params. TODO: Bring this into json
    viz_params = {
        "draw_drone": True,
        "draw_trajectory": True,
        "draw_fov": True,
        "include_vdist": False,
        "include_layer": None,
        "include_instr": False
    }

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # TODO: Grab the correct env list
    env_list = test_envs

    viz = RolloutVisualizer(resolution=400)
    base_dir = os.path.join(get_rollout_viz_dir(),
                            f"{dataset_name}-{data_domain}")
    os.makedirs(base_dir, exist_ok=True)

    for env_id in env_list:
        try:
            env_data = load_single_env_from_dataset(dataset_name, env_id,
                                                    "supervised")
        except FileNotFoundError:
            print(f"Skipping env: {env_id}")
            continue
        if len(env_data) == 0:
            print(f"Skipping env: {env_id}. Rollout exists but is EMPTY!")
            continue
        segs = split_into_segs(env_data)
        for seg in segs:
            seg_idx = seg[0]["seg_idx"]
            seg_name = f"{env_id}:0:{seg_idx}-{data_domain}"
            gif_filename = f"{seg_name}-roll.gif"
            instr_filename = f"{seg_name}-instr.txt"

            # Generate and save gif
            frames = viz.top_down_visualization(env_id, seg_idx, seg,
                                                pic_domain, viz_params)
            print("Saving GIF")
            viz.presenter.save_gif(frames,
                                   os.path.join(base_dir, gif_filename),
                                   fps=5.0)

            # Save instruction
            with open(os.path.join(base_dir, instr_filename), "w") as fp:
                fp.write(seg[0]["instruction"])

        print("ding")
Example #3
def build_noisy_pose_data():
    """
    Randomly sample pose noise for every observation in every environment for the RSS experiment with noisy poses.
    This needs to be pre-computed once before training to simulate the noise being measured during trajectory collection.
    If we were to randomize poses during training, that would be akin to regularization,
    which could actually improve performance instead of hurting it.
    :return:
    """
    initialize_experiment()
    params = get_current_parameters()
    setup_params = params["Setup"]

    train_envs, dev_envs, test_envs = get_restricted_env_id_lists(
        max_envs=setup_params["max_envs"],
        prune_ambiguous=setup_params["prune_ambiguous"])

    envs = dev_envs + train_envs + test_envs
    print("Num envs:" + str(len(envs)))

    pos_noise = params["Data"]["noisy_pos_variance"]
    rot_noise = params["Data"]["noisy_rot_variance"]

    noisy_poses = get_pose_noise_np(setup_params["max_envs"], setup_params["trajectory_length"], pos_noise, rot_noise)
    save_noisy_poses(noisy_poses)
    print("saved noisy poses for " + str(setup_params["max_envs"]) + " envs")
Example #4
def count_avg_num_steps():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    P.get_current_parameters()["Data"]["locking"] = False
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    train_envs = [e for e in train_envs if e >= 6000]

    count_avg_num_steps_on_data(train_envs)
Example #5
def train_supervised():
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    supervised_params = get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model, model_loaded = load_model()

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()
    filename = "supervised_" + setup["model"] + "_" + setup["run_name"]
    start_filename = "tmp/" + filename + "_epoch_" + str(
        supervised_params["start_epoch"])
    if supervised_params["start_epoch"] > 0:
        if file_exists(start_filename, ""):
            load_pytorch_model(model, start_filename)
        else:
            print("Couldn't continue training. Model file doesn't exist at:")
            print(start_filename)
            exit(-1)

    trainer = Trainer(model,
                      epoch=supervised_params["start_epoch"],
                      name=setup["model"],
                      run_name=setup["run_name"])

    print("Beginning training...")
    best_test_loss = float("inf")
    for epoch in range(num_epochs):
        train_loss = trainer.train_epoch(train_data=None,
                                         train_envs=train_envs,
                                         eval=False)
        trainer.model.correct_goals = 0
        trainer.model.total_goals = 0
        test_loss = trainer.train_epoch(train_data=None,
                                        train_envs=dev_envs,
                                        eval=True)

        print("GOALS: ", trainer.model.correct_goals,
              trainer.model.total_goals)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(trainer.model, filename)
            print("Saved model in:", filename)
        print("Epoch", epoch, "train_loss:", train_loss, "test_loss:",
              test_loss)
        save_pytorch_model(trainer.model,
                           "tmp/" + filename + "_epoch_" + str(epoch))
        if hasattr(trainer.model, "save"):
            trainer.model.save(epoch)
        save_pretrained_weights(trainer.model, setup["run_name"])
Example #6
def train_supervised_bidomain():
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    env_list_name = setup.get("eval_env_set", "dev")
    if env_list_name == "dev":
        print("Using DEV envs")
        use_envs = dev_envs
    elif env_list_name == "train":
        print("Using TRAIN envs")
        use_envs = train_envs
    elif env_list_name == "test":
        print("Using TEST envs")
        use_envs = test_envs
    else:
        raise ValueError(f"Unknown env set {env_list_name}")

    env_range_start = setup.get("env_range_start", 0)
    if env_range_start > 0:
        use_envs = [e for e in use_envs if e >= env_range_start]
    env_range_end = setup.get("env_range_end", 0)
    if env_range_end > 0:
        use_envs = [e for e in use_envs if e < env_range_end]

    restricted_domain = "simulator"
    if restricted_domain == "simulator":
        # Load dummy model for real domain
        model_real, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
        model_sim.set_save_path_overlays(True)
    elif restricted_domain == "real":
        # Load dummy model for sim domain
        model_sim, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
        model_real.set_save_path_overlays(True)
    else:
        model_real.set_save_path_overlays(True)
        model_sim.set_save_path_overlays(True)

    trainer = TrainerBidomain(model_real, model_sim, model_critic, epoch=0)
    trainer.train_epoch(env_list=use_envs, eval=True, restricted_domain=restricted_domain)

    if restricted_domain != "simulator":
        model_real.print_metrics()
    if restricted_domain != "real":
        model_sim.print_metrics()
Example #7
def sample_real_data_subset():
    global env_list, num_env_groups
    P.initialize_experiment()
    if env_list == "DEV":
        train_i, dev_i, test_i = get_restricted_env_id_lists()
        env_list = dev_i
    elif env_list == "TEST":
        train_i, dev_i, test_i = get_restricted_env_id_lists()
        env_list = test_i

    # Each 5 subsequent environments are the same. First sample groups, then sample environments
    groups = set()
    for env in env_list:
        groups.add(env // 5)

    groups = list(groups)
    group_envs_rel = {}
    pick_groups = random.sample(groups, num_env_groups)
    for group in pick_groups:
        group_envs_rel[group] = []
        i = 0
        while i < envs_each_group:
            # Rejection-sample until we have envs_each_group distinct offsets
            rint = random.randint(0, 4)
            if rint not in group_envs_rel[group]:
                group_envs_rel[group].append(rint)
                i += 1

    env_ids_out = []
    for group, env_rels in group_envs_rel.items():
        for env_rel in env_rels:
            env_id = group * 5 + env_rel
            env_ids_out.append(env_id)

    print(f"Sampled {len(env_ids_out)} envs:")
    print(list(sorted(env_ids_out)))
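Since each group has exactly five candidate offsets, the rejection loop above is equivalent to sampling without replacement; a shorter alternative (a sketch, not from the source, using the same names):

# Draw envs_each_group distinct offsets per group in one call
group_envs_rel = {group: random.sample(range(5), envs_each_group)
                  for group in pick_groups}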
Example #8
def collect_supervised_data():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    if P.get_current_parameters()["Setup"].get("env_set") == "train":
        print("Collecting data for training envs")
        collect_data_on_env_list(train_envs)
    elif P.get_current_parameters()["Setup"].get("env_set") == "dev":
        print("Collecting data for dev envs")
        collect_data_on_env_list(dev_envs)
    else:
        print("Collecting data for both training and dev envs")
        collect_data_on_env_list(train_envs)
        collect_data_on_env_list(dev_envs)
Example #9
def train_rl():
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    params = get_current_parameters()["RL"]

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    filename = "rl_" + setup["model"] + "_" + setup["run_name"]

    trainer = TrainerRL(params=dict_merge(setup, params))

    # Find the first epoch for which no checkpoint exists yet
    for start_epoch in range(10000):
        epfname = epoch_filename(filename, start_epoch)
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break

    if start_epoch > 0:
        print(f"CONTINUING RL TRAINING FROM EPOCH: {start_epoch}")
        load_pytorch_model(trainer.full_model,
                           epoch_filename(filename, start_epoch - 1))
        trainer.set_start_epoch(start_epoch)

    print("Beginning training...")
    best_dev_reward = -1e+10
    for epoch in range(start_epoch, 10000):
        train_reward, metrics = trainer.train_epoch(eval=False, envs="train")
        # TODO: Test on just a few dev environments
        # TODO: Take most likely or mean action when testing
        dev_reward, metrics = trainer.train_epoch(eval=True, envs="dev")
        # Dev reward is currently ignored; a checkpoint is saved every epoch instead
        dev_reward = 0

        #if dev_reward >= best_dev_reward:
        #    best_dev_reward = dev_reward
        #    save_pytorch_model(trainer.full_model, filename)
        #    print("Saved model in:", filename)

        print("Epoch", epoch, "train reward:", train_reward, "dev reward:",
              dev_reward)
        save_pytorch_model(trainer.full_model, epoch_filename(filename, epoch))
        if hasattr(trainer.full_model, "save"):
            trainer.full_model.save(epoch)
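epoch_filename is not defined in this snippet; a plausible sketch, by analogy with the checkpoint naming in Example #5 (the exact format is an assumption):

def epoch_filename(filename, epoch):
    # Per-epoch checkpoint name, e.g. "rl_<model>_<run_name>_epoch_3"
    return f"{filename}_epoch_{epoch}"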
Example #10
def view_collected_data(args):
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists(
        args.max_envs)
    total_len = 0
    for env_id in train_envs:
        print("Showing env id: ", env_id)
        data = load_single_env_supervised_data(env_id)
        for sample in data:
            Presenter().show_sample(sample["state"], sample["action"], 0,
                                    sample["instruction"])
            print("Image size: ", sample["state"].image.shape)
            print("Pose: ", sample["state"].get_pos_3d(),
                  sample["state"].get_rot_euler())
            cv2.waitKey()

        total_len += len(data)
    print("Total samples viewed:", total_len)
Example #11
def copy_seed_dataset(from_dataset, to_dataset, seed_count):
    from_dir = get_dataset_dir(from_dataset)
    to_dir = get_dataset_dir(to_dataset)
    if os.path.exists(to_dir):
        print("DATASET EXISTS! Continue where left off?")
        c = input(" (y/n) >>> ")
        if c == "y":
            return
        else:
            raise ValueError(
                f"Not continuing: Dataset {to_dataset} exists. Delete it if you like and try again"
            )
    os.makedirs(to_dir)
    from_files = os.listdir(from_dir)

    train_ids, dev_ids, test_ids = get_restricted_env_id_lists()
    train_ids = set(train_ids)
    dev_ids = set(dev_ids)
    test_ids = set(test_ids)

    file_envs = [
        int(f.split("supervised_train_data_env_")[1]) for f in from_files
    ]
    files_and_envs = list(zip(from_files, file_envs))
    random.shuffle(files_and_envs)

    files_to_copy = []
    train_envs_copied = 0
    for file, env in files_and_envs:
        if env in train_ids and train_envs_copied < seed_count:
            files_to_copy.append(file)
            train_envs_copied += 1

    print(f"Copying {train_envs_copied} train envs from {from_dataset} to {to_dataset}")

    for file in files_to_copy:
        from_path = os.path.join(from_dir, file)
        to_path = os.path.join(to_dir, file)
        shutil.copy(from_path, to_path)
Example #12
def browse_pvn_dataset():
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    model_sim, _ = load_model(setup["model"],
                              setup["sim_model_file"],
                              domain="sim")
    data_params = P.get_current_parameters()["Training"]

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    #dom="real"
    dom = "sim"

    dataset = model_sim.get_dataset(
        data=None,
        envs=train_envs,
        domain=dom,
        dataset_names=data_params[f"{dom}_dataset_names"],
        dataset_prefix="supervised",
        eval=False,
        halfway_only=False)

    p = Presenter()

    for example in dataset:
        if example is None:
            continue
        md = example["md"][0]
        print(
            f"Showing example: {md['env_id']}:{md['set_idx']}:{md['seg_idx']}")
        print(f"  instruction: {md['instruction']}")
        exec_len = len(example["images"])
        for i in range(exec_len):
            print(f"   timestep: {i}")
            img_i = example["images"][i]
            lm_fpv_i = example["lm_pos_fpv"][i]
            if lm_fpv_i is not None:
                img_i = p.plot_pts_on_torch_image(img_i, lm_fpv_i.long())
            p.show_image(img_i, "fpv_img_i", scale=4, waitkey=True)
Example #13
def sample_human_envs():
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()
    random.shuffle(test_envs)
    human_envs = test_envs[:NUM_ENVS]
    human_envs = sorted(human_envs)
    save_json(human_envs, get_human_eval_envs_path())
Example #14
def first_choice_ok(charray):
    c5 = (np.asarray(charray) // 5).astype(np.int64)
    # An entry is a copy if its group index appears anywhere else in the array
    copies = [c5[c] in c5[:c] or c5[c] in c5[c+1:] for c in range(len(c5))]
    num_copies = np.asarray(copies).sum()
    print("num_envs_same: ", num_copies)
    if num_copies > 0:
        return False
    return True
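Equivalently, since the choice is valid exactly when every group index is unique, the duplicate check can be written with np.unique (a sketch, not from the source):

def first_choice_ok_alt(charray):
    c5 = (np.asarray(charray) // 5).astype(np.int64)
    # Valid when no environment group occurs twice
    return len(np.unique(c5)) == len(c5)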


if __name__ == "__main__":
    P.initialize_experiment()
    real_data_dir = get_dataset_dir("real")
    files = os.listdir(real_data_dir)

    available_env_ids = set([int(f.split("supervised_train_data_env_")[1]) for f in files])
    train_ids, dev_ids, test_ids = get_restricted_env_id_lists()
    train_ids = set(train_ids)
    dev_ids = set(dev_ids)
    test_ids = set(test_ids)

    avail_train_ids = list(train_ids.intersection(available_env_ids))
    avail_dev_ids = list(dev_ids.intersection(available_env_ids))
    avail_test_ids = list(test_ids.intersection(available_env_ids))

    print(f"Making subsets from total envs: {len(avail_train_ids)}")

    splits_out = {}

    choice = []
    prev_split = splits[0]
    while not first_choice_ok(choice):
Example #15
def train_top_down_pred(args, max_epoch=SUPERVISED_EPOCHS):
    initialize_experiment(args.run_name, args.setup_name)

    model, model_loaded = load_model()

    # TODO: Get batch size from global parameter server when it exists
    batch_size = 1 if args.model in {
        "top_down", "top_down_prior", "top_down_sm", "top_down_pretrain",
        "top_down_goal_pretrain", "top_down_nav", "top_down_cond"
    } else BATCH_SIZE

    lr = 0.001  # * batch_size
    trainer = Trainer(model,
                      epoch=args.start_epoch,
                      name=args.model,
                      run_name=args.run_name)

    train_envs, dev_envs, test_envs = get_restricted_env_id_lists(
        max_envs=args.max_envs)

    filename = "top_down_" + args.model + "_" + args.run_name

    if args.restore_weights_name is not None:
        restore_pretrained_weights(model, args.restore_weights_name,
                                   args.fix_restored_weights)

    print("Beginning training...")
    best_test_loss = float("inf")

    validation_loss = []

    for epoch in range(SUPERVISED_EPOCHS):
        train_loss = -1

        if not args.eval_pretrain:
            train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)

        test_loss = trainer.train_epoch(train_envs=dev_envs, eval=True)
        validation_loss.append([epoch, test_loss])

        if not args.eval_pretrain:
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                save_pytorch_model(trainer.model, filename)
                print("Saved model in:", filename)

            print("Epoch", epoch, "train_loss:", train_loss, "test_loss:",
                  test_loss)
            save_pytorch_model(trainer.model,
                               "tmp/" + filename + "_epoch_" + str(epoch))
            save_pretrained_weights(trainer.model, args.run_name)

        else:
            break

        if max_epoch is not None and epoch > max_epoch:
            print("Reached epoch limit!")
            break

    test_loss_path = os.path.join(get_model_dir(), "test_loss",
                                  filename + "_test_loss.csv")
    validation_loss = pd.DataFrame(validation_loss,
                                   columns=["epoch", "test_loss"])
    validation_loss.to_csv(test_loss_path, index=False)
Example #16
from data_io.instructions import get_restricted_env_id_lists, get_all_instructions
import parameters.parameter_server as P

if __name__ == "__main__":
    P.initialize_experiment()
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()
    train_i, dev_i, test_i, corpus = get_all_instructions()

    train_i_envs = set([int(i) for i in train_i.keys()])

    for test_env in test_envs:
        assert test_env not in train_i_envs, "FAIL"

    for dev_env in dev_envs:
        assert dev_env not in train_i_envs, "FAIL"

    print("OK")
Example #17
def train_supervised_worker(rl_process_conn):
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    rlsup = P.get_current_parameters()["RLSUP"]
    setup["trajectory_length"] = setup["sup_trajectory_length"]
    run_name = setup["run_name"]
    supervised_params = P.get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]
    sup_device = rlsup.get("sup_device", "cuda:1")

    model_oracle_critic = None

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # Load the starter model and save it at epoch 0
    # Supervised worker to use GPU 1, RL will use GPU 0. Simulators run on GPU 2
    model_sim = load_model(setup["sup_model"],
                           setup["sim_model_file"],
                           domain="sim")[0].to(sup_device)
    model_real = load_model(setup["sup_model"],
                            setup["real_model_file"],
                            domain="real")[0].to(sup_device)
    model_critic = load_model(setup["sup_critic_model"],
                              setup["critic_model_file"])[0].to(sup_device)

    # ----------------------------------------------------------------------------------------------------------------

    print("SUPP: Initializing trainer")
    rlsup_params = P.get_current_parameters()["RLSUP"]
    sim_seed_dataset = rlsup_params.get("sim_seed_dataset")

    # TODO: Figure out whether the cutoff should be 6000 or 7000 here
    trainer = TrainerBidomainBidata(model_real,
                                    model_sim,
                                    model_critic,
                                    model_oracle_critic,
                                    epoch=0)
    train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
    train_envs_sim = [e for e in train_envs if e < 7000]
    dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
    dev_envs_sim = [e for e in dev_envs if e < 7000]
    sim_datasets = [rl_dataset_name(run_name)]
    real_datasets = ["real"]
    trainer.set_dataset_names(sim_datasets=sim_datasets,
                              real_datasets=real_datasets)

    # ----------------------------------------------------------------------------------------------------------------
    # Find the first supervised epoch for which no checkpoint exists yet
    for start_sup_epoch in range(10000):
        epfname = epoch_sup_filename(run_name,
                                     start_sup_epoch,
                                     model="stage1",
                                     domain="sim")
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_sup_epoch > 0:
        print(f"SUPP: CONTINUING SUP TRAINING FROM EPOCH: {start_sup_epoch}")
        load_pytorch_model(
            model_real,
            epoch_sup_filename(run_name,
                               start_sup_epoch - 1,
                               model="stage1",
                               domain="real"))
        load_pytorch_model(
            model_sim,
            epoch_sup_filename(run_name,
                               start_sup_epoch - 1,
                               model="stage1",
                               domain="sim"))
        load_pytorch_model(
            model_critic,
            epoch_sup_filename(run_name,
                               start_sup_epoch - 1,
                               model="critic",
                               domain="critic"))
        trainer.set_start_epoch(start_sup_epoch)

    # ----------------------------------------------------------------------------------------------------------------
    print("SUPP: Beginning training...")
    for epoch in range(start_sup_epoch, num_epochs):
        # Tell the RL process that a new Stage 1 model is ready for loading
        print("SUPP: Sending model to RL")
        model_sim.reset()
        rl_process_conn.send(
            ["stage1_model_state_dict",
             model_sim.state_dict()])
        if DEBUG_RL:
            while True:
                sleep(1)

        if not sim_seed_dataset:
            ddir = get_dataset_dir(rl_dataset_name(run_name))
            os.makedirs(ddir, exist_ok=True)
            while len(os.listdir(ddir)) < 20:
                print("SUPP: Waiting for rollouts to appear")
                sleep(3)

        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(env_list_common=train_envs_common,
                                         env_list_sim=train_envs_sim,
                                         eval=False)
        test_loss = trainer.train_epoch(env_list_common=dev_envs_common,
                                        env_list_sim=dev_envs_sim,
                                        eval=True)
        print("SUPP: Epoch", epoch, "train_loss:", train_loss, "test_loss:",
              test_loss)
        save_pytorch_model(
            model_real,
            epoch_sup_filename(run_name, epoch, model="stage1", domain="real"))
        save_pytorch_model(
            model_sim,
            epoch_sup_filename(run_name, epoch, model="stage1", domain="sim"))
        save_pytorch_model(
            model_critic,
            epoch_sup_filename(run_name,
                               epoch,
                               model="critic",
                               domain="critic"))
Example #18
def generate_rollout_debug_visualizations():
    setup = P.get_current_parameters()["Setup"]

    dataset_name = setup.get("viz_dataset_name") or get_eval_tmp_dataset_name(setup["model"], setup["run_name"])
    domain = setup.get("viz_domain") or ("real" if setup.get("real_drone") else "sim")
    run_name = setup.get("original_run_name") or setup.get("run_name")
    specific_envs = setup.get("only_specific_envs")

    # For collecting information for visualization examples
    specific_segments = [
        # running example
        (6827, 0, 4),
        # successful examples
        (6169, 0, 9),
        (6825, 0, 8),
        (6857, 0, 9),
        # failure examples
        (6169, 0, 2),
        (6299, 0, 9),
        (6634, 0, 8),
        (6856, 0, 9),
        (6857, 0, 8),
    ]
    specific_segments += [
        # good sim, lousy real
        (6419, 0, 5),
        (6569, 0, 6),
        (6634, 0, 6),
        (6917, 0, 7),
    ]
    # NOTE: this overrides the "only_specific_envs" setup parameter loaded above
    specific_envs = [s[0] for s in specific_segments]

    # Generate all
    #specific_envs = list(range(6000, 7000, 1))
    #specific_segments = None

    # Some quick params. TODO: Bring this into json
    viz_params = {
        "ego_vdist": False,
        "draw_landmarks": False,
        "draw_topdown": True,
        "draw_drone": True,
        "draw_trajectory": True,
        "draw_fov": False,
        "include_vdist": False,
        "include_layer": None,
        "include_instr": False
    }

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # TODO: Grab the correct env list
    env_list = dev_envs

    viz = RolloutVisualizer(resolution=576)
    base_dir = os.path.join(get_rollout_debug_viz_dir(), f"{dataset_name}-{domain}")
    os.makedirs(base_dir, exist_ok=True)

    for env_id in env_list:
        if specific_envs and env_id not in specific_envs:
            print("Skipping", env_id)
            continue
        try:
            env_data = load_single_env_from_dataset(dataset_name, env_id, "supervised")
        except FileNotFoundError:
            print(f"Skipping env: {env_id}")
            continue
        if len(env_data) == 0:
            print(f"Skipping env: {env_id}. Rollout exists but is EMPTY!")
            continue
        segs = split_into_segs(env_data)
        for seg in segs:
            lag_start = 1.5
            end_lag = 1.5
            seg_idx = seg[0]["seg_idx"]
            if specific_segments and (env_id, 0, seg_idx) not in specific_segments:
                continue
            seg_name = f"{env_id}:0:{seg_idx}-{domain}"
            gif_filename = f"{seg_name}-roll"
            instr_filename = f"{seg_name}-instr.txt"
            this_dir = os.path.join(base_dir, gif_filename)
            os.makedirs(this_dir, exist_ok=True)
            base_path = os.path.join(this_dir, gif_filename)
            if os.path.exists(os.path.join(this_dir, instr_filename)):
                continue

            # Animation with just the drone
            frames = viz.top_down_visualization(env_id, seg_idx, seg, domain, viz_params)
            save_frames(viz, frames, f"{base_path}-exec", fps=5.0, start_lag=lag_start, end_lag=end_lag, formats=Y_FMT)

            # Save instruction
            with open(os.path.join(this_dir, instr_filename), "w") as fp:
                fp.write(seg[0]["instruction"])

            # Animation of action
            frames = viz.action_visualization(env_id, seg_idx, seg, domain, "action")
            save_frames(viz, frames, f"{base_path}-action", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            # Animation of actions
            #action_frames = viz.grab_frames(env_id, seg_idx, seg, domain, "action", scale=4)
            #save_frames(viz, action_frames, f"{base_path}-action", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            # Generate and save gif
            # Bare top-down view
            mod_params = deepcopy(viz_params)
            mod_params["draw_drone"] = False
            mod_params["draw_trajectory"] = False
            frames = viz.top_down_visualization(env_id, seg_idx, seg, domain, mod_params)
            save_frames(viz, frames, f"{base_path}-top-down", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            mod_params["draw_drone"] = True
            mod_params["draw_trajectory"] = False
            frames = viz.top_down_visualization(env_id, seg_idx, seg, domain, mod_params)
            save_frames(viz, frames, f"{base_path}-top-down-drn", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            # Egocentric visitation distributions
            vdist_r_frames = viz.grab_frames(env_id, seg_idx, seg, domain, "v_dist_r_inner")
            save_frames(viz, vdist_r_frames, f"{base_path}-ego-vdist", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            # Map struct
            map_struct_frames = viz.grab_frames(env_id, seg_idx, seg, domain, "map_struct")
            save_frames(viz, map_struct_frames, f"{base_path}-ego-map-struct", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            # Egocentric observation mask
            ego_obs_mask_frames = viz.grab_frames(env_id, seg_idx, seg, domain, "ego_obs_mask")
            save_frames(viz, ego_obs_mask_frames, f"{base_path}-ego-obs-mask", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            def save_map_permutations(file_prefix, incl_layer):
                mod_params = deepcopy(viz_params)
                if incl_layer == "vdist":
                    mod_params["include_vdist"] = True
                else:
                    mod_params["include_layer"] = incl_layer
                print(f"GENERATING: {file_prefix}")
                # Non-overlaid, without trajectory
                mod_params["draw_drone"] = False
                mod_params["draw_topdown"] = False
                mod_params["draw_trajectory"] = False
                frames = viz.top_down_visualization(env_id, seg_idx, seg, domain, mod_params)
                save_frames(viz, frames, f"{file_prefix}", fps=5.0, start_lag=lag_start, end_lag=end_lag, formats=Y_FMT)

                print(f"GENERATING: {file_prefix}-ov")
                # Overlaid, without trajectory
                mod_params["draw_topdown"] = True
                frames = viz.top_down_visualization(env_id, seg_idx, seg, domain, mod_params)
                save_frames(viz, frames, f"{file_prefix}-ov", fps=5.0, start_lag=lag_start, end_lag=end_lag, formats=D_FMT)

                print(f"GENERATING: {file_prefix}-ov-path")
                # Overlaid, with trajectory
                mod_params["draw_drone"] = True
                mod_params["draw_trajectory"] = True
                frames = viz.top_down_visualization(env_id, seg_idx, seg, domain, mod_params)
                save_frames(viz, frames, f"{file_prefix}-ov-path", fps=5.0, start_lag=lag_start, end_lag=end_lag, formats=Y_FMT)

                print(f"GENERATING: {file_prefix}-path")
                # Non-overlaid, with trajectory
                mod_params["draw_topdown"] = False
                mod_params["draw_drone"] = True
                mod_params["draw_trajectory"] = True
                frames = viz.top_down_visualization(env_id, seg_idx, seg, domain, mod_params)
                save_frames(viz, frames, f"{file_prefix}-path", fps=5.0, start_lag=lag_start, end_lag=end_lag, formats=D_FMT)

            save_map_permutations(f"{base_path}-vdist", "vdist")

            save_map_permutations(f"{base_path}-semantic-map", "S_W")

            save_map_permutations(f"{base_path}-semantic-map-gray", "S_W_Gray")

            save_map_permutations(f"{base_path}-proj-features", "F_W")

            save_map_permutations(f"{base_path}-grounding-map", "R_W")

            save_map_permutations(f"{base_path}-grounding-map-gray", "R_W_Gray")

            save_map_permutations(f"{base_path}-mask", "M_W")

            save_map_permutations(f"{base_path}-accum-mask", "M_W_accum")

            save_map_permutations(f"{base_path}-accum-mask-inv", "M_W_accum_inv")

            # Animation of FPV features
            fpv_feature_frames = viz.grab_frames(env_id, seg_idx, seg, domain, "F_C")
            save_frames(viz, fpv_feature_frames, f"{base_path}-features-fpv", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            # Animation of FPV images
            fpv_image_frames = viz.grab_frames(env_id, seg_idx, seg, domain, "image", scale=4)
            save_frames(viz, fpv_image_frames, f"{base_path}-image", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            frames = viz.overlay_frames(fpv_image_frames, fpv_feature_frames)
            save_frames(viz, frames, f"{base_path}-features-fpv-ov", fps=5.0, start_lag=lag_start, end_lag=end_lag)

            num_frames = len(frames)

            # Clip rollout videos to correct rollout duration and re-save
            rollout_dir = get_rollout_video_dir(run_name=run_name)
            if os.path.isdir(rollout_dir):
                print("Processing rollout videos")
                actual_rollout_duration = num_frames / 5.0
                ceiling_clip = viz.load_video_clip(env_id, seg_idx, seg, domain, "ceiling", rollout_dir)
                duration_with_lag = lag_start + actual_rollout_duration + end_lag
                try:
                    if ceiling_clip is not None:
                        if ceiling_clip.duration > duration_with_lag:
                            start = ceiling_clip.duration - end_lag - duration_with_lag
                            ceiling_clip = ceiling_clip.cutout(0, start)
                            #ceiling_clip = ceiling_clip.cutout(duration_with_lag, ceiling_clip.duration)
                        save_frames(viz, ceiling_clip, f"{base_path}-ceiing_cam-clipped", fps=ceiling_clip.fps)
                    corner_clip = viz.load_video_clip(env_id, seg_idx, seg, domain, "corner", rollout_dir)
                    if corner_clip is not None:
                        if corner_clip.duration > actual_rollout_duration + end_lag:
                            start = corner_clip.duration - end_lag - duration_with_lag
                            corner_clip = corner_clip.cutout(0, start)
                            #corner_clip = corner_clip.cutout(duration_with_lag, corner_clip.duration)
                        save_frames(viz, corner_clip, f"{base_path}-corner_cam-clipped", fps=corner_clip.fps)
                except Exception as e:
                    print("Video encoding error! Copying manually")
                    print(e)

                try:
                    in_ceil_file = os.path.join(rollout_dir, f"rollout_ceiling_{env_id}-0-{seg_idx}.mkv")
                    in_corn_file = os.path.join(rollout_dir, f"rollout_corner_{env_id}-0-{seg_idx}.mkv")
                    out_ceil_file = f"{base_path}-ceiling_cam-full.mkv"
                    out_corn_file = f"{base_path}-corner_cam-full.mkv"
                    shutil.copy(in_ceil_file, out_ceil_file)
                    shutil.copy(in_corn_file, out_corn_file)
                except Exception as e:
                    print("Failed copying videos! SKipping")

        print("ding")
Example #19
def train_dagger_simple():
    # ----------------------------------------------------------------------------------------------------------------
    # Load params and configure stuff

    P.initialize_experiment()
    params = P.get_current_parameters()["SimpleDagger"]
    setup = P.get_current_parameters()["Setup"]
    num_iterations = params["num_iterations"]
    sim_seed_dataset = params.get("sim_seed_dataset")
    run_name = setup["run_name"]
    device = params.get("device", "cuda:1")
    dataset_limit = params.get("dataset_size_limit_envs")
    seed_count = params.get("seed_count")

    # Trigger rebuild if necessary before going into all the threads and processes
    _ = get_restricted_env_id_lists(full=True)

    # Initialize the dataset
    if sim_seed_dataset:
        copy_seed_dataset(from_dataset=sim_seed_dataset,
                          to_dataset=dagger_dataset_name(run_name),
                          seed_count=seed_count or dataset_limit)
        gap = 0
    else:
        # TODO: Refactor this into a prompt function
        data_path = get_dataset_dir(dagger_dataset_name(run_name))
        if os.path.exists(data_path):
            print("DATASET EXISTS! Continue where left off?")
            c = input(" (y/n) >>> ")
            if c != "y":
                raise ValueError(
                    f"Not continuing: Dataset {data_path} exists. Delete it if you like and try again"
                )
        else:
            os.makedirs(data_path, exist_ok=True)
        gap = dataset_limit - len(os.listdir(data_path))

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # ----------------------------------------------------------------------------------------------------------------
    # Load / initialize model

    model = load_model(setup["model"], setup["model_file"],
                       domain="sim")[0].to(device)
    oracle = load_model("oracle")[0]

    # ----------------------------------------------------------------------------------------------------------------
    # Continue where we left off - load the model and set the iteration/epoch number

    # Find the first DAgger iteration for which no checkpoint exists yet
    for start_iteration in range(10000):
        epfname = epoch_dag_filename(run_name, start_iteration)
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_iteration > 0:
        print(
            f"DAG: CONTINUING DAGGER TRAINING FROM ITERATION: {start_iteration}"
        )
        load_pytorch_model(model,
                           epoch_dag_filename(run_name, start_iteration - 1))

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize trainer

    trainer = Trainer(model,
                      epoch=start_iteration,
                      name=setup["model"],
                      run_name=setup["run_name"])
    trainer.set_dataset_names([dagger_dataset_name(run_name)])

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize policy roller

    roller = SimpleParallelPolicyRoller(
        num_workers=params["num_workers"],
        device=params["device"],
        policy_name=setup["model"],
        policy_file=setup["model_file"],
        oracle=oracle,
        dataset_save_name=dagger_dataset_name(run_name),
        no_reward=True)
    rollout_sampler = RolloutSampler(roller)

    # ----------------------------------------------------------------------------------------------------------------
    # Train DAgger - loop over iterations; in each, prune, roll out, and train an epoch

    print("SUPP: Beginning training...")
    for iteration in range(start_iteration, num_iterations):
        print(f"DAG: Starting iteration {iteration}")

        # Remove extra rollouts to keep within DAggerFM limit
        prune_dataset(run_name, dataset_limit)

        # Rollout and collect more data for training and evaluation
        policy_state = model.get_policy_state()
        rollout_sampler.sample_n_rollouts(
            n=gap if iteration == 0 else params["train_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="train",
            dagger_beta=dagger_beta(params, iteration))

        eval_rollouts = rollout_sampler.sample_n_rollouts(
            n=params["eval_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="dev",
            dagger_beta=0)

        # Kill airsim instances so that they don't take up GPU memory and in general slow things down during training
        roller.kill_airsim()

        # Evaluate success / metrics and save to tensorboard
        if setup["eval_nl"]:
            evaler = DataEvalNL(run_name,
                                entire_trajectory=False,
                                save_images=False)
            evaler.evaluate_dataset(eval_rollouts)
            results = evaler.get_results()
            print("Results:", results)
            evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

        # Do one epoch of supervised training
        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)
        #test_loss = trainer.train_epoch(env_list_common=dev_envs_common, env_list_sim=dev_envs_sim, eval=True)

        # Save the model to file
        print("SUPP: Epoch", iteration, "train_loss:", train_loss)
        save_pytorch_model(model, epoch_dag_filename(run_name, iteration))
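dagger_beta is not defined in this snippet. The classic DAgger schedule (Ross et al., 2011) mixes the oracle into the rollout policy with an exponentially decaying coefficient; a hypothetical sketch, with the decay rate read from an assumed config key:

def dagger_beta(params, iteration):
    # Probability of following the oracle at this iteration (assumed schedule)
    p = params.get("dagger_beta_decay", 0.9)  # hypothetical parameter name
    return p ** iteration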
Example #20
def train_supervised_bidomain():
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    supervised_params = P.get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])

    if P.get_current_parameters()["Training"].get("use_oracle_critic", False):
        model_oracle_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])
        # This changes the name in the summary writer to get a different color plot
        oname = model_oracle_critic.model_name
        model_oracle_critic.set_model_name(oname + "_oracle")
        model_oracle_critic.model_name = oname
    else:
        model_oracle_critic = None

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    real_filename = f"supervised_{setup['model']}_{setup['run_name']}_real"
    sim_filename  = f"supervised_{setup['model']}_{setup['run_name']}_sim"
    critic_filename = f"supervised_{setup['critic_model']}_{setup['run_name']}_critic"

    # TODO: (Maybe) Implement continuing of training

    # Bidata means that we treat Lani++ and LaniOriginal examples differently, only computing domain-adversarial stuff on Lani++
    bidata = P.get_current_parameters()["Training"].get("bidata", False)
    if bidata == "v2":
        trainer = TrainerBidomainBidata(model_real, model_sim, model_critic, model_oracle_critic, epoch=0)
        train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
        train_envs_sim = train_envs
        dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
        dev_envs_sim = dev_envs
    elif bidata:
        trainer = TrainerBidomainBidata(model_real, model_sim, model_critic, model_oracle_critic, epoch=0)
        train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
        train_envs_sim = [e for e in train_envs if e < 6000]
        dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
        dev_envs_sim = [e for e in dev_envs if e < 6000]
    else:
        trainer = TrainerBidomain(model_real, model_sim, model_critic, model_oracle_critic, epoch=0)

    print("Beginning training...")
    best_test_loss = float("inf")
    for epoch in range(num_epochs):
        if bidata:
            train_loss = trainer.train_epoch(env_list_common=train_envs_common, env_list_sim=train_envs_sim, eval=False)
            test_loss = trainer.train_epoch(env_list_common=dev_envs_common, env_list_sim=dev_envs_sim, eval=True)
        else:
            train_loss = trainer.train_epoch(env_list=train_envs, eval=False)
            test_loss = trainer.train_epoch(env_list=dev_envs, eval=True)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(model_real, real_filename)
            save_pytorch_model(model_sim, sim_filename)
            save_pytorch_model(model_critic, critic_filename)
            print(f"Saved models in: \n Real: {real_filename} \n Sim: {sim_filename} \n Critic: {critic_filename}")

        print ("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
        save_pytorch_model(model_real, f"tmp/{real_filename}_epoch_{epoch}")
        save_pytorch_model(model_sim, f"tmp/{sim_filename}_epoch_{epoch}")
        save_pytorch_model(model_critic, f"tmp/{critic_filename}_epoch_{epoch}")