Example #1
def load_latest_model(latest_model_filename):
    # If we retrain every iteration, don't load previously trained model, but train from scratch
    setup = P.get_current_parameters()["Setup"]
    if PARAMS["retrain_every_iteration"]:
        model, model_loaded = load_model(model_file_override="reset")
    # Otherwise load the latest model
    else:
        model, model_loaded = load_model(model_file_override=latest_model_filename)
    return model, model_loaded
Example #2
def load_dagger_model(latest_model_filename):
    setup = P.get_current_parameters()["Setup"]
    # Load the model checkpoint selected by the restore flags (e.g. the continuously updated dagger filename)
    if PARAMS["restore_latest"]:
        print("Loading latest model: ", latest_model_filename)
        model, model_loaded = load_model(model_file_override=latest_model_filename)
    elif PARAMS["restore"] == 0 or setup["restore_data_only"]:
        model, model_loaded = load_model()
    elif setup["restore"] > 0 and not setup["restore_data_only"]:
        model_name = get_model_filename_at_iteration(setup, setup["dagger_restore"] - 1)
        model, model_loaded = load_model(model_file_override=model_name)
    return model
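The restore flags above reduce to a small pure dispatch. A minimal sketch of that logic, with hypothetical names mirroring the snippet (resolve_model_file and the iteration naming scheme are assumptions, not project code):

def resolve_model_file(params, latest_model_filename):
    # Pick which checkpoint to load based on restore flags (hypothetical helper)
    if params.get("restore_latest"):
        return latest_model_filename
    if params.get("restore", 0) == 0 or params.get("restore_data_only"):
        return None  # let load_model() fall back to its default
    return f"dagger_model_iter_{params['restore'] - 1}"  # assumed naming scheme

print(resolve_model_file({"restore_latest": True}, "dagger_latest"))  # -> dagger_latest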
Example #3
def train_supervised_bidomain():
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    env_list_name = setup.get("eval_env_set", "dev")
    if env_list_name == "dev":
        print("Using DEV envs")
        use_envs = dev_envs
    elif env_list_name == "train":
        print("Using TRAIN envs")
        use_envs = train_envs
    elif env_list_name == "test":
        print("Using TEST envs")
        use_envs = test_envs
    else:
        raise ValueError(f"Unknown env set {env_list_name}")

    env_range_start = setup.get("env_range_start", 0)  # default 0 disables the bound
    if env_range_start > 0:
        use_envs = [e for e in use_envs if e >= env_range_start]
    env_range_end = setup.get("env_range_end", 0)
    if env_range_end > 0:
        use_envs = [e for e in use_envs if e < env_range_end]

    restricted_domain = "simulator"
    if restricted_domain == "simulator":
        # Stand in a sim copy for the real-domain model; it is unused when restricted to sim
        model_real, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
        model_sim.set_save_path_overlays(True)
    elif restricted_domain == "real":
        # Stand in a real copy for the sim-domain model; it is unused when restricted to real
        model_sim, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
        model_real.set_save_path_overlays(True)
    else:
        model_real.set_save_path_overlays(True)
        model_sim.set_save_path_overlays(True)

    trainer = TrainerBidomain(model_real, model_sim, model_critic, epoch=0)
    trainer.train_epoch(env_list=use_envs, eval=True, restricted_domain=restricted_domain)

    if restricted_domain != "simulator":
        model_real.print_metrics()
    if restricted_domain != "real":
        model_sim.print_metrics()
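The env-range clamp above is a plain list filter; a self-contained sketch of the same pattern (names are illustrative):

def clamp_env_range(env_ids, range_start=0, range_end=0):
    # Keep only env ids inside [range_start, range_end); 0 disables a bound
    if range_start > 0:
        env_ids = [e for e in env_ids if e >= range_start]
    if range_end > 0:
        env_ids = [e for e in env_ids if e < range_end]
    return env_ids

print(clamp_env_range(list(range(10)), range_start=3, range_end=7))  # [3, 4, 5, 6]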
Example #4
def test_rollout_sampler():
    policy, _ = load_model("pvn_full_bidomain")
    policy_state = policy.get_policy_state()
    from visualization import Presenter

    #roller = SimplePolicyRoller(policy_factory)
    roller = SimpleParallelPolicyRoller("pvn_full_bidomain", num_workers=4)
    rollout_sampler = RolloutSampler(roller)

    # TODO: Load some policy
    print("Sampling once")
    rollouts = rollout_sampler.sample_n_rollouts(12, policy_state)

    print("Sampling twice")
    rollouts += rollout_sampler.sample_n_rollouts(12, policy_state)

    print("Sampling thrice")
    rollouts += rollout_sampler.sample_n_rollouts(12, policy_state)

    for rollout in rollouts:
        print("Visualizing rollout")
        for sample in rollout:
            state = sample["state"]
            image = state.get_rgb_image()
            Presenter().show_image(image, "fpv", waitkey=True, scale=4)
        print("Done!")

    roller.__exit__()
    print("ding")
Example #5
def train_supervised():
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    supervised_params = get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model, model_loaded = load_model()

    print("Loading data")
    train_envs, dev_envs, test_envs = get_all_env_id_lists(max_envs=setup["max_envs"])

    if "split_train_data" in supervised_params and supervised_params["split_train_data"]:
        split_name = supervised_params["train_data_split"]
        split = load_env_split()[split_name]
        train_envs = [env_id for env_id in train_envs if env_id in split]
        print("Using " + str(len(train_envs)) + " envs from dataset split: " + split_name)

    filename = "supervised_" + setup["model"] + "_" + setup["run_name"]
    start_filename = "tmp/" + filename + "_epoch_" + str(supervised_params["start_epoch"])
    if supervised_params["start_epoch"] > 0:
        if file_exists(start_filename):
            load_pytorch_model(model, start_filename)
        else:
            print("Couldn't continue training. Model file doesn't exist at:")
            print(start_filename)
            exit(-1)

    if setup["restore_weights_name"]:
        restore_pretrained_weights(model, setup["restore_weights_name"], setup["fix_restored_weights"])

    trainer = Trainer(model, epoch=supervised_params["start_epoch"], name=setup["model"], run_name=setup["run_name"])

    print("Beginning training...")
    best_test_loss = 1000
    for epoch in range(num_epochs):
        train_loss = trainer.train_epoch(train_data=None, train_envs=train_envs, eval=False)

        trainer.model.correct_goals = 0
        trainer.model.total_goals = 0

        test_loss = trainer.train_epoch(train_data=None, train_envs=dev_envs, eval=True)

        print("GOALS: ", trainer.model.correct_goals, trainer.model.total_goals)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(trainer.model, filename)
            print("Saved model in:", filename)
        print ("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
        save_pytorch_model(trainer.model, "tmp/" + filename + "_epoch_" + str(epoch))
        if hasattr(trainer.model, "save"):
            trainer.model.save(epoch)
        save_pretrained_weights(trainer.model, setup["run_name"])
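The epoch loop above follows a common save-on-improvement pattern. A self-contained sketch of just that pattern (the loss values are placeholders, not project data):

import math

best_test_loss = math.inf
dev_losses = [0.9, 0.7, 0.8, 0.5]  # placeholder per-epoch dev losses
for epoch, test_loss in enumerate(dev_losses):
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        print(f"epoch {epoch}: new best {test_loss:.2f}; save the checkpoint here")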
Example #6
    def loadPolicy(self):
        P.initialize_experiment(self.setup_name)
        self.params = P.get_current_parameters()["Rollout"]
        if self.model_name is not None:
            print("RollOutParams loading model")
            print("Use cuda: " + str(self.cuda))
            self.policy, self.policy_loaded = \
                load_model(model_file_override=self.model_file)

            self.use_policy = True
            if self.policy is not None:
                print("Loaded policy: ", self.model_name)
            else:
                print("Error loading policy: ", self.model_name)
        else:
            print("Error! Requested loadPolicy, but model_name is None!")
        return self
Example #7
def evaluate_top_down_pred():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]

    model, model_loaded = load_model()

    eval_envs = get_correct_eval_env_id_list()

    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # The original call passed dataset_prefix twice (a SyntaxError) and eval=eval
    # (the builtin); dataset_names / eval=True are the assumed intent.
    dataset = model.get_dataset(envs=eval_envs,
                                dataset_names=dataset_name,
                                dataset_prefix="supervised",
                                eval=True)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=False)

    total_loss = 0
    count = 0
    num_batches = len(dataloader)
    for b, batch in enumerate(dataloader):
        loss_var = model.sup_loss_on_batch(batch, eval=True, viz=True)
        total_loss += loss_var.item()  # .data[0] indexing predates PyTorch 0.4
        count += 1
        print("batch: " + str(b) + " / " + str(num_batches) +
              " loss: " + str(loss_var.item()))
    avg_loss = total_loss / count

    results_dir = get_results_dir(setup["run_name"])
    results_json_path = get_results_path(setup["run_name"])
    os.makedirs(results_dir, exist_ok=True)

    viz = model.get_viz()
    for key, lst in viz.items():
        for i, img in enumerate(lst):
            img_path = os.path.join(
                results_dir, key + str(i) + "_" + setup["model"] + ".jpg")
            sp.misc.imsave(img_path, img)  # removed in SciPy 1.2; see the imageio note below
            print("Saved image: " + img_path)

    with open(results_json_path, "w") as fp:
        json.dump({"loss": avg_loss}, fp)
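sp.misc.imsave was deprecated in SciPy 1.0 and removed in 1.2, so the image-saving loop above fails on a current SciPy. A sketch of the usual imageio replacement (the array here is a placeholder):

import numpy as np
import imageio

img = (np.random.rand(32, 32, 3) * 255).astype(np.uint8)  # placeholder image
imageio.imwrite("example.jpg", img)  # drop-in for sp.misc.imsave(path, img)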
Example #8
def browse_pvn_dataset():
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    model_sim, _ = load_model(setup["model"],
                              setup["sim_model_file"],
                              domain="sim")
    data_params = P.get_current_parameters()["Training"]

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    #dom="real"
    dom = "sim"

    dataset = model_sim.get_dataset(
        data=None,
        envs=train_envs,
        domain=dom,
        dataset_names=data_params[f"{dom}_dataset_names"],
        dataset_prefix="supervised",
        eval=False,
        halfway_only=False)

    p = Presenter()

    for example in dataset:
        if example is None:
            continue
        md = example["md"][0]
        print(
            f"Showing example: {md['env_id']}:{md['set_idx']}:{md['seg_idx']}")
        print(f"  instruction: {md['instruction']}")
        exec_len = len(example["images"])
        for i in range(exec_len):
            print(f"   timestep: {i}")
            img_i = example["images"][i]
            lm_fpv_i = example["lm_pos_fpv"][i]
            if lm_fpv_i is not None:
                img_i = p.plot_pts_on_torch_image(img_i, lm_fpv_i.long())
            p.show_image(img_i, "fpv_img_i", scale=4, waitkey=True)
Example #9
    def __init__(self, policy_name, policy_file, num_workers,
                 oracle=None, device=None, dataset_save_name="", restart_every_n=1000,
                 no_reward=False):
        self.num_workers = num_workers
        self.processes = []
        self.connections = []
        self.policy_name = policy_name
        self.shared_policy, _ = load_model(policy_name, policy_file)
        self.shared_policy.make_picklable()
        self.shared_policy = self.shared_policy.to(device)
        self.device = device
        self.dataset_save_name = dataset_save_name
        self.restart_every_n = restart_every_n
        self.rollout_num = 0
        self.no_reward = no_reward

        for i in range(self.num_workers):
            ctx = mp.get_context("spawn")
            parent_conn, child_conn = ctx.Pipe()
            print(f"LAUNCHING WORKER {i}")
            p = ctx.Process(target=worker_process, args=(child_conn, i, dataset_save_name, self.shared_policy, oracle, device, no_reward))
            self.processes.append(p)
            self.connections.append(parent_conn)
            p.start()
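The constructor follows the standard multiprocessing spawn-plus-Pipe recipe. A self-contained sketch of that recipe, with a trivial worker in place of worker_process:

import multiprocessing as mp

def worker(conn, worker_id):
    conn.send(f"worker {worker_id} ready")
    conn.close()

if __name__ == "__main__":
    ctx = mp.get_context("spawn")
    processes, connections = [], []
    for i in range(2):
        parent_conn, child_conn = ctx.Pipe()
        p = ctx.Process(target=worker, args=(child_conn, i))
        p.start()
        processes.append(p)
        connections.append(parent_conn)
    for conn, p in zip(connections, processes):
        print(conn.recv())
        p.join()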
Example #10
def train_supervised_worker(rl_process_conn):
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    rlsup = P.get_current_parameters()["RLSUP"]
    setup["trajectory_length"] = setup["sup_trajectory_length"]
    run_name = setup["run_name"]
    supervised_params = P.get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]
    sup_device = rlsup.get("sup_device", "cuda:1")

    model_oracle_critic = None

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # Load the starter model and save it at epoch 0
    # Supervised worker to use GPU 1, RL will use GPU 0. Simulators run on GPU 2
    model_sim = load_model(setup["sup_model"],
                           setup["sim_model_file"],
                           domain="sim")[0].to(sup_device)
    model_real = load_model(setup["sup_model"],
                            setup["real_model_file"],
                            domain="real")[0].to(sup_device)
    model_critic = load_model(setup["sup_critic_model"],
                              setup["critic_model_file"])[0].to(sup_device)

    # ----------------------------------------------------------------------------------------------------------------

    print("SUPP: Initializing trainer")
    rlsup_params = P.get_current_parameters()["RLSUP"]
    sim_seed_dataset = rlsup_params.get("sim_seed_dataset")

    # TODO: Figure if 6000 or 7000 here
    trainer = TrainerBidomainBidata(model_real,
                                    model_sim,
                                    model_critic,
                                    model_oracle_critic,
                                    epoch=0)
    train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
    train_envs_sim = [e for e in train_envs if e < 7000]
    dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
    dev_envs_sim = [e for e in dev_envs if e < 7000]
    sim_datasets = [rl_dataset_name(run_name)]
    real_datasets = ["real"]
    trainer.set_dataset_names(sim_datasets=sim_datasets,
                              real_datasets=real_datasets)

    # ----------------------------------------------------------------------------------------------------------------
    for start_sup_epoch in range(10000):
        epfname = epoch_sup_filename(run_name,
                                     start_sup_epoch,
                                     model="stage1",
                                     domain="sim")
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_sup_epoch > 0:
        print(f"SUPP: CONTINUING SUP TRAINING FROM EPOCH: {start_sup_epoch}")
        load_pytorch_model(
            model_real,
            epoch_sup_filename(run_name,
                               start_sup_epoch - 1,
                               model="stage1",
                               domain="real"))
        load_pytorch_model(
            model_sim,
            epoch_sup_filename(run_name,
                               start_sup_epoch - 1,
                               model="stage1",
                               domain="sim"))
        load_pytorch_model(
            model_critic,
            epoch_sup_filename(run_name,
                               start_sup_epoch - 1,
                               model="critic",
                               domain="critic"))
        trainer.set_start_epoch(start_sup_epoch)

    # ----------------------------------------------------------------------------------------------------------------
    print("SUPP: Beginning training...")
    for epoch in range(start_sup_epoch, num_epochs):
        # Tell the RL process that a new Stage 1 model is ready for loading
        print("SUPP: Sending model to RL")
        model_sim.reset()
        rl_process_conn.send(
            ["stage1_model_state_dict",
             model_sim.state_dict()])
        if DEBUG_RL:
            while True:
                sleep(1)

        if not sim_seed_dataset:
            ddir = get_dataset_dir(rl_dataset_name(run_name))
            os.makedirs(ddir, exist_ok=True)
            while len(os.listdir(ddir)) < 20:
                print("SUPP: Waiting for rollouts to appear")
                sleep(3)

        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(env_list_common=train_envs_common,
                                         env_list_sim=train_envs_sim,
                                         eval=False)
        test_loss = trainer.train_epoch(env_list_common=dev_envs_common,
                                        env_list_sim=dev_envs_sim,
                                        eval=True)
        print("SUPP: Epoch", epoch, "train_loss:", train_loss, "test_loss:",
              test_loss)
        save_pytorch_model(
            model_real,
            epoch_sup_filename(run_name, epoch, model="stage1", domain="real"))
        save_pytorch_model(
            model_sim,
            epoch_sup_filename(run_name, epoch, model="stage1", domain="sim"))
        save_pytorch_model(
            model_critic,
            epoch_sup_filename(run_name,
                               epoch,
                               model="critic",
                               domain="critic"))
Example #11
def multiple_eval_rollout():

    params, system_namespaces = setup_parameter_namespaces()
    setup_overlay = params["MultipleEval"]["SetupOverlay"]
    domain = "real" if setup_overlay["real_drone"] else "sim"
    one_at_a_time = params["MultipleEval"]["one_at_a_time"]
    check_and_prompt_if_data_exists(system_namespaces)

    # Load the systems
    # TODO: Check how many can fit in GPU memory. If not too many, perhaps we can move them off-GPU between rounds
    policies = []
    for system_namespace in system_namespaces:
        P.switch_to_namespace(system_namespace)
        setup = P.get_current_parameters()["Setup"]
        policy, _ = load_model(setup["model"], setup["model_file"], domain)
        policies.append(policy)

    # ----------------------------------------------------------------------------------------
    # Initialize Roller
    # ----------------------------------------------------------------------------------------
    policy_roller = SimplePolicyRoller(instance_id=7,
                                       real_drone=setup_overlay["real_drone"],
                                       policy=None,
                                       oracle=None,
                                       no_reward=True)

    # ----------------------------------------------------------------------------------------
    # Collect rollouts
    # ----------------------------------------------------------------------------------------

    eval_envs = list(sorted(get_correct_eval_env_id_list()))
    count = 0

    # Loop over environments
    for env_id in eval_envs:
        seg_ids = get_segs_available_for_env(env_id, 0)
        env_ids = [env_id] * len(seg_ids)
        print("Beginning rollouts for env: {env_id}")
        if len(seg_ids) == 0:
            print("   NO SEGMENTS! Next...")
            continue

        # Loop over systems and save data
        for i, (policy,
                system_namespace) in enumerate(zip(policies,
                                                   system_namespaces)):
            print(
                f"Rolling policy in namespace {system_namespace} for env: {env_id}"
            )
            P.switch_to_namespace(system_namespace)
            setup = P.get_current_parameters()["Setup"]
            if env_data_already_collected(env_id, setup["model"],
                                          setup["run_name"]):
                print(f"Skipping env_id: {env_id}, policy: {setup['model']}")
                continue

            eval_dataset_name = get_eval_tmp_dataset_name(
                setup["model"], setup["run_name"])
            policy_roller.set_policy(policy)
            # when the last policy is done, we should land the drone
            policy_roller.rollout_segments(
                env_ids,
                seg_ids,
                None,
                False,
                0,
                save_dataset_name=eval_dataset_name,
                rl_rollout=False,
                land_afterwards=(i == len(policies) - 1))
            count += 1

        if one_at_a_time and count > 0:
            print("Stopping. Run again to roll-out on the next environment!")
            break

    print("Done")
Example #12
def evaluate():
    P.initialize_experiment()

    model, model_loaded = load_model()
    eval_envs = get_correct_eval_env_id_list()

    model.eval()
    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    # As in the earlier example, dataset_prefix was passed twice (a SyntaxError)
    # and eval=eval passed the builtin; dataset_names / eval=True are the assumed fix.
    dataset = model.get_dataset(data=None,
                                envs=eval_envs,
                                dataset_names=dataset_name,
                                dataset_prefix="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True,
                            timeout=0)

    count = 0
    success = 0
    total_dist = 0

    for batch in dataloader:
        if batch is None:
            print("None batch!")
            continue

        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]

        # Each of the above is a list of lists of tensors, where the outer list is over the batch and the inner list
        # is over the segments. Loop through and accumulate loss for each batch sequentially, and for each segment.
        # Reset model state (embedding etc) between batches, but not between segments.
        # We don't process each batch in batch-mode, because it's complicated, with the varying number of segments and all.
        # TODO: This code is outdated and wrongly discretizes the goal location. Grab the fixed version from the old branch.

        batch_size = len(images)
        print("batch: ", count)
        print("successes: ", success)

        for i in range(batch_size):
            num_segments = len(instructions[i])

            for s in range(num_segments):
                instruction = cuda_var(instructions[i][s], model.is_cuda,
                                       model.cuda_device)
                instruction_mask = torch.ones_like(instruction)
                image = cuda_var(images[i][s], model.is_cuda,
                                 model.cuda_device)
                label_mask = cuda_var(label_masks[i][s], model.is_cuda,
                                      model.cuda_device)

                label_mask = model.label_pool(label_mask)

                goal_mask_l = label_mask[0, 1, :, :]
                goal_mask_l_np = goal_mask_l.data.cpu().numpy()
                goal_mask_l_flat = np.reshape(goal_mask_l_np, [-1])
                max_index_l = np.argmax(goal_mask_l_flat)
                argmax_loc_l = np.asarray([
                    int(max_index_l / goal_mask_l_np.shape[1]),
                    int(max_index_l % goal_mask_l_np.shape[1])
                ])

                if np.sum(goal_mask_l_np) < 0.01:
                    continue

                mask_pred, features, emb_loss = model(image, instruction,
                                                      instruction_mask)
                goal_mask = mask_pred[0, 1, :, :]
                goal_mask_np = goal_mask.data.cpu().numpy()
                goal_mask_flat = np.reshape(goal_mask_np, [-1])
                max_index = np.argmax(goal_mask_flat)

                argmax_loc = np.asarray([
                    int(max_index / goal_mask_np.shape[1]),
                    int(max_index % goal_mask_np.shape[1])
                ])

                dist = np.linalg.norm(argmax_loc - argmax_loc_l)
                if dist < OK_DIST:
                    success += 1
                count += 1
                total_dist += dist

    print("Correct goal predictions: ", success)
    print("Total evaluations: ", count)
    print("total dist: ", total_dist)
    print("avg dist: ", total_dist / float(count))
    print("success rate: ", success / float(count))
Example #13
def automatic_demo():

    P.initialize_experiment()
    instruction_display = InstructionDisplay()

    rate = Rate(0.1)

    env = PomdpInterface(
        is_real=get_current_parameters()["Setup"]["real_drone"])
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions()
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions  # the original spread train_instructions twice
    }
    token2term, word2token = get_word_to_token_map(corpus)

    # Run on dev set
    interact_instructions = dev_instructions

    env_range_start = get_current_parameters()["Setup"].get(
        "env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get(
        "env_range_end", 10e10)
    interact_instructions = {
        k: v
        for k, v in interact_instructions.items()
        if env_range_start < k < env_range_end
    }

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    # Loop over the select few examples
    while True:

        for instruction_sets in interact_instructions.values():
            for set_idx, instruction_set in enumerate(instruction_sets):
                env_id = instruction_set['env']
                found_example = None
                for example in examples:
                    if example[0] == env_id:
                        found_example = example
                if found_example is None:
                    continue
                env.set_environment(env_id, instruction_set["instructions"])

                presenter = Presenter()
                cumulative_reward = 0
                for seg_idx in range(len(instruction_set["instructions"])):
                    if seg_idx != found_example[2]:
                        continue

                    print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                    real_instruction_str = instruction_set["instructions"][
                        seg_idx]["instruction"]
                    instruction_display.show_instruction(real_instruction_str)
                    valid_segment = env.set_current_segment(seg_idx)
                    if not valid_segment:
                        continue
                    state = env.reset(seg_idx)

                    for i in range(START_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)

                    # Tokenize once, after the pause (the original re-tokenized every tick)
                    tok_instruction = tokenize_instruction(
                        real_instruction_str, word2token)

                    state = env.reset(seg_idx)
                    print("Executing: f{instruction_str}")
                    while True:
                        instruction_display.tick()
                        rate.sleep()
                        action, internals = model.get_action(
                            state, tok_instruction)
                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        #presenter.show_sample(state, action, reward, cumulative_reward, real_instruction_str)
                        #show_depth(state.image)
                        if done:
                            break

                    for i in range(END_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)
                    print("Segment finished!")
                    instruction_display.show_instruction("...")

            print("Env finished!")
Example #14
def train_top_down_pred(args, max_epoch=SUPERVISED_EPOCHS):
    initialize_experiment(args.run_name, args.setup_name)

    model, model_loaded = load_model()

    # TODO: Get batch size from global parameter server when it exists
    batch_size = 1 if args.model in (
        "top_down", "top_down_prior", "top_down_sm", "top_down_pretrain",
        "top_down_goal_pretrain", "top_down_nav", "top_down_cond") else BATCH_SIZE

    lr = 0.001  # * batch_size
    trainer = Trainer(model,
                      epoch=args.start_epoch,
                      name=args.model,
                      run_name=args.run_name)

    train_envs, dev_envs, test_envs = get_all_env_id_lists(
        max_envs=args.max_envs)

    filename = "top_down_" + args.model + "_" + args.run_name

    if args.restore_weights_name is not None:
        restore_pretrained_weights(model, args.restore_weights_name,
                                   args.fix_restored_weights)

    print("Beginning training...")
    best_test_loss = 1000

    validation_loss = []

    for epoch in range(SUPERVISED_EPOCHS):
        train_loss = -1

        if not args.eval_pretrain:
            train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)

        test_loss = trainer.train_epoch(train_envs=dev_envs, eval=True)
        validation_loss.append([epoch, test_loss])

        if not args.eval_pretrain:
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                save_pytorch_model(trainer.model, filename)
                print("Saved model in:", filename)

            print("Epoch", epoch, "train_loss:", train_loss, "test_loss:",
                  test_loss)
            save_pytorch_model(trainer.model,
                               "tmp/" + filename + "_epoch_" + str(epoch))
            save_pretrained_weights(trainer.model, args.run_name)

        else:
            break

        if max_epoch is not None and epoch > max_epoch:
            print("Reached epoch limit!")
            break

    test_loss_path = get_model_dir() + "/test_loss/" + filename + "_test_loss.csv"
    validation_loss = pd.DataFrame(validation_loss,
                                   columns=["epoch", "test_loss"])
    validation_loss.to_csv(test_loss_path, index=False)
Example #15
def interactive_demo():

    P.initialize_experiment()
    InteractAPI.launch_ui()

    rate = Rate(0.1)

    env = PomdpInterface(
        is_real=get_current_parameters()["Setup"]["real_drone"])
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions()
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions  # the original spread train_instructions twice
    }
    token2term, word2token = get_word_to_token_map(corpus)

    # Run on dev set
    interact_instructions = dev_instructions

    env_range_start = get_current_parameters()["Setup"].get(
        "env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get(
        "env_range_end", 10e10)
    interact_instructions = {
        k: v
        for k, v in interact_instructions.items()
        if env_range_start < k < env_range_end
    }

    count = 0
    stuck_count = 0

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    InteractAPI.write_empty_instruction()
    InteractAPI.write_real_instruction("None")
    instruction_str = InteractAPI.read_instruction_file()
    print("Initial instruction: ", instruction_str)

    for instruction_sets in interact_instructions.values():
        for set_idx, instruction_set in enumerate(instruction_sets):
            env_id = instruction_set['env']
            env.set_environment(env_id, instruction_set["instructions"])

            presenter = Presenter()
            cumulative_reward = 0
            for seg_idx in range(len(instruction_set["instructions"])):

                print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                real_instruction_str = instruction_set["instructions"][
                    seg_idx]["instruction"]
                InteractAPI.write_real_instruction(real_instruction_str)
                valid_segment = env.set_current_segment(seg_idx)
                if not valid_segment:
                    continue
                state = env.reset(seg_idx)

                keep_going = True
                while keep_going:
                    InteractAPI.write_real_instruction(real_instruction_str)

                    while True:
                        cv2.waitKey(200)
                        instruction = InteractAPI.read_instruction_file()
                        if instruction == "CMD: Next":
                            print("Advancing")
                            keep_going = False
                            InteractAPI.write_empty_instruction()
                            break
                        elif instruction == "CMD: Reset":
                            print("Resetting")
                            env.reset(seg_idx)
                            InteractAPI.write_empty_instruction()
                        elif len(instruction.split(" ")) > 1:
                            instruction_str = instruction
                            break

                    if not keep_going:
                        continue

                    env.override_instruction(instruction_str)
                    tok_instruction = tokenize_instruction(
                        instruction_str, word2token)

                    state = env.reset(seg_idx)
                    print("Executing: f{instruction_str}")
                    while True:
                        rate.sleep()
                        action, internals = model.get_action(
                            state, tok_instruction)

                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        presenter.show_sample(state, action, reward,
                                              cumulative_reward,
                                              instruction_str)
                        #show_depth(state.image)
                        if done:
                            break
                    InteractAPI.write_empty_instruction()
                    print("Segment finished!")
        print("Env finished!")
Example #16
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        models.append(model)

    eval_envs = list(sorted(get_correct_eval_env_id_list()))

    round_size = P.get_current_parameters()["Data"].get("collect_n_at_a_time")

    # TODO: Scrap RollOutParams and use parameter server JSON params instead
    roll_out_params = RollOutParams() \
                        .setModelName(setup["model"]) \
                        .setModelFile(setup["model_file"]) \
                        .setRunName(setup["run_name"]) \
                        .setSetupName(P.get_setup_name()) \
                        .setEnvList(eval_envs) \
                        .setMaxDeviation(800) \
                        .setHorizon(setup["trajectory_length"]) \
                        .setStepsToForceStop(20) \
                        .setPlot(False) \
                        .setShowAction(False) \
                        .setIgnorePolicyStop(False) \
                        .setPlotDir("evaluate/" + setup["run_name"]) \
                        .setSavePlots(False) \
                        .setRealtimeFirstPerson(False) \
                        .setSaveSamples(False) \
                        .setBuildTrainData(False) \
                        .setSegmentReset("always") \
                        .setSegmentLevel(False) \
                        .setFirstSegmentOnly(False) \
                        .setDebug(setup["debug"]) \
                        .setCuda(setup["cuda"]) \
                        .setRealDrone(setup["real_drone"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None
    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(
            map(lambda m: list(m), list(zip(*examples))))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    if round_size:
        eval_dataset_name = data_io.paths.get_eval_tmp_dataset_name(
            setup["model"], setup["run_name"])
        eval_dataset_path = data_io.paths.get_dataset_dir(eval_dataset_name)

        cumulative_dataset = []
        if os.path.exists(eval_dataset_path):
            result = query_user_load_discard(eval_dataset_path)
            if result == "load":
                print("Loading dataset and continuing evaluation")
                cumulative_dataset = load_multiple_env_data_from_dir(
                    eval_dataset_path)
            elif result == "discard":
                print("Discarding existing evaluation data")
                shutil.rmtree(eval_dataset_path)
            elif result == "cancel":
                print("Cancelling evaluation")
                return

        os.makedirs(eval_dataset_path, exist_ok=True)

        collected_envs = set([
            rollout[0]["env_id"] for rollout in cumulative_dataset
            if len(rollout) > 0
        ])
        eval_envs = [e for e in eval_envs if e not in collected_envs]
        if setup.get("compute_results_no_rollout", False):
            eval_envs = []

        for i in range(0, len(eval_envs), round_size):
            j = min(len(eval_envs), i + round_size)
            round_envs = eval_envs[i:j]
            roll_out_params.setEnvList(round_envs)
            dataset = roller.roll_out_policy(roll_out_params)

            # Save this data
            for rollout in dataset:
                if len(rollout) == 0:
                    print(
                        "WARNING! DROPPING EMPTY ROLLOUTS! SHOULDN'T DO THIS")
                    continue
                ## rollout is a list of samples (or of segments); empty rollouts
                ## were skipped above and both branches of the original dead
                ## `if True:` scaffold saved identically, so it is collapsed here
                env_id = rollout[0]["env_id"]
                save_dataset_to_path(
                    os.path.join(eval_dataset_path, str(env_id)), rollout)

            cumulative_dataset += dataset
            print(f"Saved cumulative dataset to: {eval_dataset_path}")

        dataset = cumulative_dataset
    else:
        dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"],
                                      save_images=True,
                                      world_size=setup["world_size_m"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
Example #17
def train_dagger_simple():
    # ----------------------------------------------------------------------------------------------------------------
    # Load params and configure stuff

    P.initialize_experiment()
    params = P.get_current_parameters()["SimpleDagger"]
    setup = P.get_current_parameters()["Setup"]
    num_iterations = params["num_iterations"]
    sim_seed_dataset = params.get("sim_seed_dataset")
    run_name = setup["run_name"]
    device = params.get("device", "cuda:1")
    dataset_limit = params.get("dataset_size_limit_envs")
    seed_count = params.get("seed_count")

    # Trigger rebuild if necessary before going into all the threads and processes
    _ = get_restricted_env_id_lists(full=True)

    # Initialize the dataset
    if sim_seed_dataset:
        copy_seed_dataset(from_dataset=sim_seed_dataset,
                          to_dataset=dagger_dataset_name(run_name),
                          seed_count=seed_count or dataset_limit)
        gap = 0
    else:
        # TODO: Refactor this into a prompt function
        data_path = get_dataset_dir(dagger_dataset_name(run_name))
        if os.path.exists(data_path):
            print("DATASET EXISTS! Continue where left off?")
            c = input(" (y/n) >>> ")
            if c != "y":
                raise ValueError(
                    f"Not continuing: Dataset {data_path} exists. Delete it if you like and try again"
                )
        else:
            os.makedirs(data_path, exist_ok=True)
        gap = dataset_limit - len(os.listdir(data_path))

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # ----------------------------------------------------------------------------------------------------------------
    # Load / initialize model

    model = load_model(setup["model"], setup["model_file"],
                       domain="sim")[0].to(device)
    oracle = load_model("oracle")[0]

    # ----------------------------------------------------------------------------------------------------------------
    # Continue where we left off - load the model and set the iteration/epoch number

    for start_iteration in range(10000):
        epfname = epoch_dag_filename(run_name, start_iteration)
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_iteration > 0:
        print(
            f"DAG: CONTINUING DAGGER TRAINING FROM ITERATION: {start_iteration}"
        )
        load_pytorch_model(model,
                           epoch_dag_filename(run_name, start_iteration - 1))

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize trainer

    trainer = Trainer(model,
                      epoch=start_iteration,
                      name=setup["model"],
                      run_name=setup["run_name"])
    trainer.set_dataset_names([dagger_dataset_name(run_name)])

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize policy roller

    roller = SimpleParallelPolicyRoller(
        num_workers=params["num_workers"],
        device=params["device"],
        policy_name=setup["model"],
        policy_file=setup["model_file"],
        oracle=oracle,
        dataset_save_name=dagger_dataset_name(run_name),
        no_reward=True)
    rollout_sampler = RolloutSampler(roller)

    # ----------------------------------------------------------------------------------------------------------------
    # Train DAgger - loop over iterations; in each, prune, rollout, and train an epoch

    print("SUPP: Beginning training...")
    for iteration in range(start_iteration, num_iterations):
        print(f"DAG: Starting iteration {iteration}")

        # Remove extra rollouts to keep within DAggerFM limit
        prune_dataset(run_name, dataset_limit)

        # Rollout and collect more data for training and evaluation
        policy_state = model.get_policy_state()
        rollout_sampler.sample_n_rollouts(
            n=gap if iteration == 0 else params["train_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="train",
            dagger_beta=dagger_beta(params, iteration))

        eval_rollouts = rollout_sampler.sample_n_rollouts(
            n=params["eval_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="dev",
            dagger_beta=0)

        # Kill airsim instances so that they don't take up GPU memory and in general slow things down during training
        roller.kill_airsim()

        # Evaluate success / metrics and save to tensorboard
        if setup["eval_nl"]:
            evaler = DataEvalNL(run_name,
                                entire_trajectory=False,
                                save_images=False)
            evaler.evaluate_dataset(eval_rollouts)
            results = evaler.get_results()
            print("Results:", results)
            evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

        # Do one epoch of supervised training
        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)
        #test_loss = trainer.train_epoch(env_list_common=dev_envs_common, env_list_sim=dev_envs_sim, eval=True)

        # Save the model to file
        print("SUPP: Epoch", iteration, "train_loss:", train_loss)
        save_pytorch_model(model, epoch_dag_filename(run_name, iteration))
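dagger_beta(params, iteration) is a project helper; DAgger conventionally decays the probability of taking the oracle action across iterations. A sketch of one common schedule and the action mixing it drives (the exponential decay is an assumption, not necessarily the project's schedule):

import random

def dagger_beta(iteration, decay=0.9):
    # Probability of following the oracle at this iteration (assumed schedule)
    return decay ** iteration

def mixed_action(oracle_action, policy_action, beta):
    return oracle_action if random.random() < beta else policy_action

print([round(dagger_beta(i), 2) for i in range(5)])  # [1.0, 0.9, 0.81, 0.73, 0.66]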
Example #18
def evaluate():
    P.initialize_experiment()
    params = P.get_current_parameters()
    setup = params["Setup"]

    # import pdb;pdb.set_trace()
    models = []
    for i in range(setup["num_workers"]):
        model, model_loaded = load_model()
        if setup["restore_weights_name"]:
            restore_pretrained_weights(model, setup["restore_weights_name"],
                                       setup["fix_restored_weights"])
        models.append(model)

    eval_envs = get_correct_eval_env_id_list()

    roll_out_params = RollOutParams() \
                        .setModelName(setup["model"]) \
                        .setModelFile(setup["model_file"]) \
                        .setRunName(setup["run_name"]) \
                        .setSetupName(P.get_setup_name()) \
                        .setEnvList(eval_envs) \
                        .setMaxDeviation(400) \
                        .setHorizon(100) \
                        .setStepsToForceStop(10) \
                        .setPlot(False) \
                        .setShowAction(False) \
                        .setIgnorePolicyStop(False) \
                        .setPlotDir("evaluate/" + setup["run_name"]) \
                        .setSavePlots(True) \
                        .setRealtimeFirstPerson(False) \
                        .setSaveSamples(False) \
                        .setBuildTrainData(False) \
                        .setSegmentReset("always") \
                        .setSegmentLevel(True) \
                        .setFirstSegmentOnly(False) \
                        .setDebug(setup["debug"]) \
                        .setCuda(setup["cuda"])

    custom_eval = "Eval" in params and params["Eval"]["custom_eval"]
    instructions = None
    if custom_eval:
        examples = params["Eval"]["examples"]
        eval_envs, eval_sets, eval_segs, instructions = tuple(
            map(lambda m: list(m), list(zip(*examples))))
        print("!! Running custom evaluation with the following setup:")
        print(examples)
        roll_out_params.setEnvList(eval_envs)
        roll_out_params.setSegList(eval_segs)
        roll_out_params.setCustomInstructions(instructions)

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"])
    else:
        roller = PolicyRoller()

    dataset = roller.roll_out_policy(roll_out_params)

    results = {}
    if setup["eval_landmark_side"]:
        evaler = DataEvalLandmarkSide(setup["run_name"])
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()
    if setup["eval_nl"]:
        evaler = DataEvalNL(setup["run_name"],
                            save_images=True,
                            entire_trajectory=False,
                            custom_instr=instructions)
        evaler.evaluate_dataset(dataset)
        results = evaler.get_results()

    print("Results:", results)
Example #19
def train_dagger():
    P.initialize_experiment()
    global PARAMS
    PARAMS = P.get_current_parameters()["Dagger"]
    setup = P.get_current_parameters()["Setup"]
    roller = pick_policy_roller(setup)

    save_json(PARAMS,
              get_dagger_data_dir(setup, real_drone=False) + "run_params.json")

    # Load less tf data, but sample dagger rollouts from more environments to avoid overfitting.
    train_envs, dev_envs, test_envs = data_io.instructions.get_restricted_env_id_lists(
        max_envs=PARAMS["max_envs_dag"])

    all_train_data_real, all_dev_data_real = \
        data_io.train_data.load_supervised_data("real", max_envs=PARAMS["max_envs_sup"], split_segments=PARAMS["segment_level"])
    all_train_data_sim, all_dev_data_sim = \
        data_io.train_data.load_supervised_data("simulator", max_envs=PARAMS["max_envs_sup"], split_segments=PARAMS["segment_level"])

    print("Loaded data: ")
    print(
        f"   Real train {len(all_train_data_real)}, dev {len(all_dev_data_real)}"
    )
    print(
        f"   Sim train {len(all_train_data_sim)}, dev {len(all_dev_data_sim)}")

    # Load and re-save models from supervised learning stage
    model_sim, _ = load_model(setup["model"],
                              setup["sim_model_file"],
                              domain="sim")
    model_real, _ = load_model(setup["model"],
                               setup["real_model_file"],
                               domain="real")
    model_critic, _ = load_model(setup["critic_model"],
                                 setup["critic_model_file"])
    data_io.model_io.save_pytorch_model(
        model_sim, get_latest_model_filename(setup, "sim"))
    data_io.model_io.save_pytorch_model(
        model_real, get_latest_model_filename(setup, "real"))
    data_io.model_io.save_pytorch_model(
        model_critic, get_latest_model_filename(setup, "critic"))

    last_trainer_state = None

    for iteration in range(0, PARAMS["max_iterations"]):
        gc.collect()
        print("-------------------------------")
        print("DAGGER ITERATION : ", iteration)
        print("-------------------------------")

        # If we have too many training examples in memory, discard uniformly at random to keep a somewhat fixed bound
        max_samples = PARAMS["max_samples_in_memory"]
        all_train_data_real = discard_if_too_many(all_train_data_real,
                                                  max_samples)
        all_train_data_sim = discard_if_too_many(all_train_data_sim,
                                                 max_samples)

        # Roll out new data in simulation only
        latest_model_filename_sim = get_latest_model_filename(setup, "sim")
        train_data_i_sim, dev_data_i_sim = collect_iteration_data(
            roller, iteration, train_envs, test_envs,
            latest_model_filename_sim)

        # TODO: Save
        #data_io.train_data.save_dataset(dataset_name, train_data_i, dagger_data_dir + "train_" + str(iteration))
        #data_io.train_data.save_dataset(dataset_name, test_data_i, dagger_data_dir + "test_" + str(iteration))

        # Aggregate the dataset
        all_train_data_sim += train_data_i_sim
        all_dev_data_sim += dev_data_i_sim
        print("Aggregated dataset!)")
        print("Total samples: ", len(all_train_data_sim))
        print("New samples: ", len(train_data_i_sim))

        data_io.train_data.save_dataset(
            "sim_dagger", all_train_data_sim,
            get_dagger_data_dir(setup, False) + "train_latest")
        data_io.train_data.save_dataset(
            "sim_dagger", dev_data_i_sim,
            get_dagger_data_dir(setup, False) + "test_latest")

        model_sim, _ = load_model(setup["model"],
                                  get_latest_model_filename(setup, "sim"),
                                  domain="sim")
        model_real, _ = load_model(setup["model"],
                                   get_latest_model_filename(setup, "real"),
                                   domain="real")
        model_critic, _ = load_model(
            setup["critic_model"], get_latest_model_filename(setup, "critic"))

        trainer = TrainerBidomain(model_real,
                                  model_sim,
                                  model_critic,
                                  state=last_trainer_state)

        # Hacky reset of the rollout flag after doing the rollouts
        import rollout.run_metadata as run_md
        run_md.IS_ROLLOUT = False

        # Train on the newly aggregated dataset
        num_epochs = PARAMS["epochs_per_iteration"]
        for epoch in range(num_epochs):

            loss = trainer.train_epoch(data_list_real=all_train_data_real,
                                       data_list_sim=all_train_data_sim)
            dev_loss = trainer.train_epoch(data_list_real=all_dev_data_real,
                                           data_list_sim=dev_data_i_sim,
                                           eval=True)

            data_io.model_io.save_pytorch_model(
                model_sim, get_latest_model_filename(setup, "sim"))
            data_io.model_io.save_pytorch_model(
                model_real, get_latest_model_filename(setup, "real"))
            data_io.model_io.save_pytorch_model(
                model_critic, get_latest_model_filename(setup, "critic"))

            print("Epoch", epoch, "Loss: Train:", loss, "Test:", dev_loss)

        data_io.model_io.save_pytorch_model(
            model_real,
            get_model_filename_at_iteration(setup, iteration, "real"))
        data_io.model_io.save_pytorch_model(
            model_sim, get_model_filename_at_iteration(setup, iteration,
                                                       "sim"))
        data_io.model_io.save_pytorch_model(
            model_critic,
            get_model_filename_at_iteration(setup, iteration, "critic"))

        last_trainer_state = trainer.get_state()
Example #20
def train_supervised_bidomain():
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    supervised_params = P.get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])

    if P.get_current_parameters()["Training"].get("use_oracle_critic", False):
        model_oracle_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])
        # This changes the name in the summary writer to get a different color plot
        oname = model_oracle_critic.model_name
        model_oracle_critic.set_model_name(oname + "_oracle")
        model_oracle_critic.model_name = oname
    else:
        model_oracle_critic = None

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    real_filename = f"supervised_{setup['model']}_{setup['run_name']}_real"
    sim_filename  = f"supervised_{setup['model']}_{setup['run_name']}_sim"
    critic_filename = f"supervised_{setup['critic_model']}_{setup['run_name']}_critic"

    # TODO: (Maybe) Implement continuing of training

    # Bidata means that we treat Lani++ and LaniOriginal examples differently, only computing domain-adversarial stuff on Lani++
    bidata = P.get_current_parameters()["Training"].get("bidata", False)
    if bidata == "v2":
        trainer = TrainerBidomainBidata(model_real, model_sim, model_critic, model_oracle_critic, epoch=0)
        train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
        train_envs_sim = train_envs
        dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
        dev_envs_sim = dev_envs
    elif bidata:
        trainer = TrainerBidomainBidata(model_real, model_sim, model_critic, model_oracle_critic, epoch=0)
        train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
        train_envs_sim = [e for e in train_envs if e < 6000]
        dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
        dev_envs_sim = [e for e in dev_envs if e < 6000]
    else:
        trainer = TrainerBidomain(model_real, model_sim, model_critic, model_oracle_critic, epoch=0)

    print("Beginning training...")
    best_test_loss = 1000
    for epoch in range(num_epochs):
        if bidata:
            train_loss = trainer.train_epoch(env_list_common=train_envs_common, env_list_sim=train_envs_sim, eval=False)
            test_loss = trainer.train_epoch(env_list_common=dev_envs_common, env_list_sim=dev_envs_sim, eval=True)
        else:
            train_loss = trainer.train_epoch(env_list=train_envs, eval=False)
            test_loss = trainer.train_epoch(env_list=dev_envs, eval=True)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(model_real, real_filename)
            save_pytorch_model(model_sim, sim_filename)
            save_pytorch_model(model_critic, critic_filename)
            print(f"Saved models in: \n Real: {real_filename} \n Sim: {sim_filename} \n Critic: {critic_filename}")

        print ("Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)
        save_pytorch_model(model_real, f"tmp/{real_filename}_epoch_{epoch}")
        save_pytorch_model(model_sim, f"tmp/{sim_filename}_epoch_{epoch}")
        save_pytorch_model(model_critic, f"tmp/{critic_filename}_epoch_{epoch}")
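
The environment-id ranges hard-coded in train_supervised_bidomain encode a dataset convention: ids in [6000, 7000) are the "common" Lani++ environments with paired real/sim data, while lower ids are sim-only LaniOriginal. A small helper, given here only as an illustrative sketch of that convention, would make the split explicit:

def split_env_ids_by_domain(env_ids):
    # Illustrative only: partition env ids using the convention above
    common = [e for e in env_ids if 6000 <= e < 7000]
    sim_only = [e for e in env_ids if e < 6000]
    return common, sim_only
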
Exemple #21
0
def train_supervised():
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    supervised_params = get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]

    model, model_loaded = load_model()
    print("model:", model)
    print("model type:", type(model))
    print("Loading data")
    train_envs, dev_envs, test_envs = get_all_env_id_lists(
        max_envs=setup["max_envs"])
    if "split_train_data" in supervised_params and supervised_params[
            "split_train_data"]:
        split_name = supervised_params["train_data_split"]
        split = load_env_split()[split_name]
        train_envs = [env_id for env_id in train_envs if env_id in split]
        print("Using " + str(len(train_envs)) + " envs from dataset split: " +
              split_name)

    filename = "supervised_" + setup["model"] + "_" + setup["run_name"]

    # load_pytorch_model appends ".pytorch" to the path it is given, but
    # file_exists expects the full filename, hence the two path variants.
    model_path = f"tmp/{filename}_epoch_{supervised_params['start_epoch']}"
    model_path_with_extension = model_path + ".pytorch"
    print("model path:", model_path_with_extension)
    if supervised_params["start_epoch"] > 0:
        if file_exists(model_path_with_extension):
            print("Continuing training from:", model_path_with_extension)
            load_pytorch_model(model, model_path)
        else:
            print("Couldn't continue training. Model file doesn't exist at:")
            print(model_path_with_extension)
            exit(-1)
    ## If you just want to use the pretrained model
    # load_pytorch_model(model, "supervised_pvn_stage1_train_corl_pvn_stage1")

    # all_train_data, all_test_data = data_io.train_data.load_supervised_data(max_envs=100)
    if setup["restore_weights_name"]:
        restore_pretrained_weights(model, setup["restore_weights_name"],
                                   setup["fix_restored_weights"])

    # Add a tensorboard logger to the model and trainer
    tensorboard_dir = get_current_parameters()["Environment"]["tensorboard_dir"]
    logger = Logger(tensorboard_dir)
    model.logger = logger
    if hasattr(model, "goal_good_criterion"):
        print("gave logger to goal evaluator")
        model.goal_good_criterion.logger = logger

    trainer = Trainer(model,
                      epoch=supervised_params["start_epoch"],
                      name=setup["model"],
                      run_name=setup["run_name"])

    trainer.logger = logger

    print("Beginning training...")
    best_test_loss = float("inf")

    # When resuming, continue the epoch numbering after the loaded checkpoint
    continue_epoch = (supervised_params["start_epoch"] + 1
                      if supervised_params["start_epoch"] > 0 else 0)
    rng = range(continue_epoch, num_epochs)
    print("filename:", filename)

    for epoch in rng:
        train_loss = trainer.train_epoch(train_data=None,
                                         train_envs=train_envs,
                                         eval=False)
        # train_loss = trainer.train_epoch(train_data=all_train_data, train_envs=train_envs, eval=False)

        trainer.model.correct_goals = 0
        trainer.model.total_goals = 0

        test_loss = trainer.train_epoch(train_data=None,
                                        train_envs=dev_envs,
                                        eval=True)

        print("GOALS: ", trainer.model.correct_goals,
              trainer.model.total_goals)

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            save_pytorch_model(trainer.model, filename)
            print("Saved model in:", filename)
        print("Epoch", epoch, "train_loss:", train_loss, "test_loss:",
              test_loss)
        save_pytorch_model(trainer.model,
                           "tmp/" + filename + "_epoch_" + str(epoch))
        if hasattr(trainer.model, "save"):
            trainer.model.save(epoch)
        save_pretrained_weights(trainer.model, setup["run_name"])
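
A hypothetical helper (not part of the original code) could hide the ".pytorch" extension mismatch noted in train_supervised above:

def checkpoint_exists(path_without_extension):
    # file_exists needs the full name; load_pytorch_model appends the extension
    return file_exists(path_without_extension + ".pytorch")
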
def train_top_down_pred():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])

    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    for b, batch in enumerate(dataloader):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)
            #write_instruction(real_instruction_str)
            #instruction_str = real_instruction_str

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            config_path = ("/storage/dxsun/unreal_config_nl/configs/configs/"
                           f"random_config_{env_id}.json")
            with open(config_path) as fp:
                config = json.load(fp)
            print(config)
            while keep_going:
                write_real_instruction(real_instruction_str)

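                # Poll the instruction file for commands: "CMD: Next" advances
                # to the next segment, "CMD: Reset" restarts the current one,
                # and any multi-word line is executed as an instruction.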
                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                #instruction_str = read_instruction_file()
                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str,
                                                       word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(
                    tmp, token2term)


                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]
                shp = mask_pred.shape
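                # Softmax over all spatial positions of each of the two channels,
                # turning each channel into a spatial probability map.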
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)
                #mask_pred = softmax2d(mask_pred)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)

                affine_pred_to_g = np.dot(
                    affine_scale_down, np.dot(affine_s_to_g, affine_scale_up))
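                # Composition (applied right to left): scale the 32x32 mask up
                # to image pixels (S=8), apply the sim-to-global affine, then
                # scale back down: affine_pred_to_g = down @ s_to_g @ up.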
                #affine_pred_to_g_t = torch.from_numpy(affine_pred_to_g).float()

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(
                    1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g,
                                              32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0,
                                             1)).float()[np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Create a batch axis for pytorch
                #mask_pred_g = affine2d(mask_pred, affine_pred_to_g_t[np.newaxis, :, :])

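                # Rescale each channel to roughly [0, 1] for visualization only
                # (channel 0 is additionally boosted 2x to stand out).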
                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)
                pred_viz_np = presenter.overlaid_image(image.data,
                                                       mask_pred_np,
                                                       channel=0)
                # TODO: OpenCV colours
                labl_viz_np = presenter.overlaid_image(image.data,
                                                       label_mask.data,
                                                       channel=0)
                # Labels are intentionally not shown: the side-by-side image is
                # replaced by the prediction overlay alone.
                # viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np

                viz_img = presenter.overlay_text(viz_img_np,
                                                 instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                write_instruction("")
Exemple #23
0
    def __init__(self, params, save_rollouts_to_dataset="", device=None):
        self.iterations_per_epoch = params.get("iterations_per_epoch", 1)
        self.test_iterations_per_epoch = params.get(
            "test_iterations_per_epoch", 1)
        self.num_workers = params.get("num_workers")
        self.num_rollouts_per_iter = params.get("num_rollouts_per_iter")
        self.model_name = params.get("model") or params.get("rl_model")
        self.init_model_file = params.get("model_file")
        self.num_steps = params.get("trajectory_len")
        self.device = device

        self.summary_every_n = params.get("plot_every_n")

        self.roller = SimpleParallelPolicyRoller(
            num_workers=self.num_workers,
            device=self.device,
            policy_name=self.model_name,
            policy_file=self.init_model_file,
            dataset_save_name=save_rollouts_to_dataset)

        self.rollout_sampler = RolloutSampler(self.roller)

        # The full model loads its own weights from file based on the model name
        self.full_model, _ = load_model(self.model_name)
        self.full_model = self.full_model.to(self.device)
        self.actor_critic = self.full_model.stage2_action_generation
        # Keep stage 1 in eval mode to disable dropout; the actor-critic stays
        # in train mode (its eval() call is deliberately left disabled).
        #self.actor_critic.eval()
        self.full_model.stage1_visitation_prediction.eval()
        self.writer = LoggingSummaryWriter(
            log_dir=f"{get_logging_dir()}/runs/{params['run_name']}/ppo")

        self.global_step = 0
        self.stage1_updates = 0

        clip_param = params.get("clip")
        num_mini_batch = params.get("num_mini_batch")
        value_loss_coef = params.get("value_loss_coef")
        lr = params.get("lr")
        eps = params.get("eps")
        max_grad_norm = params.get("max_grad_norm")
        use_clipped_value_loss = params.get("use_clipped_value_loss")

        self.entropy_coef = params.get("entropy_coef")
        self.entropy_schedule_epochs = params.get("entropy_schedule_epochs",
                                                  [])
        self.entropy_schedule_multipliers = params.get(
            "entropy_schedule_multipliers", [])

        self.minibatch_size = params.get("minibatch_size")

        self.use_gae = params.get("use_gae")
        self.gamma = params.get("gamma")
        self.gae_lambda = params.get("gae_lambda")
        self.intrinsic_reward_only = params.get("intrinsic_reward_only")

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)

        print(
            f"PPO trainable parameters: {get_n_trainable_params(self.actor_critic)}"
        )
        print(
            f"PPO actor-critic all parameters: {get_n_params(self.actor_critic)}"
        )

        self.ppo = PPO(self.actor_critic,
                       clip_param=clip_param,
                       ppo_epoch=1,
                       num_mini_batch=num_mini_batch,
                       value_loss_coef=value_loss_coef,
                       entropy_coef=self.entropy_coef,
                       lr=lr,
                       eps=eps,
                       max_grad_norm=max_grad_norm,
                       use_clipped_value_loss=use_clipped_value_loss)
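
The constructor reads entropy_schedule_epochs and entropy_schedule_multipliers, but this snippet ends before they are used. A plausible, purely illustrative way such a stepwise schedule could be applied (hypothetical; the original implementation may differ):

def scheduled_entropy_coef(base_coef, epoch, schedule_epochs, schedule_multipliers):
    # Step the entropy coefficient at each epoch boundary; keep the base
    # value before the first boundary.
    coef = base_coef
    for boundary, mult in zip(schedule_epochs, schedule_multipliers):
        if epoch >= boundary:
            coef = base_coef * mult
    return coef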