Beispiel #1
0
def take_pics():
    """Snap and save a bird's-eye image for each selected environment.

    Environment ids are dealt round-robin to ``NUM_WORKERS`` simulator
    interfaces. At every step each worker that still has an environment
    assigned first loads it, then (in a second pass) takes a bird's-eye
    snapshot, flips it along axis 0 and writes it to the path returned by
    ``paths.get_env_image_path``. Environments whose image file already
    exists are skipped in both passes.
    """
    P.initialize_experiment()
    train_i, dev_i, test_i, _ = get_all_instructions()
    all_instructions = {**train_i, **dev_i, **test_i}

    # Create the directory that holds the images (derived from env 0's path).
    os.makedirs(os.path.dirname(paths.get_env_image_path(0)), exist_ok=True)

    workers = [PomdpInterface(instance_id=idx) for idx in range(NUM_WORKERS)]

    # NOTE(review): only this single hard-coded environment is processed; the
    # keys of all_instructions are not used to pick environments. Looks like
    # a debugging leftover - confirm before relying on it.
    keylist = [6825]

    # Round-robin assignment of environment ids to workers.
    per_worker = [[] for _ in range(NUM_WORKERS)]
    for pos, key in enumerate(keylist):
        per_worker[pos % NUM_WORKERS].append(key)

    time.sleep(1.0)
    for step in range(len(keylist)):
        # (worker index, env id) for every worker that has work at this step.
        assigned = [(w, ids[step])
                    for w, ids in enumerate(per_worker)
                    if step < len(ids)]

        # Pass 1: load the environment on each worker.
        # FIXME: This assumes that there is only 1 instruction set per env!
        any_configured = False
        for w, env_id in assigned:
            fname = paths.get_env_image_path(env_id)
            if os.path.isfile(fname):
                print("Img exists: " + fname)
                continue

            any_configured = True
            first_set = all_instructions[env_id][0]
            workers[w].set_environment(env_id,
                                       first_set["instructions"],
                                       fast=True)
            print(f"setting env on worker {w} iter {step} env_id: {env_id}")

        # Brief pause after (re)configuring at least one environment.
        if any_configured:
            time.sleep(0.1)

        # Pass 2: take the picture on each worker and save it.
        for w, env_id in assigned:
            fname = paths.get_env_image_path(env_id)
            if os.path.isfile(fname):
                print("Img exists: " + fname)
                continue

            # Two snapshots are requested; the first result is discarded.
            # NOTE(review): presumably a warm-up frame - confirm.
            workers[w].snap_birdseye(fast=True, small_env=SMALL_ENV)
            snapshot = workers[w].snap_birdseye(fast=True, small_env=SMALL_ENV)
            imsave(fname, np.flip(snapshot, 0))
            print(f"saving pic on worker {w} iter {step} env_id: {env_id}")
Beispiel #2
0
    def __init__(self,
                 instance_id=0,
                 real_drone=False,
                 policy=None,
                 oracle=None,
                 no_reward=False):
        """Set up the presenter, instruction data and environment interface.

        :param instance_id: id forwarded to ``PomdpInterface``.
        :param real_drone: forwarded to ``PomdpInterface`` as ``is_real``.
        :param policy: stored on the instance as ``self.policy``.
        :param oracle: stored on the instance as ``self.oracle``.
        :param no_reward: stored on the instance as ``self.no_reward``.
        """
        self.presenter = Presenter()
        self.instance_id = instance_id

        # These two attributes exist (as None) before load_all_envs() runs and
        # are overwritten with its results right after.
        self.word2token = None
        self.all_instructions = None
        loaded = self.load_all_envs()
        (self.all_env_ids,
         self.all_instructions,
         self.corpus,
         self.token2term,
         self.word2token) = loaded

        self.env = PomdpInterface(instance_id=self.instance_id,
                                  is_real=real_drone)

        self.policy = policy
        self.oracle = oracle
        self.no_reward = no_reward
Beispiel #3
0
        self.thread.daemon = True
        self.thread.start()

    def run(self):
        """Delegate to ``self.mon.run()``; nothing is returned."""
        monitor = self.mon
        monitor.run()

    def get_command(self):
        """Return the monitor's ``current_vel`` attribute unchanged."""
        current = self.mon.current_vel
        return current


# Module-level setup, executed on import/run: initialize the experiment with
# the name "nl_datacollect_cage" before any other object is constructed.
initialize_experiment("nl_datacollect_cage")

# Keyboard teleoperation object and a Rate helper.
# NOTE(review): whether 0.1 is a period in seconds or a frequency is not
# visible here - confirm against the Rate class.
teleoper = KeyTeleop()
rate = Rate(0.1)

# Environment interface built with its default constructor arguments.
env = PomdpInterface()

# Instruction sets for the train/dev/test splits; the fourth returned value
# is discarded.
train_instructions, dev_instructions, test_instructions, _ = get_all_instructions(
)

# Module-level counters, both starting at zero (their use is not shown here).
count = 0
stuck_count = 0


def show_depth(image):
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb,
Beispiel #4
0
from pykeyboard import PyKeyboardEvent

from drones.airsim_interface.rate import Rate
from data_io.instructions import get_all_instructions
from pomdp.pomdp_interface import PomdpInterface
from visualization import Presenter

from parameters.parameter_server import initialize_experiment, get_current_parameters
from utils.keyboard import KeyTeleop

# Module-level setup, executed on import/run: initialize the experiment
# (no arguments are passed) before any other object is constructed.
initialize_experiment()

# Keyboard teleoperation object and a Rate helper.
# NOTE(review): whether 0.1 is a period in seconds or a frequency is not
# visible here - confirm against the Rate class.
teleoper = KeyTeleop()
rate = Rate(0.1)

# Environment interface; is_real is read from the "real_drone" entry of the
# "Setup" section of the current parameters.
env = PomdpInterface(is_real=get_current_parameters()["Setup"]["real_drone"])

# Instruction sets for the train/dev/test splits; the fourth returned value
# is discarded.
train_instructions, dev_instructions, test_instructions, _ = get_all_instructions(
)

# Module-level counters, both starting at zero (their use is not shown here).
count = 0
stuck_count = 0


def show_depth(image):
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb,
Beispiel #5
0
def interactive_demo():
    """Run the loaded model on dev-set segments using instructions typed in the UI.

    For every dev instruction set whose env id lies strictly between the
    ``env_range_start`` and ``env_range_end`` setup parameters, the
    environment is loaded and each valid segment is visited. For a segment
    the function polls the UI instruction file until it sees either

    * ``"CMD: Next"``  - move on to the next segment,
    * ``"CMD: Reset"`` - reset the current segment and keep polling, or
    * any text containing at least one space - treated as the instruction
      to execute.

    The instruction is tokenized, the segment is reset and the model is
    stepped until the environment reports ``done``.
    """
    P.initialize_experiment()
    InteractAPI.launch_ui()

    rate = Rate(0.1)

    env = PomdpInterface(
        is_real=get_current_parameters()["Setup"]["real_drone"])
    # Only the dev split and the corpus are needed here.
    _, dev_instructions, _, corpus = get_all_instructions()
    _, word2token = get_word_to_token_map(corpus)

    # Run on dev set, restricted to the configured env id range.
    # NOTE(review): both bounds are exclusive, so with the default start of 0
    # an env with id 0 is skipped - confirm this is intended.
    env_range_start = get_current_parameters()["Setup"].get(
        "env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get(
        "env_range_end", 10e10)
    interact_instructions = {
        k: v
        for k, v in dev_instructions.items()
        if env_range_start < k < env_range_end
    }

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    InteractAPI.write_empty_instruction()
    InteractAPI.write_real_instruction("None")
    instruction_str = InteractAPI.read_instruction_file()
    print("Initial instruction: ", instruction_str)

    for instruction_sets in interact_instructions.values():
        for instruction_set in instruction_sets:
            env_id = instruction_set['env']
            env.set_environment(env_id, instruction_set["instructions"])

            presenter = Presenter()
            cumulative_reward = 0
            for seg_idx in range(len(instruction_set["instructions"])):

                print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                real_instruction_str = instruction_set["instructions"][
                    seg_idx]["instruction"]
                InteractAPI.write_real_instruction(real_instruction_str)
                if not env.set_current_segment(seg_idx):
                    continue
                env.reset(seg_idx)

                keep_going = True
                while keep_going:
                    InteractAPI.write_real_instruction(real_instruction_str)

                    # Poll the instruction file (about every 200 ms) until
                    # the user issues a command or types an instruction.
                    while True:
                        cv2.waitKey(200)
                        instruction = InteractAPI.read_instruction_file()
                        if instruction == "CMD: Next":
                            print("Advancing")
                            keep_going = False
                            InteractAPI.write_empty_instruction()
                            break
                        elif instruction == "CMD: Reset":
                            print("Resetting")
                            env.reset(seg_idx)
                            InteractAPI.write_empty_instruction()
                        elif len(instruction.split(" ")) > 1:
                            instruction_str = instruction
                            break

                    if not keep_going:
                        # "CMD: Next": leave this segment without executing.
                        break

                    env.override_instruction(instruction_str)
                    tok_instruction = tokenize_instruction(
                        instruction_str, word2token)

                    state = env.reset(seg_idx)
                    # Fixed: the f prefix used to sit inside the quotes, so
                    # the placeholder text was printed literally.
                    print(f"Executing: {instruction_str}")
                    while True:
                        rate.sleep()
                        action, _ = model.get_action(state, tok_instruction)

                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        presenter.show_sample(state, action, reward,
                                              cumulative_reward,
                                              instruction_str)
                        if done:
                            break
                    InteractAPI.write_empty_instruction()
                    print("Segment finished!")
        print("Env finished!")
def train_top_down_pred():
    """Interactively run the top-down prediction model and roll out its output.

    For every batch of the "supervised" evaluation dataset the matching
    environment is loaded. For every segment of the batch the function polls
    the UI instruction file until it sees either

    * ``"CMD: Next"``  - move on to the next segment,
    * ``"CMD: Reset"`` - reset the current segment and keep polling, or
    * any text containing at least one space - treated as the instruction
      to execute.

    The typed instruction is tokenized and fed, together with the segment's
    image, to the top-down model. The predicted mask is softmax-normalized,
    transformed with the inverse of the batch's ``affines_g_to_s`` matrix,
    handed to the wrapper model via ``set_ground_truth_visitation_d``,
    visualized, and finally ``rollout_model`` is called with the wrapper
    model.
    """
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, _ = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])

    exec_model, _ = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    # Fixed: the test split used to be left out (train was merged twice), so
    # looking up a test-split env id below raised KeyError. Later entries win
    # on duplicate keys, so train still overrides dev exactly as before.
    all_instr = {
        **test_instructions,
        **dev_instructions,
        **train_instructions,
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    # The loader is fully materialized before the interactive loop starts,
    # as in the original code.
    for batch in list(dataloader):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        # batch_size is 1, so index 0 selects the only sample in the batch.
        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            # NOTE(review): called without token2term here but with it further
            # down - confirm the one-argument form is intended.
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            # NOTE(review): machine-specific absolute path; this raises if the
            # file is missing on the current machine.
            with open(
                    "/storage/dxsun/unreal_config_nl/configs/configs/random_config_"
                    + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)
            while keep_going:
                write_real_instruction(real_instruction_str)

                # Poll the instruction file (about every 200 ms) until the
                # user issues a command or types an instruction.
                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    # keep_going is False, so this ends the while loop.
                    continue

                tok_instruction = tokenize_instruction(instruction_str,
                                                       word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(
                    tmp, token2term)

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]
                shp = mask_pred.shape
                # Softmax over each of the two rows of the flattened
                # prediction, then restore the original shape.
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)

                # Scale up by S, apply the inverse affine, then scale back
                # down.
                affine_pred_to_g = np.dot(
                    affine_scale_down, np.dot(affine_s_to_g, affine_scale_up))

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(
                    1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g,
                                              32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                # Back to channel-first layout with a leading batch axis.
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0,
                                             1)).float()[np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Rescale both channels for display only; channel 0 ends up
                # in [0, 2], channel 1 in [0, 1].
                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)
                pred_viz_np = presenter.overlaid_image(image.data,
                                                       mask_pred_np,
                                                       channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                labl_viz_np = presenter.overlaid_image(image.data,
                                                       label_mask.data,
                                                       channel=0)
                # NOTE(review): the side-by-side image is built and then
                # immediately replaced by the prediction alone, so the label
                # overlay is never displayed. Left as-is.
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np

                viz_img = presenter.overlay_text(viz_img_np,
                                                 instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                write_instruction("")
Beispiel #7
0
    def roll_out_policy(self, params):
        """
        Execute the policy described by ``params`` on every requested environment.

        The policy is loaded from file inside this function (a neural network
        cannot be pickled and passed in directly), a ``PomdpInterface`` is
        created, and the policy is rolled out on each non-empty instruction
        set of each environment in ``params.envs`` for which instructions are
        available. Most of the code is error checking and logging; failures
        are printed and the rollout continues where possible.

        :param params: RollOutParams instance defining the parameters of the rollout
        :return: Aggregated dataset with images, states and oracle actions.
        If params.isSegmentLevel(), the returned dataset will be a list (over environments) of samples
        otherwise it will be a list (over environments) of lists (over segments) of samples
        """

        def report_failure(error):
            # Shared by both handlers below; modules are imported lazily, only
            # when an error is actually reported.
            import traceback
            from utils.colors import print_error
            print_error("Error encountered during policy rollout!")
            print_error(error)
            print_error(traceback.format_exc())

        if params.isDebug():
            run_metadata.WRITE_DEBUG_DATA = True

        dataset = []
        try:
            # Load the neural network policy from file
            params.loadPolicy()
            assert params.hasPolicy()

            self.env = PomdpInterface(instance_id=self.instance_id,
                                      is_real=params.real_drone)

            # Only the instructions and the word->token map are used here.
            _, all_instructions, _, _, self.word2token = self.load_all_envs()
            env_ids = params.envs
            seg_indices = params.seg_list
            custom_instructions = params.custom_instructions

            # Keep only the envs we have instructions for.
            # NOTE(review): seg_indices / custom_instructions are indexed
            # below by position in this filtered list; if they are aligned
            # with params.envs, dropping an env shifts them - confirm.
            valid_env_ids = [
                candidate for candidate in env_ids
                if candidate in all_instructions
            ]

            for i, env_id in enumerate(valid_env_ids):
                # All non-empty sets of instructions for this environment
                instruction_sets = [
                    s for s in all_instructions[env_id] if len(s) > 0
                ]

                if len(instruction_sets) == 0:
                    print("No instruction sets for env: " + str(env_id))

                for j, instructions_set in enumerate(instruction_sets):
                    try:
                        seg_id = None
                        if seg_indices is not None:
                            seg_id = seg_indices[i]

                        custom_instr = None
                        if custom_instructions is not None:
                            custom_instr = custom_instructions[i]

                        import rollout.run_metadata as md
                        md.CUSTOM_INSTR_NO = i
                        # TODO: Check if this works!
                        samples = self.roll_out_on_env(params,
                                                       instructions_set, j,
                                                       seg_id, custom_instr)
                        dataset.append(samples)
                        DebugWriter().commit()

                        # On the real drone only the first instruction set of
                        # each environment is executed.
                        if params.isRealDrone():
                            break

                    except Exception as e:
                        # Report and move on to the next instruction set.
                        report_failure(e)

        except Exception as e:
            report_failure(e)

        self.env.land()

        return dataset
Beispiel #8
0
def automatic_demo():
    """Endlessly replay selected dev-set segments with their original instructions.

    Loops forever over the dev instruction sets whose env id lies strictly
    between the ``env_range_start`` and ``env_range_end`` setup parameters.
    An instruction set is only run if some entry of the module-level
    ``examples`` has the same env id as its first element; of that
    environment only the segment whose index equals the entry's third
    element is executed. The segment's instruction is shown on the
    instruction display, the model is stepped until the environment reports
    ``done``, with ``START_PAUSE`` / ``END_PAUSE`` one-second ticks before
    and after.
    """
    P.initialize_experiment()
    instruction_display = InstructionDisplay()

    rate = Rate(0.1)

    env = PomdpInterface(
        is_real=get_current_parameters()["Setup"]["real_drone"])
    # Only the dev split and the corpus are needed here.
    _, dev_instructions, _, corpus = get_all_instructions()
    _, word2token = get_word_to_token_map(corpus)

    # Run on dev set, restricted to the configured env id range.
    # NOTE(review): both bounds are exclusive, so with the default start of 0
    # an env with id 0 is skipped - confirm this is intended.
    env_range_start = get_current_parameters()["Setup"].get(
        "env_range_start", 0)
    env_range_end = get_current_parameters()["Setup"].get(
        "env_range_end", 10e10)
    interact_instructions = {
        k: v
        for k, v in dev_instructions.items()
        if env_range_start < k < env_range_end
    }

    model, _ = load_model(get_current_parameters()["Setup"]["model"])

    # Loop over the select few examples
    while True:

        for instruction_sets in interact_instructions.values():
            for instruction_set in instruction_sets:
                env_id = instruction_set['env']
                # The last entry of `examples` matching this env id wins.
                found_example = None
                for example in examples:
                    if example[0] == env_id:
                        found_example = example
                if found_example is None:
                    continue
                env.set_environment(env_id, instruction_set["instructions"])

                # presenter / cumulative_reward only serve the visualization
                # call that is disabled in the rollout loop below.
                presenter = Presenter()
                cumulative_reward = 0
                for seg_idx in range(len(instruction_set["instructions"])):
                    if seg_idx != found_example[2]:
                        continue

                    print(f"RUNNING ENV {env_id} SEG {seg_idx}")

                    real_instruction_str = instruction_set["instructions"][
                        seg_idx]["instruction"]
                    instruction_display.show_instruction(real_instruction_str)
                    if not env.set_current_segment(seg_idx):
                        continue
                    env.reset(seg_idx)

                    # Fixed: tokenization used to sit inside the pause loop,
                    # leaving tok_instruction undefined (or stale) when
                    # START_PAUSE is 0.
                    tok_instruction = tokenize_instruction(
                        real_instruction_str, word2token)

                    for _ in range(START_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)

                    state = env.reset(seg_idx)
                    # Fixed: the f prefix used to sit inside the quotes and
                    # the placeholder named a variable that does not exist in
                    # this function.
                    print(f"Executing: {real_instruction_str}")
                    while True:
                        instruction_display.tick()
                        rate.sleep()
                        action, _ = model.get_action(state, tok_instruction)
                        state, reward, done, expired, oob = env.step(action)
                        cumulative_reward += reward
                        if done:
                            break

                    for _ in range(END_PAUSE):
                        instruction_display.tick()
                        time.sleep(1)
                    # Fixed: printed once per segment instead of once per
                    # pause second.
                    print("Segment finished!")
                    instruction_display.show_instruction("...")

            print("Env finished!")