Example #1
import shlex
import subprocess


def check_gpu_usage_and_restart_env(env, nav_env):
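    # Parse the free-memory column reported by nvidia-smi; if fewer than 256 MB
    # remain, end the current scene and rebuild the environment and its
    # navigation wrapper, otherwise keep the existing objects.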
    command = "nvidia-smi --query-gpu=memory.free --format=csv"
    out = subprocess.check_output(
        shlex.split(command)).decode("utf-8").split("\n")
    mb_remain = int(out[1].split()[0])
    if mb_remain < 256:
        env.controller.end_scene(None, None)
        new_env = McsEnv()
        new_nav_env = McsNavWrapper(new_env)
    else:
        new_env = env
        new_nav_env = nav_env
    return new_env, new_nav_env
Example #2
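# A3C-style training worker: each iteration syncs with the shared model, rolls
# out up to num_steps transitions, then computes n-step returns with GAE and
# applies the gradients through the shared optimizer.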
def train(rank, args, shared_model, counter, lock, optimizer):
    torch.manual_seed(args.seed + rank)

    env = McsEnv(seed=args.seed + rank, task="interaction_scenes", scene_type="traversal")
    nav_env, navigator, model, _, _ = get_model_from_task(env, args.task)
    nav_env.reset(random_init=True)
    set_object_goal(navigator, env.scene_config)

    model = model.to(args.device)
    model.train()

    state = navigator.get_observation(nav_env.step_output)
    done = True

    # monitoring
    total_reward_for_num_steps_list = []
    episode_total_rewards_list = []
    avg_reward_for_num_steps_list = []

    total_length = 0
    episode_length = 0
    n_episode = 0
    all_rewards_in_episode = []
    while True:
        # Sync with the shared model

        model.load_state_dict(shared_model.state_dict())

        done_mask = torch.zeros(size=(1,1)).to(args.device)
        undone_mask = torch.ones(size=(1,1)).to(args.device)

        if done:
            rnn_hidden_states = torch.zeros(size=(model.net.num_recurrent_layers, 1, 512)).to(args.device)
            prev_action = torch.zeros(1, 1).to(args.device)
            mask = done_mask
        else:
            rnn_hidden_states = rnn_hidden_states.detach()

        values = []
        log_probs = []
        rewards = []
        entropies = []

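        # Collect a rollout of up to num_steps transitions (or until the episode ends).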
        for step in range(args.num_steps):
            episode_length += 1
            total_length += 1

            batch = batch_obs(state, args.device)
            value, action, action_log_probs, rnn_hidden_states = model.act(batch, rnn_hidden_states, prev_action, mask)
            # torch.cuda.empty_cache()

            prev_action.copy_(action)
            mask = undone_mask

            entropies.append(-action_log_probs * torch.exp(action_log_probs))
            log_probs.append(action_log_probs)

            action_int = action.cpu().numpy()[0][0].item()

            reward, done = navigator.navigation_step_with_reward(
                nav_env, action_int, episode_length >= args.max_episode_length
            )
            state = navigator.get_observation(nav_env.step_output)

            values.append(value)
            rewards.append(reward)
            all_rewards_in_episode.append(reward)

            with lock:
                counter.value += 1

            if done:
                total_length -= 1
                total_reward_for_episode = sum(all_rewards_in_episode)
                episode_total_rewards_list.append(total_reward_for_episode)
                all_rewards_in_episode = []
                episode_success = (reward == 9.99)
                print('Process {} Episode {} Over with Length: {} and Reward: {: .3f}, Success: {}. Total Trained Length: {}'.format(
                    rank, n_episode, episode_length, total_reward_for_episode, episode_success, total_length))

                # if args.device != "cpu:":
                #     env, nav_env = check_gpu_usage_and_restart_env(env, nav_env)
                if episode_success:
                    nav_env.reset(random_init=True)
                else:
                    nav_env.reset(random_init=True, repeat_current=False)
                set_object_goal(navigator, env.scene_config)
                state = navigator.get_observation(nav_env.step_output)
                sys.stdout.flush()
                episode_length = 0
                n_episode += 1
                break


        total_reward_for_num_steps = sum(rewards)
        total_reward_for_num_steps_list.append(total_reward_for_num_steps)
        avg_reward_for_num_steps = total_reward_for_num_steps / len(rewards)
        avg_reward_for_num_steps_list.append(avg_reward_for_num_steps)

        # Backprop and optimisation
        R = torch.zeros(1, 1).to(args.device)
        gae = torch.zeros(1, 1).to(args.device)
        batch = batch_obs(state, args.device)

        if not done:  # bootstrap the return from the critic's value estimate of the last state
            value, _, _, _ = model.act(batch, rnn_hidden_states, prev_action, mask)
            R = value.detach()

        values.append(R)
        policy_loss = 0
        value_loss = 0
        # import pdb;pdb.set_trace() # good place to breakpoint to see training cycle

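        # Walk the rollout backwards, accumulating the discounted return R, the
        # squared-advantage value loss, and the GAE-weighted policy loss with its
        # entropy bonus.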
        for i in reversed(range(len(rewards))):
            R = args.gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            # Generalized Advantage Estimation
            delta_t = rewards[i] + args.gamma * values[i + 1] - values[i]
            gae = gae * args.gamma * args.tau + delta_t

            policy_loss = policy_loss - log_probs[i] * gae.detach() - \
                          args.entropy_coef * entropies[i]

        optimizer.zero_grad()

        (policy_loss + args.value_loss_coef * value_loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

        ensure_shared_grads(model, shared_model)
        optimizer.step()
Example #3
from gym_ai2thor.envs.mcs_env import McsEnv
from meta_ontroller.meta_controller import MetaController
import sys


if __name__ == "__main__":
    env = McsEnv(task="interaction_scenes", scene_type="transferral", start_scene_number=2)
    metaController = MetaController(env)

    while env.current_scene < len(env.all_scenes) - 1:
        env.reset()
        result = metaController.excecute()
        sys.stdout.flush()



Example #4
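# Replays an object_permanence scene action by action, building a SceneState on
# the first frame and updating it afterwards so object velocities can be plotted.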
from gym_ai2thor.envs.mcs_env import McsEnv
from int_phy.scene_state import SceneState
import matplotlib.pyplot as plt

scene_name = "object_permanence"
start_scene_number = 3
env = McsEnv(task="intphys_scenes",
             scene_type=scene_name,
             start_scene_number=start_scene_number)

object_states = []

for _ in range(1):
    env.reset(random_init=False)
    # print(env.current_scene, env.scene_config['answer'], len(env.scene_config['goal']['action_list']))
    scene_state = None
    for i, x in enumerate(env.scene_config['goal']['action_list']):
        # print(i)
        if i == 0:
            scene_state = SceneState(env.step_output)
        else:
            scene_state.update(env.step_output)
        env.step(action=x[0])

    object_states.append(scene_state)
    env.controller.end_scene(None, None)

for i, scene_state in enumerate(object_states):
    plt.figure()
    for j, (id, obj_state) in enumerate(scene_state.object_state_dict.items()):
        v_xs = [v[0] for v in obj_state.velocity_history]
Example #5
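# Collects appearance data for each primitive shape by restarting the same scene
# at ten scales and recording the object's mask and depth output in every frame.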
import os

from gym_ai2thor.envs.mcs_env import McsEnv

SHAPE_TYPES = ["cylinder", "sphere", "cube"]

def set_scale(config, scale):
    config['objects'][0]['shows'][0]['scale']['x'] = scale
    config['objects'][0]['shows'][0]['scale']['y'] = scale
    config['objects'][0]['shows'][0]['scale']['z'] = scale


if __name__ == "__main__":
    scene_name = "github_scenes" + "/collect_object_shape_data"

    for _, shape_type in enumerate(SHAPE_TYPES):
        print("Scene: {}".format(shape_type))
        os.makedirs(os.path.join("appearance", "object_mask_frame", shape_type), exist_ok=True)
        env = McsEnv(task="intphys_scenes", scene_type=scene_name)
        env.reset(random_init=False)
        env.scene_config['objects'][0]['type'] = shape_type
        env.step_output = env.controller.start_scene(env.scene_config)

        object_frames = []
        for scale in [0.2 + 0.1*i for i in range(10)]:
            set_scale(env.scene_config, scale)
            env.step_output = env.controller.start_scene(env.scene_config)
            for i, action in enumerate(env.scene_config['goal']['action_list']):
                env.step(action=action[0])
                assert len(env.step_output.object_list) <= 1
                if len(env.step_output.object_list) == 1:
                    obj_state = ObjectState(
                        env.step_output.object_list[0], env.step_output.depth_mask_list[-1], env.step_output.object_mask_list[-1]
                    )
Example #6
from gym_ai2thor.envs.mcs_env import McsEnv
from meta_ontroller.meta_controller import MetaController
import sys
from copy import deepcopy
import json
import os
from planner.ff_planner_handler import PlanParser

if __name__ == "__main__":
    env = McsEnv(task="interaction_scenes", scene_type="transferral")
    # env = McsEnv(task="searchObjeInReceptacletraining")
    env.reset()
    # metaController = MetaController(env)
    # result = metaController.excecute()
    # exit(0)

    while env.current_scene <= len(env.all_scenes):
        print(env.current_scene)
        metaController = MetaController(env)
        meta_stage = 0
        search_cnt = 0
        while True:
            print("Meta-Stage: {}".format(meta_stage))
            result_plan = metaController.plan_on_current_state()
            for plan in result_plan:
                print(plan)
                break
            if result_plan[0]['action'] == "LookForObjectInReceptacle":
                new_config = deepcopy(metaController.env.scene_config)
                new_config['performerStart']['position'] = {
                    "x": metaController.env.step_output.position['x'],
Example #7
from gym_ai2thor.envs.mcs_env import McsEnv
from meta_ontroller.meta_controller import MetaController
import sys

if __name__ == "__main__":
    env = McsEnv(task="playroom", scene_type=None, start_scene_number=0)
    metaController = MetaController(env)

    while env.current_scene < len(env.all_scenes) - 1:
        env.reset()
        result = metaController.excecute(replan=False)
        sys.stdout.flush()
Example #8
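# Loads a trained Position_Embbedding_Network checkpoint and replays
# object_permanence scenes to collect its hidden state for every non-occluder object.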
from gym_ai2thor.envs.mcs_env import McsEnv
from locomotion.network import Position_Embbedding_Network, HIDDEN_STATE_SIZE, NUM_HIDDEN_LAYER
from locomotion.train import MODEL_SAVE_DIR
from int_phy_recollect_position import get_locomotion_feature
import matplotlib.pyplot as plt
import torch
import os

scene_name = "object_permanence"
start_scene_number = 0
env = McsEnv(task="intphys_scenes",
             scene_type=scene_name,
             start_scene_number=start_scene_number)

net = Position_Embbedding_Network()
net.eval()
net.load_state_dict(
    torch.load(
        os.path.join(MODEL_SAVE_DIR,
                     "model_{}layerGRU.pth".format(NUM_HIDDEN_LAYER))))

colors = ['ob', 'og', 'or', 'oc']

for _ in range(30):
    env.reset(random_init=False)
    n_object_hidden_state = {
        obj['id']: []
        for obj in env.scene_config['objects'] if "occluder" not in obj['id']
    }
    obj_seen = {
        obj['id']: False
Example #9
                                exist_ok=True)
                else:
                    os.makedirs(os.path.join(DATA_SAVE_DIR, "without_occluder",
                                             shape_type, scene_type),
                                exist_ok=True)

        object_locomotions = {}
        start_scene_number = 0
        for n_restart in range(N_RESTART):

            object_locomotions = {}
            for _, shape_type in enumerate(SHAPE_TYPES):
                object_locomotions[shape_type] = []

            env = McsEnv(task="intphys_scenes/validation_intphys_scenes_true",
                         scene_type=scene_type,
                         start_scene_number=start_scene_number)
            start_scene_number += SAVE_SCENE_LENGTH
            for _ in range(SAVE_SCENE_LENGTH):
                env.reset(random_init=False)
                if len(env.scene_config['goal']['action_list']) != 40:
                    continue
                env_new_objects = []
                env_occluders = []
                env_ramps = []
                for obj in env.scene_config['objects']:
                    if "occluder" in obj['id']:
                        env_occluders.append(obj)
                        continue
                    if "ramp" in obj['id']:
                        env_ramps.append(obj)
Example #10
        self.episode['obs'] = self.episode['obs'][-10:]
        self.episode['action'] = self.episode['action'][-10:]
        self.episode['next_obs'] = self.episode['next_obs'][-10:]


    @staticmethod
    def preprocess(img):
        img = img.resize((50, 50), Image.ANTIALIAS)
        return np.transpose(np.array(img), (2, 0, 1)) / 255




if __name__ == "__main__":
    import time
    env = McsEnv()

    domain_file = "planner/domains/Playroom_domain.pddl"
    facts_file = "planner/sample_problems/playroom_facts.pddl"

    parser = PlanParser()
    replay_buffer = []
    metaController = MetaController(env)
    episode = 0
    while episode < 100:
        print("Episode: {}".format(episode))
        env.reset()
        PlanParser.scene_config_to_pddl(env.scene_config, random_pick_up(env.scene_config), facts_file)
        result_plan = parser.get_plan_from_file(domain_file, facts_file)
        epsd_collector = Episode_collector()
        for action in result_plan:
Example #11
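# Evaluation worker: at each episode boundary it copies the latest shared weights,
# runs 40 test episodes, logs the success rate to a CSV file, and saves a model
# checkpoint after every round.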
def test(rank, args, shared_model, counter):
    torch.manual_seed(args.seed + rank)
    env = McsEnv(seed=args.seed + rank,
                 task="interaction_scenes",
                 scene_type="traversal")
    nav_env, navigator, model = get_model_from_task(env, args.task)
    nav_env.reset(random_init=True)
    set_object_goal(navigator, env.scene_config)

    model = model.to(args.device)
    model.eval()

    state = navigator.get_observation(nav_env.step_output)
    reward_sum = 0
    done = True

    save = 'steps{}-process{}-lr{}-entropy_coef{}-max_grad_norm{}'.format(
        args.num_steps, args.num_processes, args.lr, args.entropy_coef,
        args.max_grad_norm)
    save = os.path.join('logs', save)
    os.makedirs(save, exist_ok=True)

    logger = CSVLogger(os.path.join(save, 'test.csv'))
    fields = ['episode_success_rate', 'frames_rendered']
    logger.log(fields)

    start_time = time.time()

    episode_length = 0
    ckpt_counter = 0
    n_test_episode = 40
    while True:
        success_cnt = 0
        for _ in range(n_test_episode):
            while True:
                done_mask = torch.zeros(size=(1, 1)).to(args.device)
                undone_mask = torch.ones(size=(1, 1)).to(args.device)
                episode_length += 1
                # Sync with the shared model
                if done:
                    model.load_state_dict(deepcopy(shared_model.state_dict()))
                    rnn_hidden_states = torch.zeros(
                        size=(model.net.num_recurrent_layers, 1,
                              512)).to(args.device)
                    prev_action = torch.zeros(1, 1).to(args.device)
                    mask = done_mask
                else:
                    rnn_hidden_states = rnn_hidden_states.detach()

                with torch.no_grad():
                    batch = batch_obs(state, args.device)
                    value, action, action_log_probs, rnn_hidden_states = model.act(
                        batch, rnn_hidden_states, prev_action, mask)
                    # torch.cuda.empty_cache()

                prev_action.copy_(action)
                mask = undone_mask

                action_int = action.cpu().numpy()[0][0].item()
                reward, done = navigator.navigation_step_with_reward(
                    nav_env, action_int,
                    episode_length >= args.max_episode_length)
                state = navigator.get_observation(nav_env.step_output)
                reward_sum += reward

                if done:
                    episode_success = (reward == 9.99)
                    if episode_success:
                        success_cnt += 1
                    print(
                        "Time {}, num steps over all threads {}, FPS {:.0f}, episode reward {: .3f}, success {}, episode length {}"
                        .format(
                            time.strftime(
                                "%Hh %Mm %Ss",
                                time.gmtime(time.time() - start_time)),
                            counter.value,
                            counter.value / (time.time() - start_time),
                            reward_sum, episode_success, episode_length))
                    # if args.device != "cpu:":
                    #     env, nav_env = check_gpu_usage_and_restart_env(env, nav_env)

                    reward_sum = 0
                    episode_length = 0
                    nav_env.reset(random_init=True)
                    set_object_goal(navigator, env.scene_config)
                    state = navigator.get_observation(nav_env.step_output)
                    break

        torch.save(model.state_dict(),
                   os.path.join(save, "ckpt{}.pth".format(ckpt_counter)))
        logger.log(
            ["{: .2f}".format(success_cnt / n_test_episode), counter.value])
        time.sleep(args.test_sleep_time)
        ckpt_counter += 1
        if ckpt_counter == 48 * 2:
            env.controller.end_scene(None, None)
            logger.close()
            break
Example #12
    return math.sqrt(x**2 + y**2)


# scene_name = "github_scenes/spatio_temporal_continuity/implausible"
scene_name = "object_permanence"

net = Position_Embbedding_Network()
net.eval()
net.load_state_dict(
    torch.load(
        os.path.join(MODEL_SAVE_DIR,
                     "model_{}_hidden_state.pth".format(HIDDEN_STATE_SIZE))))

start_scene_number = 0
env_1 = McsEnv(task="intphys_scenes",
               scene_type=scene_name,
               start_scene_number=start_scene_number)
for _ in range(10):

    env_1.reset(random_init=False)
    env_new_objects = []
    env_occluders = []
    for obj in env_1.scene_config['objects']:
        if "occluder" not in obj['id']:
            env_new_objects.append(obj)
        else:
            env_occluders.append(obj)

    for one_obj in env_new_objects:
        plt.figure(figsize=(6, 4))
        plt.xlim((-5, 5))
Example #13
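# Launcher: builds the model for the requested task, places it in shared memory
# so the worker processes can sync with it, and creates a SharedAdam optimizer
# over its parameters.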
if __name__ == '__main__':
    mp.set_start_method("spawn")
    os.environ['OMP_NUM_THREADS'] = '1'
    # os.environ['CUDA_VISIBLE_DEVICES'] = ""

    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        print('Using', torch.cuda.get_device_name(0))
        torch.cuda.init()
        args.device = "cuda:0"
    else:
        args.device = "cpu"

    torch.manual_seed(args.seed)
    env = McsEnv()

    _, _, model, train_fun, test_fun = get_model_from_task(env, args.task)
    shared_model = model

    # if args.model:
    #     print("{} loaded".format(args.model))
    #     shared_model.load_state_dict(torch.load(os.path.join(os.getcwd(), args.model)))

    if args.cuda:
        shared_model = shared_model.cuda()
    shared_model.share_memory()

    # env.controller.end_scene(None, None)  # above env initialisation was only to find certain params needed

    optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
Example #14
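# Agent/object-preference evaluation scenes: the per-frame step() helper warps
# the camera frame and object mask with precomputed ground and wall homographies
# before further processing.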
import matplotlib.pyplot as plt
from pathfinding.core.grid import Grid
from pathfinding.finder.a_star import AStarFinder
from pathfinding.core.diagonal_movement import DiagonalMovement
from agent_util import *
from gym_ai2thor.envs.mcs_env import McsEnv
from meta_controller.meta_controller import MetaController
import sys
from frame_collector import Frame_collector

if __name__ == "__main__":
    collector = Frame_collector(scene_dir="intphy_task_img",
                                start_scene_number=0)
    env = McsEnv(task="eval3_dataset",
                 scene_type="agent_obj_preference",
                 seed=50,
                 start_scene_number=0,
                 frame_collector=collector,
                 set_trophy=False)

    # Assumes that these videos are of equal length/frames!
    # mask_cap = cv2.VideoCapture('test_single/50_mask.mkv')
    # color_cap = cv2.VideoCapture('test_single/50_color.mkv')

    M_wall, M_gnd = get_homographies()

    def step(cam_im, mask_im, info, first_frame=False):

        gnd_mask = gnd_trans(mask_im, M_gnd)
        gnd_rgb = gnd_trans(cam_im, M_gnd)

        trans_im = wall_trans(cam_im, M_wall)
Example #15
if __name__ == "__main__":
    for scene_type in SCENE_TYPES:
        for _, shape_type in enumerate(SHAPE_TYPES):
            os.makedirs(os.path.join(DATA_SAVE_DIR, "ground", shape_type,
                                     scene_type),
                        exist_ok=True)

        object_locomotions = {}
        start_scene_number = 0
        for n_restart in range(N_RESTART):
            object_locomotions = {}
            for _, shape_type in enumerate(SHAPE_TYPES):
                object_locomotions[shape_type] = []

            env = McsEnv(task="intphys_scenes",
                         scene_type=scene_type,
                         start_scene_number=start_scene_number)
            start_scene_number += SAVE_SCENE_LENGTH
            for _ in range(SAVE_SCENE_LENGTH):
                env.reset(random_init=False)
                env_new_objects = []
                env_occluders = []
                env_ramps = []
                for obj in env.scene_config['objects']:
                    if "occluder" in obj['id']:
                        env_occluders.append(obj)
                        continue
                    if "ramp" in obj['id']:
                        env_ramps.append(obj)
                        continue
                    if obj['type'] in SHAPE_TYPES: