def write_to_file():
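    """Drain scraped (search_tuple, urls) pairs from the shared queue and
    write deduplicated train/val URL splits as CSV.

    Assumes a module-level ``queue`` fed by scraper workers; an entry whose
    urls field is None acts as the stop sentinel.
    """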
    # Use one timestamp so the train and val filenames always match.
    time_str = misc_util.get_time_str()
    out_file_train = open(
        "youtube_scrape/urls_" + time_str + "_relevance_train.csv", "w")
    out_file_val = open(
        "youtube_scrape/urls_" + time_str + "_relevance_val.csv", "w")
    vid_ids = {}
    num_add_attempt = 0
    while True:
        search_tuple, urls = queue.get()
        if urls is None:
            break
        if len(urls) == 0:
            continue
        for url in urls:
            num_add_attempt += 1
            if url not in vid_ids:
                vid_ids[url] = []
            vid_ids[url].append(": ".join(search_tuple))
    print("num total", num_add_attempt, "num final", len(vid_ids))
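    # Sort the keys before shuffling so the split is reproducible for a fixed
    # random seed, then carve off a fixed-size validation set.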
    all_vid_ids = sorted(vid_ids.keys())
    random.shuffle(all_vid_ids)
    lines_val = all_vid_ids[:65536]
    lines_train = all_vid_ids[65536:]
    lines_val.sort()
    lines_train.sort()

    def to_csv_line(key):
        return '"' + key + '", "' + '", "'.join(vid_ids[key]) + '"'

    lines_train = [to_csv_line(key) for key in lines_train]
    lines_val = [to_csv_line(key) for key in lines_val]
    out_file_train.write("\n".join(lines_train) + "\n")
    out_file_train.close()
    out_file_val.write("\n".join(lines_val) + "\n")
    out_file_val.close()
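

# A minimal sketch of the assumed producer side (hypothetical, not from the
# original source): scraper workers put one (search_tuple, urls) result per
# search on the shared queue, and a final entry with urls=None tells
# write_to_file to stop.
def _example_producer(search_tuples):
    for search_tuple in search_tuples:
        urls = scrape_video_urls(search_tuple)  # hypothetical scraper call
        queue.put((search_tuple, urls))
    queue.put((None, None))  # stop sentinel: urls is None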
def main():
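    """Entry point for ImageNet encoder training: parse args, build the
    (optionally data-parallel) model, restore any checkpoint, set up
    TensorBoard loggers, and hand off to main_worker."""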
    args = parser.parse_args()
    args.tensorboard = not args.no_tensorboard
    args.load_model = not args.clear_weights
    args.save_checkpoints = not args.no_save_checkpoints

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed training. "
            "This will turn on the CUDNN deterministic setting, "
            "which can slow down your training considerably! "
            "You may see unexpected behavior when restarting "
            "from checkpoints."
        )

    log_prefix = args.log_prefix
    time_str = misc_util.get_time_str()
    checkpoint_dir = os.path.join(log_prefix, args.checkpoint_dirname, time_str)

    torch_devices = [int(gpu_id.strip()) for gpu_id in args.pytorch_gpu_ids.split(",")]
    args.gpu = torch_devices[0]
    device = "cuda:" + str(torch_devices[0])

    model = ImagenetModel()
    model = pt_util.get_data_parallel(model, torch_devices)
    model.to(device)

    start_iter = 0
    if args.load_model:
        start_iter = pt_util.restore_from_folder(model, os.path.join(log_prefix, args.checkpoint_dirname, "*"))
    args.start_epoch = start_iter

    train_logger = None
    test_logger = None
    if args.tensorboard:
        train_logger = tensorboard_logger.Logger(
            os.path.join(log_prefix, args.tensorboard_dirname, time_str + "_train")
        )
        test_logger = tensorboard_logger.Logger(os.path.join(log_prefix, args.tensorboard_dirname, time_str + "_test"))

    main_worker(model, args.gpu, args, train_logger, test_logger, checkpoint_dir)
    def setup(self, create_decoder):
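        """Build per-process Habitat configs, construct the visual policy and
        its optimizer, and warm up the GPU with a dummy batch."""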
        self.setup_device()
        render_gpus = [
            int(gpu_id.strip())
            for gpu_id in self.shell_args.render_gpu_ids.split(",")
        ]
        self.configs = []
        self.env_types = []
        for proc in range(self.shell_args.num_processes):
            extra_task_sensors = set()

            extra_agent_sensors = set()
            if self.shell_args.record_video or self.shell_args.update_encoder_features:
                extra_agent_sensors.add("DEPTH_SENSOR")

            if "SEMANTIC_SENSOR" in extra_agent_sensors:
                extra_task_sensors.append("CLASS_SEGMENTATION_SENSOR")

            if self.shell_args.dataset in ("suncg", "mp3d", "gibson"):
                data_path = ("data/datasets/pointnav/" + self.shell_args.dataset
                             + "/v1/{split}/{split}.json.gz")
            else:
                raise NotImplementedError("No rule for this dataset.")

            config = get_dataset_config(
                data_path,
                self.shell_args.data_subset,
                self.shell_args.max_episode_length,
                render_gpus[proc % len(render_gpus)],
                list(extra_task_sensors),
                list(extra_agent_sensors),
            )
            config.TASK.NUM_EPISODES_BEFORE_JUMP = self.shell_args.num_processes

            if self.shell_args.blind and not self.shell_args.record_video:
                config.SIMULATOR.RGB_SENSOR.HEIGHT = 2
                config.SIMULATOR.RGB_SENSOR.WIDTH = 2
            if self.shell_args.task == "pointnav":
                config.TASK.SUCCESS_REWARD = 2
                config.TASK.SUCCESS_DISTANCE = 0.2
                config.TASK.COLLISION_REWARD = 0
                config.TASK.ENABLE_STOP_ACTION = False
                if self.shell_args.task == "pointnav":
                    self.env_types.append(PointnavRLEnv)
            elif self.shell_args.task == "exploration":
                config.TASK.GRID_SIZE = 1
                assert config.TASK.GRID_SIZE >= config.SIMULATOR.FORWARD_STEP_SIZE
                config.TASK.NEW_GRID_CELL_REWARD = 0.1
                config.TASK.COLLISION_REWARD = 0  # -0.1
                config.TASK.RETURN_VISITED_GRID = self.shell_args.record_video
                config.ENVIRONMENT.MAX_EPISODE_STEPS = 250
                config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False
                self.env_types.append(ExplorationRLEnv)
                if self.shell_args.dataset == "suncg":
                    config.TASK.NUM_EPISODES_BEFORE_JUMP = 5
                else:
                    config.TASK.NUM_EPISODES_BEFORE_JUMP = 5
            elif self.shell_args.task == "flee":
                config.TASK.COLLISION_REWARD = 0  # -0.1
                config.ENVIRONMENT.MAX_EPISODE_STEPS = 250
                config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False
                self.env_types.append(RunAwayRLEnv)
                if self.shell_args.dataset == "suncg":
                    config.TASK.NUM_EPISODES_BEFORE_JUMP = 5
                else:
                    config.TASK.NUM_EPISODES_BEFORE_JUMP = 5
            else:
                raise NotImplementedError("Unknown task type")

            if self.shell_args.record_video:
                config.TASK.NUM_EPISODES_BEFORE_JUMP = -1
                config.TASK.STEP_SIZE = config.SIMULATOR.FORWARD_STEP_SIZE
                config.TASK.TOP_DOWN_MAP.MAX_EPISODE_STEPS = config.ENVIRONMENT.MAX_EPISODE_STEPS
                config.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250

            config.TASK.OBSERVE_BEST_NEXT_ACTION = self.shell_args.algo == "supervised"

            self.configs.append(config)
        if self.shell_args.debug:
            print("Config\n", self.configs[0])

        self.shell_args.cuda = not self.shell_args.no_cuda and torch.cuda.is_available()

        if self.shell_args.blind:
            decoder_output_info = []
        else:
            decoder_output_info = [("reconstruction", 3), ("depth", 1), ("surface_normals", 3)]

        if self.shell_args.encoder_network_type == "ShallowVisualEncoder":
            encoder_type = networks.ShallowVisualEncoder
        elif self.shell_args.encoder_network_type == "ResNetEncoder":
            encoder_type = networks.ResNetEncoder
        else:
            raise NotImplementedError("Unknown network type.")

        self.gym_action_space = gym.spaces.discrete.Discrete(len(ACTION_SPACE))
        target_vector_size = None
        if self.shell_args.task == "pointnav":
            target_vector_size = 2
        elif self.shell_args.task == "exploration" or self.shell_args.task == "flee":
            target_vector_size = 0
        self.agent = VisualPolicy(
            self.gym_action_space,
            base=networks.RLBaseWithVisualEncoder,
            base_kwargs=dict(
                encoder_type=encoder_type,
                decoder_output_info=decoder_output_info,
                recurrent=True,
                end_to_end=self.shell_args.end_to_end,
                hidden_size=256,
                target_vector_size=target_vector_size,
                action_size=len(ACTION_SPACE),
                gpu_ids=self.torch_devices,
                create_decoder=create_decoder,
                blind=self.shell_args.blind,
            ),
        )

        if self.shell_args.debug:
            print("actor critic", self.agent)
        self.agent.to(self.device)
        self.time_str = misc_util.get_time_str()

        visual_layers = self.agent.base.visual_encoder.module
        if self.shell_args.freeze_encoder_features:
            # Not necessary, but probably lets pytorch be more space efficient.
            for param in visual_layers.encoder.parameters():
                param.requires_grad = False

        # Freeze whichever decoder sub-modules exist on this encoder variant.
        if self.shell_args.freeze_visual_decoder_features:
            if hasattr(visual_layers, "bridge"):
                for param in visual_layers.bridge.parameters():
                    param.requires_grad = False
            if hasattr(visual_layers, "decoder"):
                for param in visual_layers.decoder.parameters():
                    param.requires_grad = False
            if hasattr(visual_layers, "out"):
                for param in visual_layers.out.parameters():
                    param.requires_grad = False
            if hasattr(visual_layers, "class_pred_layer"):
                if visual_layers.class_pred_layer is not None:
                    for param in visual_layers.class_pred_layer.parameters():
                        param.requires_grad = False

        # The visual projection presumably feeds both decoder heads, so only
        # freeze it when both are frozen.
        if self.shell_args.freeze_motion_decoder_features and self.shell_args.freeze_policy_decoder_features:
            for param in self.agent.base.visual_projection.parameters():
                param.requires_grad = False

        if self.shell_args.freeze_motion_decoder_features:
            for param in self.agent.base.egomotion_layer.parameters():
                param.requires_grad = False
            for param in self.agent.base.motion_model_layer.parameters():
                param.requires_grad = False

        if self.shell_args.freeze_policy_decoder_features:
            for param in self.agent.base.gru.parameters():
                param.requires_grad = False
            for param in self.agent.base.rl_layers.parameters():
                param.requires_grad = False
            for param in self.agent.base.critic_linear.parameters():
                param.requires_grad = False
            for param in self.agent.dist.parameters():
                param.requires_grad = False

        if self.shell_args.algo == "ppo":
            self.optimizer = optimizers.VisualPPO(
                self.agent,
                self.shell_args.clip_param,
                self.shell_args.ppo_epoch,
                self.shell_args.num_mini_batch,
                self.shell_args.value_loss_coef,
                self.shell_args.entropy_coef,
                lr=self.shell_args.lr,
                eps=self.shell_args.eps,
                max_grad_norm=self.shell_args.max_grad_norm,
            )
        elif self.shell_args.algo == "supervised":
            self.optimizer = optimizers.BehavioralCloningOptimizer(
                self.agent,
                self.shell_args.clip_param,
                self.shell_args.ppo_epoch,
                self.shell_args.num_mini_batch,
                self.shell_args.value_loss_coef,
                self.shell_args.entropy_coef,
                lr=self.shell_args.lr,
                eps=self.shell_args.eps,
            )
        else:
            raise NotImplementedError("No such algorithm")

        height = self.configs[0].SIMULATOR.RGB_SENSOR.HEIGHT
        width = self.configs[0].SIMULATOR.RGB_SENSOR.WIDTH
        self.observation_space = {
            "pointgoal": ((2,), np.dtype(np.float32)),
            "prev_action_one_hot": ((len(ACTION_SPACE),), np.dtype(np.float32)),
        }
        self.compute_surface_normals = self.shell_args.record_video or self.shell_args.update_encoder_features
        if self.shell_args.algo == "supervised":
            self.observation_space["best_next_action"] = ((len(ACTION_SPACE),), np.dtype(np.float32))
        if self.shell_args.update_encoder_features:
            self.observation_space["depth"] = ((1, height, width), np.dtype(np.float32))
            if self.compute_surface_normals:
                self.observation_space["surface_normals"] = ((3, height, width), np.dtype(np.float32))
        if not self.shell_args.end_to_end:
            self.observation_space["visual_encoder_features"] = (
                (self.agent.base.num_output_channels, 256 // 2 ** 5, 256 // 2 ** 5),
                np.dtype(np.float32),
            )

        # Send dummy batch through to allocate memory before vecenv
        print("Feeding dummy batch")
        dummy_start = time.time()

        self.agent.act(
            {
                "images": torch.rand(
                    self.shell_args.num_processes,
                    3,
                    self.configs[0].SIMULATOR.RGB_SENSOR.HEIGHT,
                    self.configs[0].SIMULATOR.RGB_SENSOR.WIDTH,
                ).to(self.device),
                "target_vector": torch.rand(
                    self.shell_args.num_processes, target_vector_size).to(self.device),
                "prev_action_one_hot": torch.rand(
                    self.shell_args.num_processes, self.gym_action_space.n).to(self.device),
            },
            torch.rand(self.shell_args.num_processes,
                       self.agent.recurrent_hidden_state_size).to(self.device),
            torch.rand(self.shell_args.num_processes, 1).to(self.device),
        )
        print("Done feeding dummy batch %.3f" % (time.time() - dummy_start))
        self.start_iter = 0
        self.checkpoint_dir = os.path.join(self.shell_args.log_prefix,
                                           self.shell_args.checkpoint_dirname,
                                           self.time_str)


def main():
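    """Train the visual encoder on rendered Habitat frames.

    Assumes module-level ``args`` (parsed elsewhere) plus the USE_SEMANTIC,
    BATCH_SIZE, TEST_BATCH_SIZE, and EPOCHS constants.
    """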
    torch_devices = [
        int(gpu_id.strip()) for gpu_id in args.pytorch_gpu_ids.split(",")
    ]
    render_gpus = [
        int(gpu_id.strip()) for gpu_id in args.render_gpu_ids.split(",")
    ]
    device = "cuda:" + str(torch_devices[0])

    decoder_output_info = [("reconstruction", 3), ("depth", 1), ("surface_normals", 3)]
    if USE_SEMANTIC:
        decoder_output_info.append(("semantic", 41))

    model = ShallowVisualEncoder(decoder_output_info)
    model = pt_util.get_data_parallel(model, torch_devices)
    model = pt_util.DummyScope(model, ["base", "visual_encoder"])
    model.to(device)

    print("Model constructed")
    print(model)

    train_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(224)
    ])

    train_transforms_depth = transforms.Compose([
        PIL.Image.fromarray,
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(224),
        np.array,
    ])

    train_transforms_semantic = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(224)
    ])

    sensors = ["RGB_SENSOR", "DEPTH_SENSOR"] + (["SEMANTIC_SENSOR"] if USE_SEMANTIC else [])
    if args.dataset == "suncg":
        data_train = HabitatImageGenerator(
            render_gpus,
            "suncg",
            args.data_subset,
            "data/dumps/suncg/{split}/dataset_one_ep_per_scene.json.gz",
            images_before_reset=1000,
            sensors=sensors,
            transform=train_transforms,
            depth_transform=train_transforms_depth,
            semantic_transform=train_transforms_semantic,
        )
        print("Num train images", len(data_train))

        data_test = HabitatImageGenerator(
            render_gpus,
            "suncg",
            "val",
            "data/dumps/suncg/{split}/dataset_one_ep_per_scene.json.gz",
            images_before_reset=1000,
            sensors=sensors,
        )
    elif args.dataset == "mp3d":
        data_train = HabitatImageGenerator(
            render_gpus,
            "mp3d",
            args.data_subset,
            "data/dumps/mp3d/{split}/dataset_one_ep_per_scene.json.gz",
            images_before_reset=1000,
            sensors=sensors,
            transform=train_transforms,
            depth_transform=train_transforms_depth,
            semantic_transform=train_transforms_semantic,
        )
        print("Num train images", len(data_train))

        data_test = HabitatImageGenerator(
            render_gpus,
            "mp3d",
            "val",
            "data/dumps/mp3d/{split}/dataset_one_ep_per_scene.json.gz",
            images_before_reset=1000,
            sensors=sensors,
        )
    elif args.dataset == "gibson":
        data_train = HabitatImageGenerator(
            render_gpus,
            "gibson",
            args.data_subset,
            "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz",
            images_before_reset=1000,
            sensors=sensors,
            transform=train_transforms,
            depth_transform=train_transforms_depth,
            semantic_transform=train_transforms_semantic,
        )
        print("Num train images", len(data_train))

        data_test = HabitatImageGenerator(
            render_gpus,
            "gibson",
            "val",
            "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz",
            images_before_reset=1000,
            sensors=sensors,
        )
    else:
        raise NotImplementedError("No rule for this dataset.")

    print("Num train images", len(data_train))
    print("Num val images", len(data_test))

    print("Using device", device)
    print("num cpus:", args.num_processes)

    train_loader = torch.utils.data.DataLoader(
        data_train,
        batch_size=BATCH_SIZE,
        num_workers=args.num_processes,
        worker_init_fn=data_train.worker_init_fn,
        shuffle=False,
        pin_memory=True,
    )
    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=TEST_BATCH_SIZE,
        num_workers=len(render_gpus) if args.num_processes > 0 else 0,
        worker_init_fn=data_test.worker_init_fn,
        shuffle=False,
        pin_memory=True,
    )

    log_prefix = args.log_prefix
    time_str = misc_util.get_time_str()
    checkpoint_dir = os.path.join(log_prefix, args.checkpoint_dirname,
                                  time_str)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    start_iter = 0
    if args.load_model:
        start_iter = pt_util.restore_from_folder(
            model, os.path.join(log_prefix, args.checkpoint_dirname, "*"))

    train_logger = None
    test_logger = None
    if args.tensorboard:
        train_logger = tensorboard_logger.Logger(
            os.path.join(log_prefix, args.tensorboard_dirname,
                         time_str + "_train"))
        test_logger = tensorboard_logger.Logger(
            os.path.join(log_prefix, args.tensorboard_dirname,
                         time_str + "_test"))

    total_num_steps = start_iter

    if args.save_checkpoints and not args.no_weight_update:
        pt_util.save(model,
                     checkpoint_dir,
                     num_to_keep=5,
                     iteration=total_num_steps)

    evaluate_model(model, device, test_loader, total_num_steps, test_logger,
                   decoder_output_info)

    for epoch in range(0, EPOCHS + 1):
        total_num_steps = train_model(model, device, train_loader, optimizer,
                                      total_num_steps, train_logger,
                                      decoder_output_info, checkpoint_dir)
        evaluate_model(model, device, test_loader, total_num_steps,
                       test_logger, decoder_output_info)
import os

import numpy as np
import torch
from dg_util.python_utils import misc_util
from torch import nn

np.set_printoptions(precision=4)
torch.set_printoptions(precision=4, sci_mode=False)


def batch_norm_layer(channels):
    return nn.BatchNorm2d(channels)


def nonlinearity():
    return nn.ReLU(inplace=True)


NONLINEARITY = nonlinearity
NORM_LAYER = batch_norm_layer
TIME_STR = misc_util.get_time_str()
BASE_LOG_DIR = "logs"

CHECK_FOR_NEW_DATA = False

IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) * 255
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) * 255
COOKIE_PATH = os.path.join(os.path.dirname(__file__), "youtube_scrape",
                           "cookies.txt")