Exemple #1
0
    def __init__(self, cfg):
        """Set up logger, environment, agent, replay buffer and video recorder.

        The agent's ``obs_shape``, ``action_shape`` and ``action_range``
        parameters are written into ``cfg.agent.params`` before the agent is
        instantiated through hydra.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        logger_kwargs = {
            'save_tb': cfg.log_save_tb,
            'log_frequency': cfg.log_frequency_step,
            'agent': cfg.agent.name,
            'action_repeat': cfg.action_repeat,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)

        # Seeding happens before the environment is created.
        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = make_env(cfg)

        obs_shape = self.env.observation_space.shape
        cfg.agent.params.obs_shape = obs_shape
        print(self.env.action_space.shape)
        # The agent's action shape is a 1-tuple built from ``action_space.n``.
        cfg.agent.params.action_shape = (self.env.action_space.n,)
        # NOTE(review): the bounds 0 and 12 are hard-coded here -- confirm
        # they match the environment in use.
        cfg.agent.params.action_range = [0, 12]
        self.agent = hydra.utils.instantiate(cfg.agent)

        self.replay_buffer = ReplayBuffer(obs_shape,
                                          self.env.action_space.shape,
                                          cfg.replay_buffer_capacity,
                                          self.cfg.image_pad, self.device)

        # Passing None instead of a directory when cfg.save_video is falsy.
        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #2
0
    def __init__(self, cfg):
        """Set up logger, environment, agent and replay buffer.

        The observation shape is fixed to ``(16, 16)``; the action dimension
        and the upper end of the action range both come from
        ``env.action_space.n``.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg
        self.observation_space_shape = (16, 16)
        # NOTE(review): ``device`` is not defined inside this method; it has
        # to be resolved from an outer scope -- confirm it exists.
        self.device = device
        logger_kwargs = {
            'save_tb': cfg.log_save_tb,
            'log_frequency': cfg.log_frequency,
            'agent': cfg.agent.name,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)

        utils.set_seed_everywhere(cfg.seed)
        self.env = make_env(cfg.env)
        self.max_episode_steps = cfg.max_episode_steps

        num_actions = self.env.action_space.n
        cfg.agent.params.obs_dim = self.observation_space_shape
        # action_dim is the plain value of env.action_space.n (not a tuple).
        cfg.agent.params.action_dim = num_actions
        cfg.agent.params.action_range = [0.0, float(num_actions)]
        self.agent = hydra.utils.instantiate(cfg.agent)

        capacity = int(cfg.replay_buffer_capacity)
        self.replay_buffer = ReplayBuffer(self.observation_space_shape,
                                          num_actions,
                                          capacity,
                                          self.device)
        # Video recording is disabled in this variant:
        #   self.video_recorder = VideoRecorder(
        #       self.work_dir if cfg.save_video else None)
        self.step = 0
Exemple #3
0
    def __init__(self, cfg):
        """Set up logger, a ``DummyWrapper`` environment, agent and memory.

        The agent's ``obs_dim`` is fixed to 1152; ``action_dim`` and
        ``action_range`` are read from the environment's action space.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        self.logger = Logger(
            self.work_dir,
            save_tb=cfg.log_save_tb,
            log_frequency=cfg.log_frequency,
            agent=cfg.agent.name,
        )

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = DummyWrapper({'steps': 100})

        action_space = self.env.action_space
        cfg.agent.params.obs_dim = 1152
        cfg.agent.params.action_dim = action_space.shape[0]
        # Range spans the smallest lower bound to the largest upper bound.
        lowest = float(action_space.low.min())
        highest = float(action_space.high.max())
        cfg.agent.params.action_range = [lowest, highest]
        self.agent = hydra.utils.instantiate(cfg.agent)

        self.replay_buffer = ReplayMemoryFast(memory_size=1024)

        self.step = 0
Exemple #4
0
    def __init__(self, cfg):
        """Set up logger, environment, agent, replay buffer and video recorder.

        ``obs_dim``, ``action_dim`` and ``action_range`` are read from the
        environment's spaces and written into ``cfg.agent.params`` before the
        agent is instantiated through hydra.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        self.logger = Logger(
            self.work_dir,
            save_tb=cfg.log_save_tb,
            log_frequency=cfg.log_frequency,
            agent=cfg.agent.name,
        )

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = utils.make_env(cfg)

        obs_space = self.env.observation_space
        action_space = self.env.action_space
        cfg.agent.params.obs_dim = obs_space.shape[0]
        cfg.agent.params.action_dim = action_space.shape[0]
        # Range spans the smallest lower bound to the largest upper bound.
        lowest = float(action_space.low.min())
        highest = float(action_space.high.max())
        cfg.agent.params.action_range = [lowest, highest]
        self.agent = hydra.utils.instantiate(cfg.agent)

        capacity = int(cfg.replay_buffer_capacity)
        self.replay_buffer = ReplayBuffer(obs_space.shape,
                                          action_space.shape,
                                          capacity,
                                          self.device)

        # Passing None instead of a directory when cfg.save_video is falsy.
        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #5
0
    def __init__(self, cfg):
        """Set up logger, train/test environments, agent and replay buffer.

        Agent dimensions are taken from the first training environment;
        ``obs_dim`` is additionally enlarged by ``cfg.noise_dims``.
        ``self.step`` holds one counter per training environment.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        self.logger = Logger(
            self.work_dir,
            save_tb=cfg.log_save_tb,
            log_frequency=cfg.log_frequency,
            agent=cfg.agent.name,
        )

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.train_envs, self.test_envs = utils.make_env(cfg)

        # The first training environment is the reference for all dimensions.
        lead_env = self.train_envs[0]
        action_space = lead_env.action_space

        cfg.agent.params.obs_dim = (
            lead_env.observation_space.shape[0] + cfg.noise_dims)
        cfg.agent.params.action_dim = action_space.shape[0]
        # Only agents other than 'sac' receive the num_envs parameter.
        if cfg.agent.name != 'sac':
            cfg.agent.params.num_envs = cfg.num_train_envs
        cfg.agent.params.action_range = [
            float(action_space.low.min()),
            float(action_space.high.max()),
        ]
        self.agent = hydra.utils.instantiate(cfg.agent)
        self.agent.seq_len = cfg.seq_len

        # Observation shape is the 1-tuple of the (noise-extended) obs_dim.
        self.replay_buffer = MultiEnvReplayBuffer(
            (cfg.agent.params.obs_dim,),
            action_space.shape,
            int(cfg.replay_buffer_capacity),
            self.device,
            num_envs=cfg.num_train_envs,
            seq_len=cfg.seq_len)

        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = [0] * cfg.num_train_envs
def load_static(args):
    """Load everything that stays fixed across requests.

    Seeds the RNGs, loads the schemas from ``args.data_dir``, builds the
    ``IRNet`` model and restores its weights from ``args.model_to_load``
    (mapped to CPU on load), creates the English tokenizer and reads the two
    ConceptNet pickles found under ``args.conceptNet``.

    Args:
        args: Parsed arguments providing ``seed``, ``data_dir``,
            ``model_to_load`` and ``conceptNet``.

    Returns:
        The tuple ``(args, grammar, model, nlp, tokenizer,
        related_to_concept, is_a_concept, schemas_raw, schemas_dict)``.
    """

    def _read_concept_pickle(file_name):
        # NOTE: pickle executes arbitrary code on load; these files must come
        # from a trusted source.
        with open(os.path.join(args.conceptNet, file_name), 'rb') as handle:
            return pickle.load(handle)

    device, n_gpu = setup_device()
    set_seed_everywhere(args.seed, n_gpu)

    schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir)

    grammar = semQL.Grammar()
    model = IRNet(args, device, grammar)
    model.to(device)
    # Restore the pre-trained parameters and switch to inference mode.
    pretrained_state = torch.load(args.model_to_load,
                                  map_location=torch.device('cpu'))
    model.load_state_dict(pretrained_state)
    model.eval()
    print("Load pre-trained model from '{}'".format(args.model_to_load))

    nlp = English()
    tokenizer = nlp.Defaults.create_tokenizer(nlp)

    related_to_concept = _read_concept_pickle('english_RelatedTo.pkl')
    is_a_concept = _read_concept_pickle('english_IsA.pkl')

    return (args, grammar, model, nlp, tokenizer, related_to_concept,
            is_a_concept, schemas_raw, schemas_dict)
Exemple #7
0
def run(args):
    """Smoke-test the keypoint networks on constant dummy inputs.

    Builds the encoder, decoder, keypoint predictor and inverse model from
    the hyperparameter config, pushes constant-valued tensors through them
    and prints the modules, output shapes, parameter counts, a few
    predicted/encoded keypoint pairs and a summed MSE normalised by the first
    two dimensions of the prediction.

    Args:
        args: Parsed arguments; ``args.seed`` seeds the RNGs and ``args`` is
            forwarded to ``hyperparameters.get_config``.
    """
    import hyperparameters

    def _param_count(module):
        # Total number of scalar parameters held by ``module``.
        return sum([
            np.prod(list(tensor.size())) for tensor in module.parameters()
        ])

    cfg = hyperparameters.get_config(args)
    cfg.layers_per_scale = 1
    # Other overrides that were tried: num_keypoints=32, batch_size=25.

    utils.set_seed_everywhere(args.seed)

    input_shape = (8, 3, 64, 64)
    imgs_to_keyp_model = ImagesToKeypEncoder(cfg, input_shape, debug=True)
    keyp_to_imgs_model = KeypToImagesDecoder(cfg, input_shape, debug=True)

    keyp_pred_net = KeypPredictor(cfg)
    keyp_inverse_net = KeypInverseModel(cfg)

    for module in (imgs_to_keyp_model, keyp_to_imgs_model, keyp_pred_net):
        print(module)

    # summary(model, input_size=(2, 3, 64, 64))
    img = 0.5 * torch.ones((1, 4, 3, 64, 64))
    action = 0.4 * torch.ones((1, 4, 4))
    k, h = imgs_to_keyp_model(img)

    # Decode using the first frame and its keypoints as reference.
    r = keyp_to_imgs_model(k, img[:, 0], k[:, 0])

    print(k.shape, h.shape, r.shape)

    # Only the first two channels of the last axis are fed to both heads.
    keyp_xy = k[Ellipsis, :2]
    pred_k = keyp_pred_net(keyp_xy, action)

    pred_action = keyp_inverse_net(k[Ellipsis, :2])

    print("Pred_k: ", pred_k.shape, "Pred_action:", pred_action.shape)

    b = _param_count(imgs_to_keyp_model)
    print("Encodeer params: ", b)
    c = _param_count(keyp_to_imgs_model)
    print("Decoder params: ", c)
    d = _param_count(keyp_pred_net)
    print("Keyp Predictor params: ", d)

    # NOTE(review): the total leaves out keyp_inverse_net -- confirm intent.
    print("Model parameters: ", b + c + d)

    # NOTE(review): iterates over k.shape[1] but indexes axis 2 -- confirm.
    for n in range(k.shape[1]):
        print(pred_k[0, 2, n, :2], k[0, 2, n, :2])

    summed_error = F.mse_loss(pred_k, k[:, 1:, :, :2], reduction='sum')
    print(summed_error / (pred_k.shape[0] * pred_k.shape[1]))
Exemple #8
0
 def __init__(self, cfg):
     """Pick a random seed, seed the RNGs and derive the output directories.

     The run is saved under
     ``<cfg.gen.base_dir>/results/unsup_cl/<cfg.gen.target_dir>/<seed>``,
     with logs in its ``logs`` sub-directory.

     Args:
         cfg: Run configuration; ``cfg.gen.base_dir`` and
             ``cfg.gen.target_dir`` are read.
     """
     self.cfg = cfg
     # Four values are still drawn so the torch RNG advances exactly as
     # before; only the first one is used.
     seeds = torch.randint(0, 2**32, torch.Size([4]))
     # Hand the seeding helper a plain Python int rather than a 0-dim
     # tensor: this is the same value that names the directory, and seeders
     # such as ``random.seed`` do not accept arbitrary objects on recent
     # Python versions.
     seed = int(seeds[0].item())
     set_seed_everywhere(seed)
     self.save_dir = os.path.join(self.cfg.gen.base_dir, 'results/unsup_cl',
                                  self.cfg.gen.target_dir,
                                  str(seed))
     self.log_dir = os.path.join(self.save_dir, 'logs')
def init_env(args):
    """Seed the RNGs from ``args.seed`` and build the env via ``make_pad_env``.

    Args:
        args: Parsed arguments providing ``seed``, ``domain_name``,
            ``task_name``, ``episode_length``, ``action_repeat`` and ``mode``.

    Returns:
        Whatever ``make_pad_env`` returns for these settings.
    """
    utils.set_seed_everywhere(args.seed)
    env_settings = {
        'domain_name': args.domain_name,
        'task_name': args.task_name,
        'seed': args.seed,
        'episode_length': args.episode_length,
        'action_repeat': args.action_repeat,
        'mode': args.mode,
    }
    return make_pad_env(**env_settings)
Exemple #10
0
def main(args):
    """Train a ``KeypointModel`` and save a final checkpoint.

    Creates a time-stamped experiment directory under ``cfg.base_dir``,
    stores the config there, optionally replaces the fresh model with the
    latest checkpoint found in ``args.pretrained_path``, fits it with a
    ``Trainer`` and finally writes ``model_final_<num_epochs>.ckpt`` into the
    checkpoint directory.

    Args:
        args: Parsed arguments; ``seed``, ``no_cuda``, ``pretrained_path``
            and ``num_epochs`` are read, and ``args.cuda`` is set here.
    """
    utils.set_seed_everywhere(args.seed)

    cfg = hyperparameters.get_config(args)
    cfg.seed = args.seed

    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Experiment folders are named after the current US/Eastern time.
    now = datetime.now(timezone('US/Eastern'))
    time_str = now.strftime("%Y-%m-%d-%H-%M-%S")
    exp_dir = os.path.join(cfg.base_dir, time_str)
    checkpoint_dir = os.path.join(exp_dir, cfg.checkpoint_dir)
    log_dir = os.path.join(exp_dir, cfg.log_dir)

    save_config(cfg, exp_dir, "config.json")

    print("Log path: ", log_dir, "Checkpoint Dir: ", checkpoint_dir)

    total_steps = cfg.observed_steps + cfg.predicted_steps
    cfg.data_shapes = {'image': (None, total_steps, 3, 64, 64)}

    model = KeypointModel(cfg)

    checkpoint_prefix = os.path.join(checkpoint_dir, "model_")
    cp_callback = ModelCheckpoint(filepath=checkpoint_prefix,
                                  period=25,
                                  save_top_k=-1)

    logger = TensorBoardLogger(log_dir, name="", version=None)

    if args.cuda:
        gpus = 1
    else:
        gpus = None

    if args.pretrained_path:
        # Replace the freshly built model with the latest stored checkpoint.
        checkpoint_path = get_latest_checkpoint(args.pretrained_path)
        import json
        model = KeypointModel.load_from_checkpoint(checkpoint_path)
        print(json.dumps(model.cfg, indent=4))

    print("On GPU Device: ", gpus)
    # Options that were tried and left disabled: distributed_backend='dp',
    # gradient_clip_val=cfg.clipnorm, train_percent_check=0.1,
    # val_percent_check=0.0 / 0.3.
    trainer_options = dict(
        max_epochs=args.num_epochs,
        logger=logger,
        checkpoint_callback=cp_callback,
        gpus=gpus,
        progress_bar_refresh_rate=1,
        fast_dev_run=False,
        track_grad_norm=2,
        show_progress_bar=True,
    )
    trainer = Trainer(**trainer_options)
    trainer.fit(model)

    final_name = "model_final_" + str(args.num_epochs) + ".ckpt"
    save_path = os.path.join(checkpoint_dir, final_name)
    print("Saving model finally:")
    trainer.save_checkpoint(save_path)
Exemple #11
0
    def __init__(self, cfg):
        """Set up directories, logger, envs, two agents and two buffers.

        An exploration agent (``task_agnostic=True``) and a task agent
        (``task_agnostic=False``) are instantiated from the same agent
        config; the task agent takes over modules from the exploration agent
        and, when ``cfg.load_pretrained`` is set, from a stored snapshot.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')
        self.model_dir = utils.make_dir(self.work_dir, 'model')
        self.buffer_dir = utils.make_dir(self.work_dir, 'buffer')

        self.cfg = cfg

        logger_kwargs = {
            'save_tb': cfg.log_save_tb,
            'log_frequency': cfg.log_frequency_step,
            'action_repeat': cfg.action_repeat,
            'agent': cfg.agent.name,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        # The evaluation environment uses the training seed plus one.
        self.env = dmc.make(cfg.env, cfg.frame_stack, cfg.action_repeat,
                            cfg.seed)
        self.eval_env = dmc.make(cfg.env, cfg.frame_stack, cfg.action_repeat,
                                 cfg.seed + 1)

        obs_spec = self.env.observation_spec()['pixels']
        action_spec = self.env.action_spec()

        cfg.agent.params.obs_shape = obs_spec.shape
        cfg.agent.params.action_shape = action_spec.shape
        lowest = float(action_spec.minimum.min())
        highest = float(action_spec.maximum.max())
        cfg.agent.params.action_range = [lowest, highest]

        # Exploration agent: instantiated with task_agnostic=True.
        self.expl_agent = hydra.utils.instantiate(cfg.agent,
                                                  task_agnostic=True)
        # Task agent: instantiated with task_agnostic=False.
        self.task_agent = hydra.utils.instantiate(cfg.agent,
                                                  task_agnostic=False)
        self.task_agent.assign_modules_from(self.expl_agent)

        if cfg.load_pretrained:
            pretrained_path = utils.find_pretrained_agent(
                cfg.pretrained_dir, cfg.env, cfg.seed, cfg.pretrained_step)
            print(f'snapshot is taken from: {pretrained_path}')
            pretrained_agent = utils.load(pretrained_path)
            self.task_agent.assign_modules_from(pretrained_agent)

        def _new_buffer():
            # Both phases use identically configured, independent buffers.
            return ReplayBuffer(obs_spec.shape, action_spec.shape,
                                cfg.replay_buffer_capacity,
                                self.device)

        # Buffer for the task-agnostic phase.
        self.expl_buffer = _new_buffer()
        # Buffer for the task-specific phase.
        self.task_buffer = _new_buffer()

        video_dir = self.work_dir if cfg.save_video else None
        self.eval_video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #12
0
 def __init__(self, cfg):
     """Pick a random seed, seed the RNGs and derive the output directories.

     The run is saved under
     ``<cfg.gen.base_dir>/results/unsup_cl_rag/<cfg.gen.target_dir>/<seed>``,
     with logs in its ``logs`` sub-directory; both paths are printed.

     Args:
         cfg: Run configuration; ``cfg.gen.base_dir`` and
             ``cfg.gen.target_dir`` are read.
     """
     self.cfg = cfg
     # Four values are still drawn so the torch RNG advances exactly as
     # before; only the first one is used.
     seeds = torch.randint(0, 2 ** 32, torch.Size([4]))
     # Hand the seeding helper a plain Python int rather than a 0-dim
     # tensor: this is the same value that names the directory, and seeders
     # such as ``random.seed`` do not accept arbitrary objects on recent
     # Python versions.
     seed = int(seeds[0].item())
     set_seed_everywhere(seed)
     self.save_dir = os.path.join(self.cfg.gen.base_dir,
                                  'results/unsup_cl_rag',
                                  self.cfg.gen.target_dir,
                                  str(seed))
     self.log_dir = os.path.join(self.save_dir, 'logs')
     print("embeddings are on sphere")
     print(f"save dir: {self.save_dir}")
     print(f"log dir: {self.log_dir}")
def init_env(args):
    """Seed the RNGs and build the env via ``make_locomotion_env``.

    Args:
        args: Parsed arguments providing ``seed``, ``env_name``,
            ``episode_length``, ``pixel_obs``, ``action_repeat``,
            ``obs_height``, ``obs_width``, ``env_camera_id`` and ``mode``.

    Returns:
        Whatever ``make_locomotion_env`` returns for these settings.
    """
    utils.set_seed_everywhere(args.seed)
    env_settings = {
        'env_name': args.env_name,
        'seed': args.seed,
        'episode_length': args.episode_length,
        'from_pixels': args.pixel_obs,
        'action_repeat': args.action_repeat,
        'obs_height': args.obs_height,
        'obs_width': args.obs_width,
        'camera_id': args.env_camera_id,
        'mode': args.mode,
    }
    return make_locomotion_env(**env_settings)
Exemple #14
0
def run_final_test(args):
    """Build the model selected by ``args`` and run ``Trainer.test`` on it.

    The data shapes come from ``datasets.get_sequence_dataset``; the model
    class depends on ``args.no_first`` / ``args.keyp_pred`` /
    ``args.keyp_inverse``. When ``args.pretrained_path`` is given, weights
    are restored from the requested (``args.ckpt``) or latest checkpoint.

    Args:
        args: Parsed arguments; ``args.cuda`` is set here.

    Raises:
        ValueError: If ``args.no_first`` is set but neither ``args.keyp_pred``
            nor ``args.keyp_inverse`` is, so no model can be selected.
    """
    utils.set_seed_everywhere(args.seed)
    cfg = hyperparameters.get_config(args)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.cuda else "cpu")

    # NOTE(review): the train dir comes from cfg but the test dir from args
    # -- confirm this asymmetry is intended.
    l_dir = cfg.train_dir if args.is_train else args.test_dir
    print("Data loader: ", l_dir)
    # Only the data shapes are needed here; the loader itself is unused.
    _loader, data_shapes = datasets.get_sequence_dataset(
        data_dir=os.path.join(cfg.data_dir, l_dir),
        batch_size=cfg.batch_size,
        num_timesteps=2 * args.timesteps,
        shuffle=True)

    cfg.log_training = args.log_training
    cfg.log_training_path = os.path.join(args.log_training_path)

    cfg.data_shapes = data_shapes
    if args.no_first:
        if args.keyp_pred:
            print("Loding keyp pred")
            model = train_keyp_pred.KeypointModel(cfg).to(device)
        elif args.keyp_inverse:
            print("Loding Inverse Model")
            model = train_keyp_inverse.KeypointModel(cfg).to(device)
        else:
            # Previously this fell through and failed later with an unbound
            # ``model``; fail here with an explicit message instead.
            raise ValueError(
                "no_first requires either keyp_pred or keyp_inverse to "
                "select a model")
    else:
        model = train.KeypointModel(cfg).to(device)

    if args.pretrained_path:
        if args.ckpt:
            checkpoint_path = os.path.join(
                args.pretrained_path, "_ckpt_epoch_" + args.ckpt + ".ckpt")
        else:
            print("Loading latest")
            checkpoint_path = get_latest_checkpoint(args.pretrained_path)
        # Load onto CPU storage regardless of where the checkpoint was saved.
        checkpoint = torch.load(checkpoint_path,
                                map_location=lambda storage, loc: storage)
        print("Loading model from: ", checkpoint_path)
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()
        print("Load complete")

    # Request a GPU only when CUDA is enabled and available, matching the
    # device chosen for the model above.
    trainer = Trainer(gpus=1 if args.cuda else None,
                      progress_bar_refresh_rate=1,
                      show_progress_bar=True)

    trainer.test(model)
Exemple #15
0
    def __init__(self, cfg):
        """Set up logger, environment, agent, replay buffer and video recorder.

        Unlike a working-directory based setup, the workspace path is a fixed
        absolute directory.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        # NOTE(review): machine-specific absolute path -- confirm it should
        # not come from the configuration or the current directory.
        self.work_dir = '/media/trevor/mariadb/thesis/'
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        logger_kwargs = {
            'save_tb': cfg.log_save_tb,
            'log_frequency': cfg.log_frequency_step,
            'agent': cfg.agent.name,
            'action_repeat': cfg.action_repeat,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = make_env(cfg)

        obs_shape = self.env.observation_space.shape
        action_space = self.env.action_space
        cfg.agent.params.obs_shape = obs_shape
        cfg.agent.params.action_shape = action_space.shape
        # Range spans the smallest lower bound to the largest upper bound.
        lowest = float(action_space.low.min())
        highest = float(action_space.high.max())
        cfg.agent.params.action_range = [lowest, highest]

        self.agent = hydra.utils.instantiate(cfg.agent)

        self.replay_buffer = ReplayBuffer(obs_shape,
                                          action_space.shape,
                                          cfg.replay_buffer_capacity,
                                          self.cfg.image_pad, self.device,
                                          self.cfg.env)

        # Alternative buffer construction that was left disabled:
        # obs_shape = (3 * 3, 84, 84)
        # pre_aug_obs_shape = (3 * 3, 100, 100)
        #
        # self.replay_buffer = ReplayBuffer(
        #     obs_shape=pre_aug_obs_shape,
        #     action_shape=self.env.action_space.shape,
        #     capacity=cfg.replay_buffer_capacity,
        #     batch_size=cfg.batch_size,
        #     device=self.device,
        #     image_size=84,
        #     pre_image_size=100,
        # )

        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #16
0
    def __init__(self, cfg):
        """Set up logger, environment, agent, replay buffer and video recorder.

        ``adjust_action_repeat_hack`` is applied to ``cfg`` first; the config
        and the agent's actor and critic are printed along the way.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place.
        """
        self.work_dir = os.getcwd()
        # Hack to adjust action_repeat
        adjust_action_repeat_hack(cfg)

        print(f"CFG:\n{'-'*100}\n{cfg}\n{'-'*100}")

        self.cfg = cfg
        experiment_name = f"{cfg.full_title}_{cfg.run_id}"

        logger_kwargs = {
            'save_tb': cfg.log_save_tb,
            'save_wb': cfg.log_save_wandb,
            'log_frequency': cfg.log_frequency_step,
            'agent': cfg.agent.name,
            'action_repeat': cfg.action_repeat,
            'cfg': dict(flatten_cfg(cfg)),
            'plot_project': "drqtest",
            'experiment': experiment_name,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)
        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = make_env(cfg)

        obs_shape = self.env.observation_space.shape
        action_space = self.env.action_space
        cfg.agent.params.obs_shape = obs_shape
        cfg.agent.params.action_shape = action_space.shape
        # Range spans the smallest lower bound to the largest upper bound.
        lowest = float(action_space.low.min())
        highest = float(action_space.high.max())
        cfg.agent.params.action_range = [lowest, highest]
        self.agent = hydra.utils.instantiate(cfg.agent)

        print(f"ACTOR:\n{'-'*100}\n{self.agent.actor}\n{'-'*100}")
        print(f"CRITIC:\n{'-'*100}\n{self.agent.critic}\n{'-'*100}")

        self.replay_buffer = ReplayBuffer(
            obs_shape,
            action_space.shape,
            cfg.replay_buffer_capacity,
            self.cfg.image_pad,
            self.device,
            use_aug=cfg.replay_buffer_augmentation)

        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #17
0
    def __init__(self, cfg, env_maker, phi, work_dir):
        """Set up environment, DrQ agent, replay buffer and counters.

        The observation shape is obtained by applying ``phi`` to the first
        observation returned by ``env.reset()``.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
            env_maker: Zero-argument callable returning the environment; it
                is kept on the instance and called once here.
            phi: Callable applied to a raw observation; its result's
                ``shape`` defines the agent's observation shape.
            work_dir: Workspace directory stored as ``self.work_dir``.
        """
        self.env_maker = env_maker
        self.env = env_maker()
        self.phi = phi

        # self.env = dmc2gym.make(
        #     domain_name='CarIntersect',
        #     task_name=task_name,
        #     seed=cfg.seed,
        #     visualize_reward=False,
        #     from_pixels=True,
        #     frame_skip=cfg.action_repeat,
        # )

        self.work_dir = work_dir
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        # NOTE(review): seeding happens after the environment was created
        # above -- confirm env_maker handles its own seeding.
        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)

        cfg.agent.params.obs_shape = self.phi(self.env.reset()).shape
        print(f'DRQ: get observation shape : {cfg.agent.params.obs_shape}')

        cfg.agent.params.action_shape = self.env.action_space.shape
        # Fixed: this message used to print obs_shape under the action label.
        print(f'DRQ: get action shape : {cfg.agent.params.action_shape}')

        # The action range is fixed to [-1, +1] rather than read from the env.
        cfg.agent.params.action_range = [-1, +1]
        self.agent: DRQAgent = hydra.utils.instantiate(cfg.agent)

        self.replay_buffer = ReplayBuffer(
            obs_shape=cfg.agent.params.obs_shape,
            action_shape=self.env.action_space.shape,
            capacity=cfg.replay_buffer_capacity,
            image_pad=self.cfg.image_pad,
            device=self.device,
            phi=self.phi,
        )

        self.step = 0

        self.total_env_step = 0
        self.total_updates = 0
        self.total_episodes = 0
Exemple #18
0
    def __init__(
            self,
            log_save_tb=True,
            log_frequency_step=10000,
            agent_name='drq',
            # device='cuda',
            device='cpu',
            env='cartpole_swingup',
            seed=1,
            image_size=84,
            action_repeat=8,
            frame_stack=3,
            replay_buffer_capacity=100000,
            image_pad=4,
            save_video=True):
        """Set up logger, environment, DrQ agent, buffer and video recorder.

        All settings are plain keyword arguments instead of a config object.

        Args:
            log_save_tb: Forwarded to ``Logger`` as ``save_tb``.
            log_frequency_step: Forwarded to ``Logger`` as ``log_frequency``.
            agent_name: Forwarded to ``Logger`` as ``agent``.
            device: Device string handed to ``torch.device``.
            env: Environment identifier forwarded to ``make_env``.
            seed: Seed for the RNGs; also forwarded to ``make_env``.
            image_size: Forwarded to ``make_env``.
            action_repeat: Forwarded to ``Logger`` and ``make_env``.
            frame_stack: Forwarded to ``make_env``.
            replay_buffer_capacity: Forwarded to ``ReplayBuffer``.
            image_pad: Forwarded to ``ReplayBuffer``.
            save_video: When falsy, the recorder receives ``None`` instead of
                the workspace directory.
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        logger_kwargs = {
            'save_tb': log_save_tb,
            'log_frequency': log_frequency_step,
            'agent': agent_name,
            'action_repeat': action_repeat,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)

        utils.set_seed_everywhere(seed)
        self.device = torch.device(device)
        self.env = make_env(env, seed, image_size, action_repeat, frame_stack)

        obs_shape = self.env.observation_space.shape
        action_space = self.env.action_space
        # Range spans the smallest lower bound to the largest upper bound.
        bounds = (float(action_space.low.min()),
                  float(action_space.high.max()))

        self.agent = DRQAgent(
            obs_shape=obs_shape,
            action_shape=action_space.shape,
            action_range=bounds,
            device=self.device)

        self.replay_buffer = ReplayBuffer(obs_shape,
                                          action_space.shape,
                                          replay_buffer_capacity, image_pad,
                                          self.device)

        video_dir = self.work_dir if save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #19
0
def set_envs(args):
    """Prepare the CUDA / distributed environment and finish ``args`` setup.

    Disables ``cuda`` and ``fp16`` on ``args`` when CUDA is unavailable,
    records the visible device ids, selects the local device, initialises
    the NCCL process group, fills in ``args.device`` when unset, optionally
    expands ``args.model_state_file`` into ``args.save_dir``, seeds the RNGs
    and calls ``handle_dirs`` on the save directory.

    Args:
        args: Parsed arguments; mutated in place.

    Raises:
        ValueError: If ``CUDA_VISIBLE_DEVICES`` holds an entry that is not an
            integer.
    """
    if not torch.cuda.is_available():
        args.cuda = False
        args.fp16 = False

    visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible_devices is not None:
        # Parse the comma-separated ids explicitly instead of eval()-ing the
        # environment variable, which would execute arbitrary expressions.
        args.device_ids = [
            int(entry) for entry in visible_devices.split(',')
            if entry.strip()
        ]

    # NOTE(review): the two calls below run even when CUDA was found to be
    # unavailable above -- confirm this function is only used on GPU hosts.
    torch.cuda.set_device(args.local_rank)
    torch.distributed.init_process_group(backend='nccl',
                                        init_method='env://')
    torch.backends.cudnn.benchmark = True

    if not args.device:
        args.device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
    if args.expand_filepaths_to_save_dir:
        args.model_state_file = os.path.join(args.save_dir,args.model_state_file)
    set_seed_everywhere(args.seed, args.cuda)
    handle_dirs(args.save_dir)
Exemple #20
0
    def __init__(self, cfg):
        """Set up logger, environment, agent, replay buffer and video recorder.

        The replay buffer additionally receives the environment's state-space
        shape, the image size, the agent's ``random_encoder``, the
        augmentation type and the ``use_drq`` flag.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f"workspace: {self.work_dir}")

        self.cfg = cfg

        logger_kwargs = {
            "save_tb": cfg.log_save_tb,
            "log_frequency": cfg.log_frequency_step,
            "agent": cfg.agent.name,
            "action_repeat": cfg.action_repeat,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = make_env(cfg, eval=False)

        obs_shape = self.env.observation_space.shape
        action_space = self.env.action_space
        cfg.agent.params.obs_shape = obs_shape
        cfg.agent.params.action_shape = action_space.shape
        # Range spans the smallest lower bound to the largest upper bound.
        lowest = float(action_space.low.min())
        highest = float(action_space.high.max())
        cfg.agent.params.action_range = [lowest, highest]
        self.agent = hydra.utils.instantiate(cfg.agent)

        buffer_args = (
            obs_shape,
            action_space.shape,
            self.env.state_space.shape,
            cfg.replay_buffer_capacity,
            self.cfg.image_size,
            self.agent.random_encoder,
            self.cfg.aug_type,
            self.cfg.use_drq,
            self.device,
        )
        self.replay_buffer = ReplayBuffer(*buffer_args)

        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #21
0
    def __init__(self, cfg):
        """Set up logger, train/eval environments, agent, buffer and recorder.

        The evaluation environment is created with ``gym.make`` and wrapped
        depending on whether ``"img_only"`` occurs in ``cfg.env``; it is
        seeded with ``cfg.seed + 111``.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        logger_kwargs = {
            'save_tb': cfg.log_save_tb,
            'log_frequency': cfg.log_frequency_step,
            'agent': cfg.agent.name,
            'action_repeat': cfg.action_repeat,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = make_env(cfg, self.logger)

        raw_eval_env = gym.make(cfg.env)
        if "img_only" in cfg.env:
            wrapped_eval_env = TransposeImage(raw_eval_env)
        else:
            wrapped_eval_env = DictToBoxWrapper(
                DictTransposeImage(raw_eval_env))
        self.eval_env = wrapped_eval_env
        # env = utils.FrameStack(env, k=cfg.frame_stack)

        self.eval_env.seed(cfg.seed + 111)

        obs_shape = self.env.observation_space.shape
        action_space = self.env.action_space
        cfg.agent.params.obs_shape = obs_shape
        cfg.agent.params.action_shape = action_space.shape
        # Range spans the smallest lower bound to the largest upper bound.
        lowest = float(action_space.low.min())
        highest = float(action_space.high.max())
        cfg.agent.params.action_range = [lowest, highest]
        self.agent = hydra.utils.instantiate(cfg.agent)

        self.replay_buffer = ReplayBuffer(obs_shape,
                                          action_space.shape,
                                          cfg.replay_buffer_capacity,
                                          self.cfg.image_pad, self.device)

        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #22
0
    def __init__(self, cfg):
        """Set up logger, a frame-stacked ``HandDrawerEnv``, agent and buffer.

        The environment config file is located relative to the installed
        ``gibson2`` package and the environment runs in ``'headless'`` mode.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        logger_kwargs = {
            'save_tb': cfg.log_save_tb,
            'log_frequency': cfg.log_frequency_step,
            'agent': cfg.agent.name,
            'action_repeat': cfg.action_repeat,
        }
        self.logger = Logger(self.work_dir, **logger_kwargs)

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)

        gibson_root = os.path.dirname(gibson2.__file__)
        gibson_config_filename = os.path.join(
            gibson_root, '../examples/configs/hand_drawer.yaml')
        base_env = HandDrawerEnv(config_file=gibson_config_filename,
                                 mode='headless')
        self.env = base_env
        self.env = utils.FrameStack(self.env, k=cfg.frame_stack)

        obs_shape = self.env.observation_space.shape
        action_space = self.env.action_space
        cfg.agent.params.obs_shape = obs_shape
        cfg.agent.params.action_shape = action_space.shape
        # Range spans the smallest lower bound to the largest upper bound.
        lowest = float(action_space.low.min())
        highest = float(action_space.high.max())
        cfg.agent.params.action_range = [lowest, highest]
        self.agent = hydra.utils.instantiate(cfg.agent)

        self.replay_buffer = ReplayBuffer(obs_shape,
                                          action_space.shape,
                                          cfg.replay_buffer_capacity,
                                          self.cfg.image_pad, self.device)

        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #23
0
def load_model(args):
    """Build a keypoint model and restore it from a numbered checkpoint.

    The model class is chosen by ``args.inv_fwd``; weights are read from
    ``<args.pretrained_path>/_ckpt_epoch_<args.ckpt>.ckpt`` and the model is
    switched to eval mode.

    Args:
        args: Parsed arguments; ``seed``, ``no_cuda``, ``inv_fwd``,
            ``pretrained_path`` and ``ckpt`` are read, ``args.cuda`` is set.

    Returns:
        The restored model in eval mode.
    """
    utils.set_seed_everywhere(args.seed)
    cfg = hyperparameters.get_config(args)
    cfg.data_shapes = {'image': (None, 16, 3, 64, 64)}

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.cuda else "cpu")

    if args.inv_fwd:
        model = train_keyp_inverse_forward.KeypointModel(cfg).to(device)
    else:
        model = train_keyp_pred.KeypointModel(cfg).to(device)

    checkpoint_name = "_ckpt_epoch_" + args.ckpt + ".ckpt"
    checkpoint_path = os.path.join(args.pretrained_path, checkpoint_name)
    # Load onto CPU storage regardless of where the checkpoint was saved.
    checkpoint = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)
    print("Loading model from: ", checkpoint_path)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    print("Load complete")

    return model
Exemple #24
0
    def __init__(self, cfg):
        """Set up logger, environment, agent, replay buffer and video recorder.

        The logger directory is the workspace path extended with the
        environment name, the ``effective_aug`` setting and the seed; the
        replay buffer also receives ``effective_aug``.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        run_suffix = "_eval2k_effective_{}_seed_{}".format(
            self.cfg.effective_aug, self.cfg.seed)
        log_root = self.work_dir + "_" + self.cfg.env + run_suffix
        self.logger = Logger(log_root,
                             save_tb=cfg.log_save_tb,
                             log_frequency=cfg.log_frequency_step,
                             agent=cfg.agent.name,
                             action_repeat=cfg.action_repeat)

        self.effective_aug = self.cfg.effective_aug
        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = make_env(cfg)

        obs_shape = self.env.observation_space.shape
        action_space = self.env.action_space
        cfg.agent.params.obs_shape = obs_shape
        cfg.agent.params.action_shape = action_space.shape
        # Range spans the smallest lower bound to the largest upper bound.
        lowest = float(action_space.low.min())
        highest = float(action_space.high.max())
        cfg.agent.params.action_range = [lowest, highest]
        self.agent = hydra.utils.instantiate(cfg.agent)

        self.replay_buffer = ReplayBuffer(obs_shape,
                                          action_space.shape,
                                          cfg.replay_buffer_capacity,
                                          self.cfg.image_pad, self.device,
                                          self.effective_aug)

        video_dir = self.work_dir if cfg.save_video else None
        self.video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #25
0
    def __init__(self, cfg):
        """Set up logger, meta envs, agent, meta replay buffer and recorder.

        Observation shape comes from the ``'features'`` entry of the
        environment's observation spec; the evaluation environment uses the
        training seed plus one.

        Args:
            cfg: Run configuration; stored on the instance and mutated in
                place (``cfg.agent.params``).
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.cfg = cfg

        self.logger = Logger(
            self.work_dir,
            save_tb=cfg.log_save_tb,
            log_frequency=cfg.log_frequency_step,
            agent=cfg.agent.name,
        )

        utils.set_seed_everywhere(cfg.seed)
        self.device = torch.device(cfg.device)
        self.env = dmc.make_meta(cfg.env, cfg.episode_length, cfg.seed)
        self.eval_env = dmc.make_meta(cfg.env, cfg.episode_length,
                                      cfg.seed + 1)

        obs_spec = self.env.observation_spec()['features']
        action_spec = self.env.action_spec()

        cfg.agent.params.obs_shape = obs_spec.shape
        cfg.agent.params.action_shape = action_spec.shape
        # Range spans the smallest minimum to the largest maximum.
        lowest = float(action_spec.minimum.min())
        highest = float(action_spec.maximum.max())
        cfg.agent.params.action_range = [lowest, highest]
        self.agent = hydra.utils.instantiate(cfg.agent)

        self.replay_buffer = MetaReplayBuffer(cfg.train_tasks, obs_spec.shape,
                                              action_spec.shape,
                                              cfg.replay_buffer_capacity,
                                              self.device)

        video_dir = self.work_dir if cfg.save_video else None
        self.eval_video_recorder = VideoRecorder(video_dir)
        self.step = 0
Exemple #26
0
def _remove_spaces(sentence):
    s = sentence.strip().split()
    s = " ".join(s)
    return s


def _find_nums(question):
    nums = re.findall('\d*\.?\d+', question)
    return nums


if __name__ == '__main__':
    # Manual-inference entry point: build the model and restore its weights.
    args = read_arguments_manual_inference()

    device, n_gpu = setup_device()
    set_seed_everywhere(args.seed, n_gpu)

    schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir)

    grammar = semQL.Grammar()
    model = IRNet(args, device, grammar)
    model.to(device)

    # Load the pre-trained parameters. map_location remaps the stored tensors
    # onto the device selected above, so a checkpoint saved on a GPU can also
    # be loaded on a CPU-only machine without editing this script.
    model.load_state_dict(
        torch.load(args.model_to_load, map_location=device))

    # Switch to inference mode.
    model.eval()
    print("Load pre-trained model from '{}'".format(args.model_to_load))
Exemple #27
0
        args.model_state_file = os.path.join(args.save_dir,
                                             args.model_state_file)

        print("Expanded filepaths: ")
        print("\t{}".format(args.vectorizer_file))
        print("\t{}".format(args.model_state_file))

    # Check CUDA: force the flag off when no CUDA device is available, so the
    # rest of the setup falls back to the CPU.
    if not torch.cuda.is_available():
        args.cuda = False

    print("Using CUDA: {}".format(args.cuda))

    # Device used for the classifier below.
    args.device = torch.device("cuda" if args.cuda else "cpu")

    # Seed the random number generators; the CUDA flag is passed along as
    # well (presumably so CUDA RNGs are seeded too -- confirm in the helper).
    set_seed_everywhere(args.seed, args.cuda)

    # The dataset, vectorizer and classifier objects are carried on `args`.
    dataset = args.dataset
    vectorizer = args.vectorizer
    classifier = args.classifier

    classifier = classifier.to(args.device)

    # Cross-entropy loss optimised with Adam.
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(classifier.parameters(), lr=args.learning_rate)
    # Halve the learning rate (factor=0.5) once the monitored value, which is
    # expected to decrease (mode='min'), has stopped improving for more than
    # `patience` scheduler steps.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                     mode='min',
                                                     factor=0.5,
                                                     patience=1)

    # Bookkeeping structure for the training run, built from the arguments.
    train_state = make_train_state(args)
Exemple #28
0
def main():
    """Train an agent on a dmc2gym task, evaluating and logging as it goes.

    The run is configured entirely through ``parse_args()``. The function
    creates the environment, the experiment directory (with ``video``,
    ``model`` and ``buffer`` sub-directories and an ``args.json`` dump), the
    replay buffer, the agent and the logger, then steps the environment for
    ``args.num_train_steps`` steps. Every ``args.eval_freq`` steps the agent
    is evaluated and, if requested, the model and the buffer are saved.
    """
    args = parse_args()
    if args.seed == -1:
        # A seed of -1 means "draw a random seed".
        vars(args)["seed"] = np.random.randint(1, 1000000)
    utils.set_seed_everywhere(args.seed)

    # Render at the larger pre-transform size only when cropping is among the
    # augmentations; otherwise render directly at the final image size.
    if 'crop' in args.data_augs:
        pre_transform_image_size = args.pre_transform_image_size
    else:
        pre_transform_image_size = args.image_size
    # Always recorded from the arguments; the replay buffer uses it for the
    # translation augmentation.
    pre_image_size = args.pre_transform_image_size

    use_pixels = args.encoder_type == 'pixel'
    env = dmc2gym.make(
        domain_name=args.domain_name,
        task_name=args.task_name,
        seed=args.seed,
        visualize_reward=False,
        from_pixels=use_pixels,
        height=pre_transform_image_size,
        width=pre_transform_image_size,
        frame_skip=args.action_repeat,
    )
    env.seed(args.seed)

    # Pixel observations are stacked over several consecutive frames.
    if use_pixels:
        env = utils.FrameStack(env, k=args.frame_stack)

    # Experiment directory name: task, date (UTC month-day), image size,
    # batch size, seed and encoder type.
    date_stamp = time.strftime("%m-%d", time.gmtime())
    env_name = args.domain_name + '-' + args.task_name
    exp_name = ''.join([
        env_name, '-', date_stamp,
        '-im', str(args.image_size),
        '-b', str(args.batch_size),
        '-s', str(args.seed),
        '-', args.encoder_type,
    ])
    args.work_dir += '/' + exp_name

    utils.make_dir(args.work_dir)
    video_dir = utils.make_dir(os.path.join(args.work_dir, 'video'))
    model_dir = utils.make_dir(os.path.join(args.work_dir, 'model'))
    buffer_dir = utils.make_dir(os.path.join(args.work_dir, 'buffer'))

    video = VideoRecorder(video_dir if args.save_video else None)

    # Keep a copy of the full argument set next to the results.
    args_path = os.path.join(args.work_dir, 'args.json')
    with open(args_path, 'w') as args_file:
        json.dump(vars(args), args_file, sort_keys=True, indent=4)

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    action_shape = env.action_space.shape

    # The agent sees observations at the final image size, while the buffer
    # stores them at the pre-augmentation size.
    if use_pixels:
        channels = 3 * args.frame_stack
        obs_shape = (channels, args.image_size, args.image_size)
        pre_aug_obs_shape = (channels, pre_transform_image_size,
                             pre_transform_image_size)
    else:
        obs_shape = env.observation_space.shape
        pre_aug_obs_shape = obs_shape

    replay_buffer = utils.ReplayBuffer(
        obs_shape=pre_aug_obs_shape,
        action_shape=action_shape,
        capacity=args.replay_buffer_capacity,
        batch_size=args.batch_size,
        device=device,
        image_size=args.image_size,
        pre_image_size=pre_image_size,
    )

    agent = make_agent(
        obs_shape=obs_shape,
        action_shape=action_shape,
        args=args,
        device=device,
    )

    logger = Logger(args.work_dir, use_tb=args.save_tb)

    # `done` starts as True so the first iteration resets the environment.
    episode = 0
    episode_reward = 0
    done = True
    start_time = time.time()

    for step in range(args.num_train_steps):
        # Periodic evaluation and checkpointing.
        if step % args.eval_freq == 0:
            logger.log('eval/episode', episode, step)
            evaluate(env, agent, video, args.num_eval_episodes, logger, step,
                     args)
            if args.save_model:
                agent.save_curl(model_dir, step)
            if args.save_buffer:
                replay_buffer.save(buffer_dir)

        # Episode boundary: flush the logs and start a new episode.
        if done:
            on_log_step = step % args.log_interval == 0
            if step > 0:
                if on_log_step:
                    logger.log('train/duration', time.time() - start_time,
                               step)
                    logger.dump(step)
                start_time = time.time()
            if on_log_step:
                logger.log('train/episode_reward', episode_reward, step)

            obs = env.reset()
            done = False
            episode_reward = 0
            episode_step = 0
            episode += 1
            if on_log_step:
                logger.log('train/episode', episode, step)

        if step < args.init_steps:
            # Initial phase: random actions, no training updates.
            action = env.action_space.sample()
        else:
            # Afterwards: act with the agent, then run one training update.
            with utils.eval_mode(agent):
                action = agent.sample_action(obs / 255.)
            agent.update(replay_buffer, logger, step)

        next_obs, reward, done, _info = env.step(action)

        # An episode that ends only because it reached the step limit is
        # stored as not-done, so value bootstrapping continues past it.
        if episode_step + 1 == env._max_episode_steps:
            done_bool = 0
        else:
            done_bool = float(done)
        episode_reward += reward
        replay_buffer.add(obs, action, reward, next_obs, done_bool)

        obs = next_obs
        episode_step += 1
Exemple #29
0
class Experiment(object):
    """Bundle an environment, an agent and a hindsight replay buffer."""

    def __init__(
            self,
            # environment
            env_id,

            # visual?
            from_images=False,

            # reproducibility
            seed=1,

            # env
            fix_goals=False,

            # compute
            device='cuda' if torch.cuda.is_available() else 'cpu',

            # replay buffer
            num_resampled_goals=1,
            capacity=1_000_000,

            # agent
            feature_dim=128,
            hidden_sizes=None,  # defaults to [512, 512, 512]
            log_std_bounds=None,  # defaults to [-10, 2]
            discount=0.95,
            init_temperature=0.1,
            lr=0.0006,
            actor_update_frequency=2,
            critic_tau=0.005,
            critic_target_update_frequency=2,
            batch_size=128,

            # evaluation
            num_eval_episodes=5,

            # training
            gradient_steps=1,  # better for wall clock time. increase for better performance.
            num_timesteps=20_000,  # maximum time steps
            num_seed_steps=1_000,  # random actions to improve exploration
            update_after=1_000,  # when to start updating (off-policy still learns from seed steps)
            eval_every=20,  # episodic frequency for evaluation
            save_every=5_000,  # how often to save the experiment progress in time steps
            **kwargs,  # lazily absorb extra args
    ):
        """Create the environment, agent and replay buffer for one run.

        Args:
            env_id: Gym environment id passed to ``gym.make``. Ids containing
                ``'Kinova'`` are wrapped in ``KinovaWrapper``, all others in
                ``MultiWrapper``.
            from_images: Selects the image variants of the observation and
                goal keys, and hands the agent to the environment via
                ``set_agent``.
            seed: Seed for the global RNGs and the environment wrapper.
            fix_goals: Forwarded to the environment wrapper.
            device: Device given to the agent and the replay buffer.
            num_resampled_goals, capacity: Replay buffer settings.
            feature_dim, hidden_sizes, log_std_bounds, discount,
            init_temperature, lr, critic_tau, batch_size: Forwarded to
                ``Agent``. ``hidden_sizes`` and ``log_std_bounds`` default to
                ``[512, 512, 512]`` and ``[-10, 2]``; a fresh list is built
                for every instance when they are omitted.
            actor_update_frequency, critic_target_update_frequency: Accepted
                for interface compatibility; not used anywhere in this
                constructor (NOTE(review): confirm whether they should be
                forwarded to ``Agent``).
            num_eval_episodes, gradient_steps, num_timesteps, num_seed_steps,
            update_after, eval_every, save_every: Stored on the instance.
            **kwargs: Extra keyword arguments are accepted and ignored.
        """
        # Build the list defaults per call: a list literal in the signature
        # would be a single object shared by every Experiment instance.
        if hidden_sizes is None:
            hidden_sizes = [512, 512, 512]
        if log_std_bounds is None:
            log_std_bounds = [-10, 2]

        self.observation_key = 'image_observation' if from_images else 'observation'
        self.achieved_goal_key = 'image_achieved_goal' if from_images else 'achieved_goal'
        self.desired_goal_key = 'image_desired_goal' if from_images else 'desired_goal'

        # Seed
        utils.set_seed_everywhere(seed)

        # Create env
        self.env_id = env_id
        self.seed = seed
        self.from_images = from_images
        self.fix_goals = fix_goals

        self.env = gym.make(self.env_id)
        if 'Kinova' in self.env_id:
            self.env = wrappers.KinovaWrapper(self.env, self.seed,
                                              self.from_images, self.fix_goals)
        else:
            self.env = wrappers.MultiWrapper(self.env, self.seed,
                                             self.from_images, self.fix_goals)

        # Create agent
        self.agent = Agent(from_images,
                           self.env.observation_space,
                           self.env.action_space,
                           device=device,
                           feature_dim=feature_dim,
                           hidden_sizes=hidden_sizes,
                           log_std_bounds=log_std_bounds,
                           discount=discount,
                           init_temperature=init_temperature,
                           lr=lr,
                           critic_tau=critic_tau,
                           batch_size=batch_size)

        # update env to use agent encoder for images if necessary
        if self.from_images:
            self.env.set_agent(
                self.agent)  # set the conv encoder for latent distance rewards

        # Create replay buffer
        self.replay_buffer = HindsightReplayBuffer(
            from_images=from_images,
            env=self.env,
            num_resampled_goals=num_resampled_goals,
            observation_space=self.env.observation_space,
            action_space=self.env.action_space,
            capacity=capacity,
            device=device,
        )

        # Training-loop counters and schedule settings.
        self.step = 0
        self.num_eval_episodes = num_eval_episodes

        self.gradient_steps = gradient_steps
        self.num_timesteps = num_timesteps
        self.num_seed_steps = num_seed_steps
        self.update_after = update_after
        self.eval_every = eval_every
        self.save_every = save_every
Exemple #30
0
def main(args):
    """Evaluate a saved agent and write the resulting rewards to disk.

    The working directory ``<log_dir>/<domain>_<task>/<algorithm>/<seed>``
    must already exist and must not yet contain results for
    ``args.eval_mode``. The agent is loaded from the ``model`` sub-directory,
    evaluated for ``args.eval_episodes`` episodes and, when the algorithm is
    ``'pad'``, evaluated a second time on a fresh environment with
    ``adapt=True``. Both rewards are saved together with ``args``.

    Args:
        args: Parsed command-line arguments.
    """
    # Set seed
    utils.set_seed_everywhere(args.seed)

    def build_env():
        # Evaluation environments use the run seed offset by 42.
        return make_env(domain_name=args.domain_name,
                        task_name=args.task_name,
                        seed=args.seed + 42,
                        episode_length=args.episode_length,
                        action_repeat=args.action_repeat,
                        mode=args.eval_mode)

    # Initialize environments
    gym.logger.set_level(40)
    env = build_env()

    # Set working directory
    task_dir = args.domain_name + '_' + args.task_name
    work_dir = os.path.join(args.log_dir, task_dir, args.algorithm,
                            str(args.seed))
    print('Working directory:', work_dir)
    assert os.path.exists(
        work_dir), 'specified working directory does not exist'
    model_dir = utils.make_dir(os.path.join(work_dir, 'model'))
    video_dir = utils.make_dir(os.path.join(work_dir, 'video'))
    video_target = video_dir if args.save_video else None
    video = VideoRecorder(video_target, height=448, width=448)

    # Refuse to overwrite results from an earlier evaluation run.
    results_fp = os.path.join(work_dir, args.eval_mode + '.pt')
    assert not os.path.exists(
        results_fp), f'{args.eval_mode} results already exist for {work_dir}'

    # Prepare agent
    assert torch.cuda.is_available(), 'must have cuda enabled'
    cropped_obs_shape = (3 * args.frame_stack, 84, 84)
    # NOTE: the agent built here is replaced right away by the one loaded
    # from disk; the call is kept so behavior stays exactly as before.
    agent = make_agent(obs_shape=cropped_obs_shape,
                       action_shape=env.action_space.shape,
                       args=args)
    checkpoint_path = os.path.join(model_dir, str(args.train_steps) + '.pt')
    agent = torch.load(checkpoint_path)
    agent.train(False)

    print(
        f'\nEvaluating {work_dir} for {args.eval_episodes} episodes (mode: {args.eval_mode})'
    )
    reward = evaluate(env, agent, video, args.eval_episodes, args.eval_mode)
    print('Reward:', int(reward))

    if args.algorithm == 'pad':
        # Second pass on a freshly created environment with adaptation on.
        env = build_env()
        adapt_reward = evaluate(env,
                                agent,
                                video,
                                args.eval_episodes,
                                args.eval_mode,
                                adapt=True)
        print('Adapt reward:', int(adapt_reward))
    else:
        adapt_reward = None

    # Save results
    results = {
        'args': args,
        'reward': reward,
        'adapt_reward': adapt_reward
    }
    torch.save(results, results_fp)
    print('Saved results to', results_fp)