def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency_step,
                         agent=cfg.agent.name,
                         action_repeat=cfg.action_repeat)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)
    self.env = make_env(cfg)

    cfg.agent.params.obs_shape = self.env.observation_space.shape
    print(self.env.action_space.shape)
    cfg.agent.params.action_shape = (self.env.action_space.n,)
    cfg.agent.params.action_range = [0, 12]  # hard-coded range for this discrete action space
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayBuffer(self.env.observation_space.shape,
                                      self.env.action_space.shape,
                                      cfg.replay_buffer_capacity,
                                      self.cfg.image_pad, self.device)

    self.video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = 0
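# `utils.set_seed_everywhere` is called in every snippet in this section but
# never defined here. A minimal sketch, assuming the usual combination of
# torch / numpy / stdlib seeding (some variants below also take a second
# argument such as a GPU count or CUDA flag to gate the device seeding; the
# exact body may differ per repo):
import random

import numpy as np
import torch


def set_seed_everywhere(seed):
    # Seed torch on CPU and, when available, on all CUDA devices, then
    # numpy and Python's builtin RNG.
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)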
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg
    self.observation_space_shape = (16, 16)
    self.device = torch.device(cfg.device)

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency,
                         agent=cfg.agent.name)

    utils.set_seed_everywhere(cfg.seed)
    self.env = make_env(cfg.env)
    self.max_episode_steps = cfg.max_episode_steps

    cfg.agent.params.obs_dim = self.observation_space_shape
    # set action_dim = env.action_space.n
    cfg.agent.params.action_dim = self.env.action_space.n
    cfg.agent.params.action_range = [
        float(0), float(self.env.action_space.n)
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayBuffer(self.observation_space_shape,
                                      (self.env.action_space.n,),  # action shape for the discrete space
                                      int(cfg.replay_buffer_capacity),
                                      self.device)
    # self.video_recorder = VideoRecorder(
    #     self.work_dir if cfg.save_video else None)
    self.step = 0
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency,
                         agent=cfg.agent.name)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)
    self.env = DummyWrapper({'steps': 100})

    cfg.agent.params.obs_dim = 1152
    cfg.agent.params.action_dim = self.env.action_space.shape[0]
    cfg.agent.params.action_range = [
        float(self.env.action_space.low.min()),
        float(self.env.action_space.high.max())
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayMemoryFast(memory_size=1024)

    self.step = 0
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency,
                         agent=cfg.agent.name)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)
    self.env = utils.make_env(cfg)

    cfg.agent.params.obs_dim = self.env.observation_space.shape[0]
    cfg.agent.params.action_dim = self.env.action_space.shape[0]
    cfg.agent.params.action_range = [
        float(self.env.action_space.low.min()),
        float(self.env.action_space.high.max())
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayBuffer(self.env.observation_space.shape,
                                      self.env.action_space.shape,
                                      int(cfg.replay_buffer_capacity),
                                      self.device)

    self.video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = 0
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency,
                         agent=cfg.agent.name)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)
    self.train_envs, self.test_envs = utils.make_env(cfg)

    cfg.agent.params.obs_dim = self.train_envs[0].observation_space.shape[0] + cfg.noise_dims
    cfg.agent.params.action_dim = self.train_envs[0].action_space.shape[0]
    if cfg.agent.name != 'sac':
        cfg.agent.params.num_envs = cfg.num_train_envs
    cfg.agent.params.action_range = [
        float(self.train_envs[0].action_space.low.min()),
        float(self.train_envs[0].action_space.high.max())
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)
    self.agent.seq_len = cfg.seq_len

    self.replay_buffer = MultiEnvReplayBuffer(
        (cfg.agent.params.obs_dim,),  # hard-coded
        self.train_envs[0].action_space.shape,
        int(cfg.replay_buffer_capacity),
        self.device,
        num_envs=cfg.num_train_envs,
        seq_len=cfg.seq_len)

    self.video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = [0] * cfg.num_train_envs
def load_static(args):
    device, n_gpu = setup_device()
    set_seed_everywhere(args.seed, n_gpu)

    schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir)

    grammar = semQL.Grammar()
    model = IRNet(args, device, grammar)
    model.to(device)

    # load the pre-trained parameters
    model.load_state_dict(
        torch.load(args.model_to_load, map_location=torch.device('cpu')))
    model.eval()
    print("Load pre-trained model from '{}'".format(args.model_to_load))

    nlp = English()
    tokenizer = nlp.Defaults.create_tokenizer(nlp)

    with open(os.path.join(args.conceptNet, 'english_RelatedTo.pkl'), 'rb') as f:
        related_to_concept = pickle.load(f)
    with open(os.path.join(args.conceptNet, 'english_IsA.pkl'), 'rb') as f:
        is_a_concept = pickle.load(f)

    return args, grammar, model, nlp, tokenizer, related_to_concept, is_a_concept, schemas_raw, schemas_dict
def run(args):
    import hyperparameters

    cfg = hyperparameters.get_config(args)
    cfg.layers_per_scale = 1
    # cfg.num_keypoints = 32
    # cfg.batch_size = 25

    utils.set_seed_everywhere(args.seed)

    imgs_to_keyp_model = ImagesToKeypEncoder(cfg, (8, 3, 64, 64), debug=True)
    keyp_to_imgs_model = KeypToImagesDecoder(cfg, (8, 3, 64, 64), debug=True)
    keyp_pred_net = KeypPredictor(cfg)
    keyp_inverse_net = KeypInverseModel(cfg)

    print(imgs_to_keyp_model)
    print(keyp_to_imgs_model)
    print(keyp_pred_net)
    # summary(model, input_size=(2, 3, 64, 64))

    img = 0.5 * torch.ones((1, 4, 3, 64, 64))
    action = 0.4 * torch.ones((1, 4, 4))
    k, h = imgs_to_keyp_model(img)
    r = keyp_to_imgs_model(k, img[:, 0], k[:, 0])
    print(k.shape, h.shape, r.shape)

    pred_k = keyp_pred_net(k[Ellipsis, :2], action)
    pred_action = keyp_inverse_net(k[Ellipsis, :2])
    print("Pred_k: ", pred_k.shape, "Pred_action:", pred_action.shape)

    b = sum([
        np.prod(list(params.size()))
        for params in imgs_to_keyp_model.parameters()
    ])
    print("Encoder params: ", b)
    c = sum([
        np.prod(list(params.size()))
        for params in keyp_to_imgs_model.parameters()
    ])
    print("Decoder params: ", c)
    d = sum([
        np.prod(list(params.size())) for params in keyp_pred_net.parameters()
    ])
    print("Keyp Predictor params: ", d)
    print("Model parameters: ", b + c + d)

    for n in range(k.shape[1]):
        print(pred_k[0, 2, n, :2], k[0, 2, n, :2])

    print(
        F.mse_loss(pred_k, k[:, 1:, :, :2], reduction='sum') /
        (pred_k.shape[0] * pred_k.shape[1]))
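# A usage note (not from the source): the parameter counts above can be
# written more directly with `Tensor.numel`, which is equivalent to summing
# `np.prod(list(params.size()))` over the parameters:
def count_params(model):
    return sum(p.numel() for p in model.parameters())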
def __init__(self, cfg):
    self.cfg = cfg

    seeds = torch.randint(0, 2**32, torch.Size([4]))
    set_seed_everywhere(seeds[0])

    self.save_dir = os.path.join(self.cfg.gen.base_dir,
                                 'results/unsup_cl',
                                 self.cfg.gen.target_dir,
                                 str(seeds[0].item()))
    self.log_dir = os.path.join(self.save_dir, 'logs')
def init_env(args):
    utils.set_seed_everywhere(args.seed)
    return make_pad_env(domain_name=args.domain_name,
                        task_name=args.task_name,
                        seed=args.seed,
                        episode_length=args.episode_length,
                        action_repeat=args.action_repeat,
                        mode=args.mode)
def main(args):
    utils.set_seed_everywhere(args.seed)

    cfg = hyperparameters.get_config(args)
    cfg.seed = args.seed

    args.cuda = not args.no_cuda and torch.cuda.is_available()

    time_str = datetime.now(
        timezone('US/Eastern')).strftime("%Y-%m-%d-%H-%M-%S")
    exp_dir = os.path.join(cfg.base_dir, time_str)
    checkpoint_dir = os.path.join(exp_dir, cfg.checkpoint_dir)
    log_dir = os.path.join(exp_dir, cfg.log_dir)

    save_config(cfg, exp_dir, "config.json")
    print("Log path: ", log_dir, "Checkpoint Dir: ", checkpoint_dir)

    num_timesteps = cfg.observed_steps + cfg.predicted_steps
    data_shape = {'image': (None, num_timesteps, 3, 64, 64)}
    cfg.data_shapes = data_shape

    model = KeypointModel(cfg)

    cp_callback = ModelCheckpoint(filepath=os.path.join(checkpoint_dir,
                                                        "model_"),
                                  period=25,
                                  save_top_k=-1)
    logger = TensorBoardLogger(log_dir, name="", version=None)
    gpus = 1 if args.cuda else None

    if args.pretrained_path:
        checkpoint_path = get_latest_checkpoint(args.pretrained_path)
        import json
        model = KeypointModel.load_from_checkpoint(checkpoint_path)
        print(json.dumps(model.cfg, indent=4))

    print("On GPU Device: ", gpus)
    trainer = Trainer(
        max_epochs=args.num_epochs,
        logger=logger,
        checkpoint_callback=cp_callback,
        gpus=gpus,
        # distributed_backend='dp',
        progress_bar_refresh_rate=1,
        # gradient_clip_val=cfg.clipnorm,
        fast_dev_run=False,
        # train_percent_check=0.1, val_percent_check=0.0,
        # val_percent_check=0.3,
        track_grad_norm=2,
        show_progress_bar=True)
    trainer.fit(model)

    save_path = os.path.join(checkpoint_dir,
                             "model_final_" + str(args.num_epochs) + ".ckpt")
    print("Saving model finally:")
    trainer.save_checkpoint(save_path)
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')
    self.model_dir = utils.make_dir(self.work_dir, 'model')
    self.buffer_dir = utils.make_dir(self.work_dir, 'buffer')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency_step,
                         action_repeat=cfg.action_repeat,
                         agent=cfg.agent.name)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)

    self.env = dmc.make(cfg.env, cfg.frame_stack, cfg.action_repeat, cfg.seed)
    self.eval_env = dmc.make(cfg.env, cfg.frame_stack, cfg.action_repeat,
                             cfg.seed + 1)

    obs_spec = self.env.observation_spec()['pixels']
    action_spec = self.env.action_spec()

    cfg.agent.params.obs_shape = obs_spec.shape
    cfg.agent.params.action_shape = action_spec.shape
    cfg.agent.params.action_range = [
        float(action_spec.minimum.min()),
        float(action_spec.maximum.max())
    ]

    # exploration agent uses intrinsic reward
    self.expl_agent = hydra.utils.instantiate(cfg.agent, task_agnostic=True)
    # task agent uses extrinsic reward
    self.task_agent = hydra.utils.instantiate(cfg.agent, task_agnostic=False)
    self.task_agent.assign_modules_from(self.expl_agent)

    if cfg.load_pretrained:
        pretrained_path = utils.find_pretrained_agent(
            cfg.pretrained_dir, cfg.env, cfg.seed, cfg.pretrained_step)
        print(f'snapshot is taken from: {pretrained_path}')
        pretrained_agent = utils.load(pretrained_path)
        self.task_agent.assign_modules_from(pretrained_agent)

    # buffer for the task-agnostic phase
    self.expl_buffer = ReplayBuffer(obs_spec.shape, action_spec.shape,
                                    cfg.replay_buffer_capacity, self.device)
    # buffer for the task-specific phase
    self.task_buffer = ReplayBuffer(obs_spec.shape, action_spec.shape,
                                    cfg.replay_buffer_capacity, self.device)

    self.eval_video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = 0
def __init__(self, cfg):
    self.cfg = cfg

    seeds = torch.randint(0, 2**32, torch.Size([4]))
    set_seed_everywhere(seeds[0])

    self.save_dir = os.path.join(self.cfg.gen.base_dir,
                                 'results/unsup_cl_rag',
                                 self.cfg.gen.target_dir,
                                 str(seeds[0].item()))
    self.log_dir = os.path.join(self.save_dir, 'logs')
    print("embeddings are on sphere")
    print(f"save dir: {self.save_dir}")
    print(f"log dir: {self.log_dir}")
def init_env(args):
    utils.set_seed_everywhere(args.seed)
    return make_locomotion_env(env_name=args.env_name,
                               seed=args.seed,
                               episode_length=args.episode_length,
                               from_pixels=args.pixel_obs,
                               action_repeat=args.action_repeat,
                               obs_height=args.obs_height,
                               obs_width=args.obs_width,
                               camera_id=args.env_camera_id,
                               mode=args.mode)
def run_final_test(args):
    utils.set_seed_everywhere(args.seed)

    cfg = hyperparameters.get_config(args)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.cuda else "cpu")

    l_dir = cfg.train_dir if args.is_train else args.test_dir
    print("Data loader: ", l_dir)
    loader, data_shapes = datasets.get_sequence_dataset(
        data_dir=os.path.join(cfg.data_dir, l_dir),
        batch_size=cfg.batch_size,
        num_timesteps=2 * args.timesteps,
        shuffle=True)

    cfg.log_training = args.log_training
    cfg.log_training_path = os.path.join(args.log_training_path)
    cfg.data_shapes = data_shapes

    if args.no_first:
        if args.keyp_pred:
            print("Loading keyp pred")
            model = train_keyp_pred.KeypointModel(cfg).to(device)
        elif args.keyp_inverse:
            print("Loading Inverse Model")
            model = train_keyp_inverse.KeypointModel(cfg).to(device)
        else:
            pass
    else:
        model = train.KeypointModel(cfg).to(device)

    if args.pretrained_path:
        if args.ckpt:
            checkpoint_path = os.path.join(
                args.pretrained_path, "_ckpt_epoch_" + args.ckpt + ".ckpt")
        else:
            print("Loading latest")
            checkpoint_path = get_latest_checkpoint(args.pretrained_path)
        checkpoint = torch.load(checkpoint_path,
                                map_location=lambda storage, loc: storage)
        print("Loading model from: ", checkpoint_path)
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()
        print("Load complete")

    trainer = Trainer(gpus=1,
                      progress_bar_refresh_rate=1,
                      show_progress_bar=True)
    trainer.test(model)
def __init__(self, cfg):
    self.work_dir = '/media/trevor/mariadb/thesis/'
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency_step,
                         agent=cfg.agent.name,
                         action_repeat=cfg.action_repeat)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)
    self.env = make_env(cfg)

    cfg.agent.params.obs_shape = self.env.observation_space.shape
    cfg.agent.params.action_shape = self.env.action_space.shape
    cfg.agent.params.action_range = [
        float(self.env.action_space.low.min()),
        float(self.env.action_space.high.max())
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayBuffer(self.env.observation_space.shape,
                                      self.env.action_space.shape,
                                      cfg.replay_buffer_capacity,
                                      self.cfg.image_pad, self.device,
                                      self.cfg.env)
    # obs_shape = (3 * 3, 84, 84)
    # pre_aug_obs_shape = (3 * 3, 100, 100)
    #
    # self.replay_buffer = ReplayBuffer(
    #     obs_shape=pre_aug_obs_shape,
    #     action_shape=self.env.action_space.shape,
    #     capacity=cfg.replay_buffer_capacity,
    #     batch_size=cfg.batch_size,
    #     device=self.device,
    #     image_size=84,
    #     pre_image_size=100,
    # )

    self.video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = 0
def __init__(self, cfg): self.work_dir = os.getcwd() """Hack to adjust action_repeat""" adjust_action_repeat_hack(cfg) print(f"CFG:\n{'-'*100}\n{cfg}\n{'-'*100}") self.cfg = cfg experiment_name = f"{cfg.full_title}_{cfg.run_id}" self.logger = Logger(self.work_dir, save_tb=cfg.log_save_tb, save_wb=cfg.log_save_wandb, log_frequency=cfg.log_frequency_step, agent=cfg.agent.name, action_repeat=cfg.action_repeat, cfg=dict(flatten_cfg(cfg)), plot_project="drqtest", experiment=experiment_name) utils.set_seed_everywhere(cfg.seed) self.device = torch.device(cfg.device) self.env = make_env(cfg) cfg.agent.params.obs_shape = self.env.observation_space.shape cfg.agent.params.action_shape = self.env.action_space.shape cfg.agent.params.action_range = [ float(self.env.action_space.low.min()), float(self.env.action_space.high.max()) ] self.agent = hydra.utils.instantiate(cfg.agent) print(f"ACTOR:\n{'-'*100}\n{self.agent.actor}\n{'-'*100}") print(f"CRITIC:\n{'-'*100}\n{self.agent.critic}\n{'-'*100}") self.replay_buffer = ReplayBuffer( self.env.observation_space.shape, self.env.action_space.shape, cfg.replay_buffer_capacity, self.cfg.image_pad, self.device, use_aug=cfg.replay_buffer_augmentation) self.video_recorder = VideoRecorder( self.work_dir if cfg.save_video else None) self.step = 0
def __init__(self, cfg, env_maker, phi, work_dir):
    self.env_maker = env_maker
    self.env = env_maker()
    self.phi = phi
    # self.env = dmc2gym.make(
    #     domain_name='CarIntersect',
    #     task_name=task_name,
    #     seed=cfg.seed,
    #     visualize_reward=False,
    #     from_pixels=True,
    #     frame_skip=cfg.action_repeat,
    # )

    self.work_dir = work_dir
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)

    cfg.agent.params.obs_shape = self.phi(self.env.reset()).shape
    print(f'DRQ: get observation shape : {cfg.agent.params.obs_shape}')
    cfg.agent.params.action_shape = self.env.action_space.shape
    print(f'DRQ: get action shape : {cfg.agent.params.action_shape}')
    cfg.agent.params.action_range = [-1, +1]
    self.agent: DRQAgent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayBuffer(
        obs_shape=cfg.agent.params.obs_shape,
        action_shape=self.env.action_space.shape,
        capacity=cfg.replay_buffer_capacity,
        image_pad=self.cfg.image_pad,
        device=self.device,
        phi=self.phi,
    )

    self.step = 0
    self.total_env_step = 0
    self.total_updates = 0
    self.total_episodes = 0
def __init__(
        self,
        log_save_tb=True,
        log_frequency_step=10000,
        agent_name='drq',
        # device='cuda',
        device='cpu',
        env='cartpole_swingup',
        seed=1,
        image_size=84,
        action_repeat=8,
        frame_stack=3,
        replay_buffer_capacity=100000,
        image_pad=4,
        save_video=True):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.logger = Logger(self.work_dir,
                         save_tb=log_save_tb,
                         log_frequency=log_frequency_step,
                         agent=agent_name,
                         action_repeat=action_repeat)

    utils.set_seed_everywhere(seed)
    self.device = torch.device(device)
    self.env = make_env(env, seed, image_size, action_repeat, frame_stack)

    self.agent = DRQAgent(
        obs_shape=self.env.observation_space.shape,
        action_shape=self.env.action_space.shape,
        action_range=(float(self.env.action_space.low.min()),
                      float(self.env.action_space.high.max())),
        device=self.device)

    self.replay_buffer = ReplayBuffer(self.env.observation_space.shape,
                                      self.env.action_space.shape,
                                      replay_buffer_capacity, image_pad,
                                      self.device)

    self.video_recorder = VideoRecorder(
        self.work_dir if save_video else None)
    self.step = 0
def set_envs(args):
    if not torch.cuda.is_available():
        args.cuda = False
        args.fp16 = False

    if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
        # parse e.g. "0,1,2" into [0, 1, 2] (safer than eval-ing the string)
        args.device_ids = [
            int(i) for i in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
        ]
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        torch.backends.cudnn.benchmark = True

    if not args.device:
        args.device = torch.device(
            "cuda" if torch.cuda.is_available() and args.cuda else "cpu")

    if args.expand_filepaths_to_save_dir:
        args.model_state_file = os.path.join(args.save_dir,
                                             args.model_state_file)

    set_seed_everywhere(args.seed, args.cuda)
    handle_dirs(args.save_dir)
def __init__(self, cfg): self.work_dir = os.getcwd() print(f"workspace: {self.work_dir}") self.cfg = cfg self.logger = Logger( self.work_dir, save_tb=cfg.log_save_tb, log_frequency=cfg.log_frequency_step, agent=cfg.agent.name, action_repeat=cfg.action_repeat, ) utils.set_seed_everywhere(cfg.seed) self.device = torch.device(cfg.device) self.env = make_env(cfg, eval=False) cfg.agent.params.obs_shape = self.env.observation_space.shape cfg.agent.params.action_shape = self.env.action_space.shape cfg.agent.params.action_range = [ float(self.env.action_space.low.min()), float(self.env.action_space.high.max()), ] self.agent = hydra.utils.instantiate(cfg.agent) self.replay_buffer = ReplayBuffer( self.env.observation_space.shape, self.env.action_space.shape, self.env.state_space.shape, cfg.replay_buffer_capacity, self.cfg.image_size, self.agent.random_encoder, self.cfg.aug_type, self.cfg.use_drq, self.device, ) self.video_recorder = VideoRecorder( self.work_dir if cfg.save_video else None) self.step = 0
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency_step,
                         agent=cfg.agent.name,
                         action_repeat=cfg.action_repeat)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)
    self.env = make_env(cfg, self.logger)

    self.eval_env = gym.make(cfg.env)
    if "img_only" not in cfg.env:
        self.eval_env = DictToBoxWrapper(DictTransposeImage(self.eval_env))
    else:
        self.eval_env = TransposeImage(self.eval_env)
        # env = utils.FrameStack(env, k=cfg.frame_stack)
    self.eval_env.seed(cfg.seed + 111)

    cfg.agent.params.obs_shape = self.env.observation_space.shape
    cfg.agent.params.action_shape = self.env.action_space.shape
    cfg.agent.params.action_range = [
        float(self.env.action_space.low.min()),
        float(self.env.action_space.high.max())
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayBuffer(self.env.observation_space.shape,
                                      self.env.action_space.shape,
                                      cfg.replay_buffer_capacity,
                                      self.cfg.image_pad, self.device)

    self.video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = 0
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency_step,
                         agent=cfg.agent.name,
                         action_repeat=cfg.action_repeat)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)

    gibson_config_filename = os.path.join(
        os.path.dirname(gibson2.__file__),
        '../examples/configs/hand_drawer.yaml')
    self.env = HandDrawerEnv(config_file=gibson_config_filename,
                             mode='headless')
    self.env = utils.FrameStack(self.env, k=cfg.frame_stack)

    cfg.agent.params.obs_shape = self.env.observation_space.shape
    cfg.agent.params.action_shape = self.env.action_space.shape
    cfg.agent.params.action_range = [
        float(self.env.action_space.low.min()),
        float(self.env.action_space.high.max())
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayBuffer(self.env.observation_space.shape,
                                      self.env.action_space.shape,
                                      cfg.replay_buffer_capacity,
                                      self.cfg.image_pad, self.device)

    self.video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = 0
def load_model(args):
    utils.set_seed_everywhere(args.seed)

    cfg = hyperparameters.get_config(args)
    cfg.data_shapes = {'image': (None, 16, 3, 64, 64)}

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.cuda else "cpu")

    if not args.inv_fwd:
        model = train_keyp_pred.KeypointModel(cfg).to(device)
    else:
        model = train_keyp_inverse_forward.KeypointModel(cfg).to(device)

    checkpoint_path = os.path.join(args.pretrained_path,
                                   "_ckpt_epoch_" + args.ckpt + ".ckpt")
    checkpoint = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)
    print("Loading model from: ", checkpoint_path)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    print("Load complete")

    return model
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(
        self.work_dir + "_" + self.cfg.env +
        "_eval2k_effective_{}_seed_{}".format(self.cfg.effective_aug,
                                              self.cfg.seed),
        save_tb=cfg.log_save_tb,
        log_frequency=cfg.log_frequency_step,
        agent=cfg.agent.name,
        action_repeat=cfg.action_repeat)
    self.effective_aug = self.cfg.effective_aug

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)
    self.env = make_env(cfg)

    cfg.agent.params.obs_shape = self.env.observation_space.shape
    cfg.agent.params.action_shape = self.env.action_space.shape
    cfg.agent.params.action_range = [
        float(self.env.action_space.low.min()),
        float(self.env.action_space.high.max())
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = ReplayBuffer(self.env.observation_space.shape,
                                      self.env.action_space.shape,
                                      cfg.replay_buffer_capacity,
                                      self.cfg.image_pad, self.device,
                                      self.effective_aug)

    self.video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = 0
def __init__(self, cfg):
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg

    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_frequency_step,
                         agent=cfg.agent.name)

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)

    self.env = dmc.make_meta(cfg.env, cfg.episode_length, cfg.seed)
    self.eval_env = dmc.make_meta(cfg.env, cfg.episode_length, cfg.seed + 1)

    obs_spec = self.env.observation_spec()['features']
    action_spec = self.env.action_spec()

    cfg.agent.params.obs_shape = obs_spec.shape
    cfg.agent.params.action_shape = action_spec.shape
    cfg.agent.params.action_range = [
        float(action_spec.minimum.min()),
        float(action_spec.maximum.max())
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    self.replay_buffer = MetaReplayBuffer(cfg.train_tasks, obs_spec.shape,
                                          action_spec.shape,
                                          cfg.replay_buffer_capacity,
                                          self.device)

    self.eval_video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)
    self.step = 0
def _remove_spaces(sentence):
    s = sentence.strip().split()
    s = " ".join(s)
    return s


def _find_nums(question):
    nums = re.findall(r'\d*\.?\d+', question)
    return nums


if __name__ == '__main__':
    args = read_arguments_manual_inference()
    device, n_gpu = setup_device()
    set_seed_everywhere(args.seed, n_gpu)

    schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir)

    grammar = semQL.Grammar()
    model = IRNet(args, device, grammar)
    model.to(device)

    # load the pre-trained parameters
    model.load_state_dict(torch.load(args.model_to_load))
    # to use cpu instead of gpu, uncomment this line:
    # model.load_state_dict(torch.load(args.model_to_load, map_location=torch.device('cpu')))
    model.eval()
    print("Load pre-trained model from '{}'".format(args.model_to_load))
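# A quick sanity check for `_find_nums` above (illustrative input, not from
# the source): the regex extracts both integers and decimals from a
# question string.
assert _find_nums("list the 2 stores with rating above 4.5") == ['2', '4.5']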
args.model_state_file = os.path.join(args.save_dir, args.model_state_file)
print("Expanded filepaths: ")
print("\t{}".format(args.vectorizer_file))
print("\t{}".format(args.model_state_file))

# Check CUDA
if not torch.cuda.is_available():
    args.cuda = False
print("Using CUDA: {}".format(args.cuda))
args.device = torch.device("cuda" if args.cuda else "cpu")

set_seed_everywhere(args.seed, args.cuda)

dataset = args.dataset
vectorizer = args.vectorizer
classifier = args.classifier
classifier = classifier.to(args.device)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=args.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 mode='min',
                                                 factor=0.5,
                                                 patience=1)
train_state = make_train_state(args)
def main():
    args = parse_args()
    if args.seed == -1:
        args.__dict__["seed"] = np.random.randint(1, 1000000)
    utils.set_seed_everywhere(args.seed)

    pre_transform_image_size = args.pre_transform_image_size if 'crop' in args.data_augs else args.image_size
    pre_image_size = args.pre_transform_image_size  # record the pre-transform image size for translation

    env = dmc2gym.make(domain_name=args.domain_name,
                       task_name=args.task_name,
                       seed=args.seed,
                       visualize_reward=False,
                       from_pixels=(args.encoder_type == 'pixel'),
                       height=pre_transform_image_size,
                       width=pre_transform_image_size,
                       frame_skip=args.action_repeat)
    env.seed(args.seed)

    # stack several consecutive frames together
    if args.encoder_type == 'pixel':
        env = utils.FrameStack(env, k=args.frame_stack)

    # make directory
    ts = time.gmtime()
    ts = time.strftime("%m-%d", ts)
    env_name = args.domain_name + '-' + args.task_name
    exp_name = env_name + '-' + ts + '-im' + str(args.image_size) + '-b' \
        + str(args.batch_size) + '-s' + str(args.seed) + '-' + args.encoder_type
    args.work_dir = args.work_dir + '/' + exp_name
    utils.make_dir(args.work_dir)
    video_dir = utils.make_dir(os.path.join(args.work_dir, 'video'))
    model_dir = utils.make_dir(os.path.join(args.work_dir, 'model'))
    buffer_dir = utils.make_dir(os.path.join(args.work_dir, 'buffer'))

    video = VideoRecorder(video_dir if args.save_video else None)

    with open(os.path.join(args.work_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, indent=4)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    action_shape = env.action_space.shape

    if args.encoder_type == 'pixel':
        obs_shape = (3 * args.frame_stack, args.image_size, args.image_size)
        pre_aug_obs_shape = (3 * args.frame_stack, pre_transform_image_size,
                             pre_transform_image_size)
    else:
        obs_shape = env.observation_space.shape
        pre_aug_obs_shape = obs_shape

    replay_buffer = utils.ReplayBuffer(
        obs_shape=pre_aug_obs_shape,
        action_shape=action_shape,
        capacity=args.replay_buffer_capacity,
        batch_size=args.batch_size,
        device=device,
        image_size=args.image_size,
        pre_image_size=pre_image_size,
    )

    agent = make_agent(obs_shape=obs_shape,
                       action_shape=action_shape,
                       args=args,
                       device=device)

    L = Logger(args.work_dir, use_tb=args.save_tb)

    episode, episode_reward, done = 0, 0, True
    start_time = time.time()

    for step in range(args.num_train_steps):
        # evaluate agent periodically
        if step % args.eval_freq == 0:
            L.log('eval/episode', episode, step)
            evaluate(env, agent, video, args.num_eval_episodes, L, step, args)
            if args.save_model:
                agent.save_curl(model_dir, step)
            if args.save_buffer:
                replay_buffer.save(buffer_dir)

        if done:
            if step > 0:
                if step % args.log_interval == 0:
                    L.log('train/duration', time.time() - start_time, step)
                    L.dump(step)
                start_time = time.time()
            if step % args.log_interval == 0:
                L.log('train/episode_reward', episode_reward, step)

            obs = env.reset()
            done = False
            episode_reward = 0
            episode_step = 0
            episode += 1
            if step % args.log_interval == 0:
                L.log('train/episode', episode, step)

        # sample action for data collection
        if step < args.init_steps:
            action = env.action_space.sample()
        else:
            with utils.eval_mode(agent):
                action = agent.sample_action(obs / 255.)
        # run training update
        if step >= args.init_steps:
            num_updates = 1
            for _ in range(num_updates):
                agent.update(replay_buffer, L, step)

        next_obs, reward, done, _ = env.step(action)

        # allow infinite bootstrap: a time-limit cut-off is not stored as a
        # true terminal state
        done_bool = 0 if episode_step + 1 == env._max_episode_steps else float(
            done)
        episode_reward += reward
        replay_buffer.add(obs, action, reward, next_obs, done_bool)

        obs = next_obs
        episode_step += 1
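# The `done_bool` line above implements the common "infinite bootstrap"
# convention: an episode cut off by the time limit is stored as non-terminal
# so the critic keeps bootstrapping through it, while a genuine environment
# termination is stored as 1.0. A tiny worked example (the
# `max_episode_steps` value is assumed):
max_episode_steps = 1000

episode_step, done = 999, True  # time-limit cut-off
done_bool = 0 if episode_step + 1 == max_episode_steps else float(done)
assert done_bool == 0

episode_step, done = 412, True  # genuine terminal state
done_bool = 0 if episode_step + 1 == max_episode_steps else float(done)
assert done_bool == 1.0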
class Experiment(object):
    def __init__(
            self,
            # environment
            env_id,
            # visual?
            from_images=False,
            # reproducibility
            seed=1,
            # env
            fix_goals=False,
            # compute
            device='cuda' if torch.cuda.is_available() else 'cpu',
            # replay buffer
            num_resampled_goals=1,
            capacity=1_000_000,
            # agent
            feature_dim=128,
            hidden_sizes=[512, 512, 512],
            log_std_bounds=[-10, 2],
            discount=0.95,
            init_temperature=0.1,
            lr=0.0006,
            actor_update_frequency=2,
            critic_tau=0.005,
            critic_target_update_frequency=2,
            batch_size=128,
            # evaluation
            num_eval_episodes=5,
            # training
            gradient_steps=1,  # better for wall-clock time; increase for better performance
            num_timesteps=20_000,  # maximum time steps
            num_seed_steps=1_000,  # random actions to improve exploration
            update_after=1_000,  # when to start updating (off-policy still learns from seed steps)
            eval_every=20,  # episodic frequency for evaluation
            save_every=5_000,  # how often to save the experiment progress in time steps
            **kwargs,  # lazily absorb extra args
    ):
        self.observation_key = 'image_observation' if from_images else 'observation'
        self.achieved_goal_key = 'image_achieved_goal' if from_images else 'achieved_goal'
        self.desired_goal_key = 'image_desired_goal' if from_images else 'desired_goal'

        # Seed
        utils.set_seed_everywhere(seed)

        # Create env
        self.env_id = env_id
        self.seed = seed
        self.from_images = from_images
        self.fix_goals = fix_goals
        self.env = gym.make(self.env_id)
        if 'Kinova' in self.env_id:
            self.env = wrappers.KinovaWrapper(self.env, self.seed,
                                              self.from_images, self.fix_goals)
        else:
            self.env = wrappers.MultiWrapper(self.env, self.seed,
                                             self.from_images, self.fix_goals)

        # Create agent
        self.agent = Agent(from_images,
                           self.env.observation_space,
                           self.env.action_space,
                           device=device,
                           feature_dim=feature_dim,
                           hidden_sizes=hidden_sizes,
                           log_std_bounds=log_std_bounds,
                           discount=discount,
                           init_temperature=init_temperature,
                           lr=lr,
                           critic_tau=critic_tau,
                           batch_size=batch_size)

        # update env to use the agent's encoder for images if necessary
        if self.from_images:
            # set the conv encoder for latent-distance rewards
            self.env.set_agent(self.agent)

        # Create replay buffer
        self.replay_buffer = HindsightReplayBuffer(
            from_images=from_images,
            env=self.env,
            num_resampled_goals=num_resampled_goals,
            observation_space=self.env.observation_space,
            action_space=self.env.action_space,
            capacity=capacity,
            device=device,
        )

        self.step = 0
        self.num_eval_episodes = num_eval_episodes
        self.gradient_steps = gradient_steps
        self.num_timesteps = num_timesteps
        self.num_seed_steps = num_seed_steps
        self.update_after = update_after
        self.eval_every = eval_every
        self.save_every = save_every
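# `HindsightReplayBuffer` above relabels stored transitions with achieved
# goals (hindsight experience replay). A minimal sketch of the relabeling
# idea only, with a hypothetical `Transition` type and the "future" goal
# strategy; this is not the repo's actual buffer API:
import random
from dataclasses import dataclass, replace


@dataclass(frozen=True)
class Transition:
    obs: tuple
    action: int
    achieved_goal: tuple
    desired_goal: tuple


def her_relabel(episode, num_resampled_goals=1):
    # For each transition, additionally store copies whose desired goal is
    # an achieved goal from the same or a later step, so that sparse goal
    # rewards become reachable learning targets.
    relabeled = []
    for t, tr in enumerate(episode):
        relabeled.append(tr)
        for _ in range(num_resampled_goals):
            future = random.choice(episode[t:])
            relabeled.append(replace(tr, desired_goal=future.achieved_goal))
    return relabeled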
def main(args):
    # Set seed
    utils.set_seed_everywhere(args.seed)

    # Initialize environments
    gym.logger.set_level(40)
    env = make_env(domain_name=args.domain_name,
                   task_name=args.task_name,
                   seed=args.seed + 42,
                   episode_length=args.episode_length,
                   action_repeat=args.action_repeat,
                   mode=args.eval_mode)

    # Set working directory
    work_dir = os.path.join(args.log_dir,
                            args.domain_name + '_' + args.task_name,
                            args.algorithm, str(args.seed))
    print('Working directory:', work_dir)
    assert os.path.exists(work_dir), 'specified working directory does not exist'
    model_dir = utils.make_dir(os.path.join(work_dir, 'model'))
    video_dir = utils.make_dir(os.path.join(work_dir, 'video'))
    video = VideoRecorder(video_dir if args.save_video else None,
                          height=448,
                          width=448)

    # Check if evaluation has already been run
    results_fp = os.path.join(work_dir, args.eval_mode + '.pt')
    assert not os.path.exists(
        results_fp), f'{args.eval_mode} results already exist for {work_dir}'

    # Prepare agent
    assert torch.cuda.is_available(), 'must have cuda enabled'
    cropped_obs_shape = (3 * args.frame_stack, 84, 84)
    agent = make_agent(obs_shape=cropped_obs_shape,
                       action_shape=env.action_space.shape,
                       args=args)
    agent = torch.load(os.path.join(model_dir, str(args.train_steps) + '.pt'))
    agent.train(False)

    print(
        f'\nEvaluating {work_dir} for {args.eval_episodes} episodes (mode: {args.eval_mode})'
    )
    reward = evaluate(env, agent, video, args.eval_episodes, args.eval_mode)
    print('Reward:', int(reward))

    adapt_reward = None
    if args.algorithm == 'pad':
        env = make_env(domain_name=args.domain_name,
                       task_name=args.task_name,
                       seed=args.seed + 42,
                       episode_length=args.episode_length,
                       action_repeat=args.action_repeat,
                       mode=args.eval_mode)
        adapt_reward = evaluate(env,
                                agent,
                                video,
                                args.eval_episodes,
                                args.eval_mode,
                                adapt=True)
        print('Adapt reward:', int(adapt_reward))

    # Save results
    torch.save({
        'args': args,
        'reward': reward,
        'adapt_reward': adapt_reward
    }, results_fp)
    print('Saved results to', results_fp)