def get_hypers(self, actions):
    """
    Bit of confusing logic here: I construct a `flat` dict of hypers from the
    actions - it looks like how hypers are specified above ('dot.key.str': val).
    Then from that we hydrate it into a proper nested config dict. I keep
    `flat` around because I save the run to the database so it can be analyzed
    with a decision tree (for feature_importances and the like), and flat
    columns are a better format for that than a nested dict.
    :param actions: the hyperparameters
    """
    self.flat = flat = {}

    # Pre-process hypers
    for k, v in actions.items():
        try:
            v = v.item()  # sometimes primitive, sometimes numpy
        except Exception:
            pass
        hyper = self.hypers[k]
        if 'pre' in hyper:
            v = hyper['pre'](v)
        flat[k] = v
    flat.update(self.hardcoded)

    # Post-process hypers (allows for dependency handling, etc.)
    for k, v in flat.items():
        hyper = self.hypers[k]
        if type(hyper) == dict and 'post' in hyper:
            flat[k] = hyper['post'](v, flat)

    # Change all 'a.b': c to {a: {b: c}} (note the DotDict class above -
    # I hate it and would rather use an off-the-shelf implementation).
    main, custom = utils.DotDict({}), utils.DotDict({})
    for k, v in flat.items():
        obj = main if k in hypers[self.agent] else custom
        try:
            v = self.hypers[k]['hydrate'](v, self.flat)
            if type(v) == dict:
                obj.update(v)
            else:
                obj[k] = v
        except Exception:  # no 'hydrate' for this hyper; use the raw value
            obj[k] = v
    main, custom = main.to_dict(), custom.to_dict()

    network = custom_net(custom, print_net=True)
    if flat['baseline_mode']:
        if type(self.hypers['baseline_mode']) == bool:
            main.update(hydrate_baseline(self.hypers['baseline_mode'], flat))
        main['baseline']['network'] = custom_net(custom, print_net=True, baseline=True)

    # TODO remove this special-handling
    if main['gae_lambda']:
        main['gae_lambda'] = main['discount']

    # GPU split
    gpu_split = self.cli_args.gpu_split
    if gpu_split != 1:
        fraction = .9 / gpu_split if gpu_split > 1 else gpu_split
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=fraction))
        main['execution'] = {'type': 'single', 'session_config': session_config}

    print('--- Flat ---')
    pprint(flat)
    print('--- Hydrated ---')
    pprint(main)
    return flat, main, network
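# The docstring above describes hydrating flat 'dot.key.str' hypers into a
# nested config. A minimal sketch of that conversion, assuming plain dicts
# (the real code routes through utils.DotDict and per-hyper 'hydrate'
# functions; `_hydrate_sketch` is a hypothetical name):
def _hydrate_sketch(flat):
    """Turn {'a.b': 1, 'a.c': 2, 'd': 3} into {'a': {'b': 1, 'c': 2}, 'd': 3}."""
    nested = {}
    for dotted, val in flat.items():
        node = nested
        *parents, leaf = dotted.split('.')
        for part in parents:
            # Walk/create intermediate dicts for each dotted segment
            node = node.setdefault(part, {})
        node[leaf] = val
    return nested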
def mlp(x, output_units, depth, units, activation, use_layernorm,
        output_activation=None, output_bias=True, **kwargs):
    if output_units:
        depth -= 1
    for i in range(depth):
        if isinstance(activation, list):
            x = tf.layers.dense(x, units=units)
            x = apply_mixed_activations(x, activation)
        else:
            x = tf.layers.dense(x, units=units, activation=activation)
        if use_layernorm:
            # Only skip centering on the last hidden layer when the output
            # layer has no bias of its own
            use_center = output_bias or i < depth - 1
            x = tfc.layers.layer_norm(x, center=use_center, begin_norm_axis=-1)
    hidden = x
    if output_units:
        out = tf.layers.dense(x, units=output_units,
                              activation=output_activation, use_bias=output_bias)
    else:
        out = hidden
    return utils.DotDict(locals())
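# `mlp` returns DotDict(locals()), so callers pick whichever intermediate
# tensors they need by attribute - `_policy` further down does this with
# `policy.out` and `policy.hidden`. A hedged usage sketch (assumes the same
# TF1-era imports as the snippet above; shapes and hyperparameters are
# illustrative only):
x_demo = tf.placeholder(tf.float32, [None, 17])      # batch of observations
net_demo = mlp(x_demo, output_units=6, depth=3, units=64,
               activation=tf.nn.relu, use_layernorm=True)
action_out = net_demo.out      # output of the final dense layer
features = net_demo.hidden     # last hidden representation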
def test_todict(self):
    dic: Dict[str, Any] = {
        'nested': {'key': {'key': 'val'}}
    }
    dotdict = utils.DotDict(dic)
    ret: Dict[Any, Any] = dotdict.todict()
    self.assertDictEqual(ret, dic)
def _extract_records(resp, translate_to_db=True):
    recs = [r["_source"] for r in resp["hits"]["hits"]]
    excepts = 0
    for rec in recs:
        try:
            rec["posted"] = datetime.datetime.strptime(rec["posted"], "%Y-%m-%dT%H:%M:%S")
        except ValueError:
            rec["posted"] = datetime.datetime.strptime(rec["posted"], "%Y-%m-%d %H:%M:%S")
            excepts += 1
    if translate_to_db:
        allrecs = [utils.DotDict(rec) for rec in db_names_from_elastic(recs)]
    else:
        allrecs = [utils.DotDict(rec) for rec in recs]
    g.date_excepts = excepts
    return allrecs
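# A hedged sketch of the Elasticsearch response shape `_extract_records`
# expects (field names other than "posted" are illustrative):
#
#     resp = {"hits": {"hits": [
#         {"_source": {"posted": "2019-01-05T12:30:00", "title": "..."}},
#         {"_source": {"posted": "2019-01-05 12:30:00", "title": "..."}},
#     ]}}
#
# The second record's space-separated timestamp takes the ValueError fallback
# and increments the `excepts` counter stored on flask's `g`.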
def test_simple(self):
    dic: Dict[str, Any] = {
        'string': 'aaa',
        'integer': 12,
        'dict': {'key': 'value'},
        'list': [1, 2]
    }
    dotdict = utils.DotDict(dic)
    self.assertEqual(dotdict.string, dic['string'])
    self.assertEqual(dotdict.integer, dic['integer'])
    self.assertEqual(dotdict.dict.key, dic['dict']['key'])
    self.assertEqual(dotdict.list, dic['list'])
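# These DotDict tests pin down the behavior the snippets here rely on:
# attribute access, recursive wrapping of nested dicts, and a todict()
# round-trip. A minimal sketch that satisfies them; the real utils.DotDict
# presumably does more (item assignment, update(), to_dict(), ...):
class DotDictSketch(dict):
    def __getattr__(self, key):
        try:
            val = self[key]
        except KeyError:
            raise AttributeError(key)
        # Wrap nested dicts on access so chains like `d.key.k` work
        return DotDictSketch(val) if isinstance(val, dict) else val

    def todict(self):
        # Recursively convert back to plain dicts
        return {k: (DotDictSketch(v).todict() if isinstance(v, dict) else v)
                for k, v in self.items()}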
def _create(self, scope):
    with tf.variable_scope(scope):
        replace_manager = utils.ReplaceVariableManager()
        return utils.DotDict({
            'critic': tf.make_template('critic', self._critic, True),
            'critic2': tf.make_template('critic2', self._critic, True),
            'policy': tf.make_template('policy', self._policy, True,
                                       custom_getter_=replace_manager)
        })
def _setup(self, config):
    import tensorflow as tf
    self.target_timesteps = 1
    logger.warning('Starting experiment')
    tf.logging.set_verbosity(tf.logging.ERROR)

    if not isinstance(config['env_name'], list):
        config['env_name'] = [config['env_name']]
    self.dconfig = dconfig = utils.DotDict(config)

    self.summary_writer = self.find_tf_logger() or tf.summary.FileWriter(self.logdir)
    tflog_utils.log_text(self.summary_writer, 'config', str(dconfig))

    # Assign different environments to different agents
    env_count = len(config['env_name'])
    agent_configs = [
        utils.merge_dicts(config, {'env_name': config['env_name'][i % env_count]})
        for i in range(dconfig.agent_count)
    ]
    self.agents = [
        ray_workers.AgentWorker.remote(i, agent_configs[i], self.logdir)
        for i in range(dconfig.agent_count)
    ]
    logger.warning('Setting up agents')
    # [ray] There is no way to wait for the actors to finalize initialization,
    # thus we would put this in a setup method. Commented out because we call
    # setup in the __init__ function of the agent worker:
    # ray.wait([agent.setup.remote() for agent in self.agents],
    #          num_returns=dconfig.agent_count)
    logger.warning('Created agents')

    if dconfig.restore_count:
        self._restore_from_specification(dconfig, agent_configs)

    # Create objective server and sync objective parameters
    if dconfig.agent_count > 1:
        params = self.agents[0].get_objective_params.remote()
        self.server = ray_workers.ObjectiveServer.remote(config, params)
        logger.warning('Created server')
        self.obj_param_count = len(ray.get(params))
        ray.wait([
            agent.update_objective_params.remote(params)
            for agent in self.agents[1:]
        ], num_returns=dconfig.agent_count - 1)
        logger.warning('Synced objective function')
def __init__(self, worker_index, config, logdir):
    logger.warning(f'Create agent {worker_index}')
    self.dconfig = utils.DotDict(config)
    self.logdir = logdir
    self.worker_index = worker_index
    self.locals = None
    self.feed_dict = None
    self.objective_vars_oid = None
    self.datasets_initialized = False
    import tensorflow as tf
    plasma.load_plasma_tensorflow_op()
    logger.warning(f'Created agent {worker_index}')
def read_config():
    parser = argparse.ArgumentParser(
        prog='Train/Eval script',
        description=('Script for training and evaluating memory models on various '
                     'bitmap tasks. All parameters should be given through the config file.'),
    )
    parser.add_argument(
        '-n', '--name', type=str, required=True,
        help='Name of the current experiment. Can also provide name/with/path for grouping')
    parser.add_argument('-k', '--keep', action='store_true',
                        help='Keep logs from previous run.')
    parser.add_argument('-l', '--load', default=None,
                        help='Path to checkpoint file to load from')
    args = parser.parse_args()

    path = pathlib.Path('experiments') / args.name
    assert path.exists(), f'No such directory: {str(path)}.'
    assert (path / 'config.yaml').exists(), 'No configuration file found.'
    with open(path / 'config.yaml') as f:
        config = utils.DotDict(yaml.safe_load(f))

    if not args.keep:
        (path / 'tensorboard').exists() and shutil.rmtree(path / 'tensorboard')
        (path / 'checkpoints').exists() and shutil.rmtree(path / 'checkpoints')
        open(path / 'train.log', 'w').close()
    (path / 'tensorboard').mkdir(exist_ok=True)
    (path / 'checkpoints').mkdir(exist_ok=True)

    config.path = path
    config.tensorboard = path / 'tensorboard'
    config.checkpoints = path / 'checkpoints'
    config.load = args.load
    return config
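# read_config() assumes an on-disk layout like the following (the contents of
# config.yaml are project-specific):
#
#     experiments/<name>/config.yaml    <- required, loaded into a DotDict
#     experiments/<name>/tensorboard/   <- wiped and recreated unless --keep
#     experiments/<name>/checkpoints/   <- wiped and recreated unless --keep
#     experiments/<name>/train.log      <- truncated unless --keep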
def __init__(self, config, init_vars):
    import tensorflow as tf
    dconfig = utils.DotDict(config)
    plasma.load_plasma_tensorflow_op()
    store_socket = utils.get_store_socket()

    self.var_oid = None
    self.obj_vars = [
        tf.Variable(init_var, name='obj_var', dtype=tf.float32)
        for init_var in init_vars
    ]
    self.plasma_grads_oids = tf.placeholder(shape=[dconfig.agent_count],
                                            dtype=tf.string,
                                            name="plasma_grads_oids")
    self.plasma_vars_oid = tf.placeholder(shape=[], dtype=tf.string,
                                          name="plasma_vars_oids")

    # Average the flattened gradients sent by all agents, then un-flatten
    shapes = [v.shape for v in self.obj_vars]
    grads = utils.reverse_flat(
        tf.reduce_mean([
            plasma.tf_plasma_op.plasma_to_tensor(
                self.plasma_grads_oids[a], dtype=tf.float32,
                plasma_store_socket_name=store_socket)
            for a in range(dconfig.agent_count)
        ], axis=0), shapes)

    obj_optimizer = tf.train.AdamOptimizer(
        learning_rate=dconfig.obj_func_learning_rate)
    self.train_obj_op = obj_optimizer.apply_gradients(zip(grads, self.obj_vars))
    with tf.control_dependencies([self.train_obj_op]):
        self.update_vars = plasma.tf_plasma_op.tensor_to_plasma(
            [utils.flat(self.obj_vars)],
            self.plasma_vars_oid,
            plasma_store_socket_name=store_socket)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    self.sess.run(tf.global_variables_initializer())
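# `utils.flat` / `utils.reverse_flat` ship all objective parameters and
# gradients through plasma as one 1-D tensor. A hedged sketch of such helpers,
# inferred from the call sites above (the real implementations may differ;
# assumes `tf` and `np` are imported as elsewhere in these snippets):
def flat_sketch(tensors):
    # Concatenate all tensors into a single 1-D tensor
    return tf.concat([tf.reshape(t, [-1]) for t in tensors], axis=0)

def reverse_flat_sketch(flat_tensor, shapes):
    # Split the 1-D tensor back apart and restore the original shapes
    sizes = [int(np.prod(s)) for s in shapes]
    parts = tf.split(flat_tensor, sizes)
    return [tf.reshape(p, s) for p, s in zip(parts, shapes)]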
def _policy(self, x, initial_state=None, **kwargs):
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
    if initial_state is not None:
        initial_state = tf.unstack(initial_state)
        kwargs['initial_state'] = initial_state
    policy = self.policy_func(x, self.act_dim, output_activation=tf.tanh, **kwargs)
    pi = self.act_limit * policy.out
    result = {
        'action': pi,
        'hidden': policy.hidden,
        'value': self.main.critic(x, pi),
        'target_value': lambda: self.target.critic(x, pi),
    }
    if hasattr(policy, 'state'):
        result['state'] = policy.state
    return utils.DotDict(result)
def recurrent(x, output_units, depth, units, activation, use_layernorm,
              initial_state=None, output_activation=None, seq_len=None):
    lstm_cell = tfc.rnn.LayerNormBasicLSTMCell(
        units, activation=lookup_activation(activation), layer_norm=use_layernorm)
    # Feed each time step `depth` times so a single cell emulates a deeper unroll
    inputs = np.repeat(tf.unstack(x, axis=1), depth).tolist()
    outputs, state = tf.nn.static_rnn(lstm_cell, inputs, dtype=tf.float32,
                                      initial_state=initial_state,
                                      sequence_length=seq_len)
    # Keep only the last of the `depth` outputs per time step
    hidden = tf.stack(outputs[depth - 1::depth], axis=1)
    out = tf.layers.dense(hidden, output_units, activation=output_activation)
    return utils.DotDict(locals())
                    type=str,
                    help='The weights of the trained model.')
parser.add_argument('-i', '--img_path', type=str, default='demo.jpg',
                    help='Specify the image to predict.')
parser.add_argument('-o', '--output_path', type=str, default='output.png',
                    help='A path to save the prediction result.')
args = parser.parse_args()

cfg_dict: Dict[str, Any] = utils.load_yaml('./config.yml')
cfg: utils.DotDict = utils.DotDict(cfg_dict)
# cmaps: List[Tuple[str, Tuple[int]]] = utils.load_labelmap(
#     path='../VOCdevkit/VOC2012/labelmap.txt'
# )
# print(cmaps)
device: str = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the UNet model and its weights.
net: nn.Module = models.utils.load_model(
    num_classes=cfg.num_classes,
    architecture=cfg.model.architecture,
    backbone=cfg.model.backbone,
    pretrained=False)
net.load_state_dict(torch.load(args.weights_path, map_location=device))
net.eval()
def _setup(self, dconfig, logdir):
    """
    Create tensorflow graph and summary writer
    :param dconfig: configuration to use to build the graph
    :param logdir: log directory to write tensorflow logs to
    """
    env = gym.make(dconfig.env_name)
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    # Action limit for clamping: critically, assumes all dimensions share the same bound!
    act_limit = env.action_space.high[0]

    agent = Agent(dconfig, env)
    objective = Objective(dconfig)

    # Experience buffer
    replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim,
                                 size=dconfig.buffer_size,
                                 discount_factor=dconfig.discount_factor)

    time = dconfig.recurrent_time_steps if dconfig.recurrent_time_steps > 1 else None

    # Create datasets from replay buffer
    replay_buffer_dataset = replay_buffer.create_dataset(
        dconfig.buffer_sample_size, time)
    replay_buffer_dataset_iterator = replay_buffer_dataset.make_initializable_iterator()

    # If we perform multiple gradient steps in the inner loop, provide different data for each step
    large_batch_size = (self.dconfig.obj_func_second_order_steps + 1) * dconfig.buffer_sample_size
    large_replay_buffer_dataset = replay_buffer.create_dataset(large_batch_size, time)
    large_replay_buffer_dataset_iterator = large_replay_buffer_dataset.make_initializable_iterator()

    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(
        handle, replay_buffer_dataset.output_types,
        replay_buffer_dataset.output_shapes)
    itr_elem = utils.DotDict(iterator.get_next())
    x_ph, a_ph, x2_ph, r_ph, d_ph, lens_ph = (itr_elem.obs1, itr_elem.acts, itr_elem.obs2,
                                              itr_elem.rews, itr_elem.done, itr_elem.lens)

    # Mask for different trajectory lengths
    if lens_ph is not None:
        seq_mask = tf.sequence_mask(lens_ph, time, dtype=tf.float32)
    else:
        seq_mask = tf.ones([], dtype=tf.float32)

    x_ph_behv = placeholder(obs_dim, name='ObsBehavior')
    timestep = tf.placeholder(tf.float32, [], 'timestep')

    if dconfig.policy_is_recurrent:
        state_shape = [2, 1, dconfig.policy_units]
        init_policy_state = tf.placeholder_with_default(
            tf.zeros(state_shape), [2, 1, dconfig.policy_units])
    else:
        init_policy_state = None

    transition = [x_ph, a_ph, x2_ph, r_ph[..., tf.newaxis], d_ph[..., tf.newaxis]]

    # Learning rate annealing
    if dconfig.policy_update_start:
        base = dconfig.policy_lr_annealing_base
        lr_progress = (base**tf.minimum(1.0, timestep / dconfig.policy_update_start) - 1) / (base - 1)
    else:
        lr_progress = 1

    # Optimizers
    pi_optimizer = utils.TensorAdamOptimizer(
        learning_rate=dconfig.policy_learning_rate * lr_progress)
    q_optimizer = tf.train.AdamOptimizer(
        learning_rate=dconfig.critic_learning_rate)
    obj_optimizer = tf.train.AdamOptimizer(
        learning_rate=dconfig.obj_func_learning_rate)

    # Main outputs from computation graph
    main = agent.main
    policy = main.policy(x_ph, seq_len=lens_ph)
    pi_action = policy.action
    q1_pi = policy.value
    pi_behv = main.policy(x_ph_behv[:, tf.newaxis], initial_state=init_policy_state)
    q1 = main.critic(x_ph, a_ph)
    q2 = main.critic2(x_ph, a_ph)
    obj = objective.objective(x_ph, a_ph, transition, lens_ph, seq_mask, agent, policy)

    # Target policy network
    pi_action_targ = agent.target.policy(x2_ph, seq_len=lens_ph).action

    # Target Q networks
    # Target policy smoothing, by adding clipped noise to target actions
    epsilon = tf.random_normal(tf.shape(pi_action_targ), stddev=dconfig.critic_noise)
    epsilon = tf.clip_by_value(epsilon, -dconfig.critic_noise_clip, dconfig.critic_noise_clip)
    a2 = pi_action_targ + epsilon
    a2 = tf.clip_by_value(a2, -act_limit, act_limit)
    q1_targ = agent.target.critic(x2_ph, a2)
    q2_targ = agent.target.critic2(x2_ph, a2)

    # Bellman backup for Q functions, using Clipped Double-Q targets
    min_q_targ = tf.minimum(q1_targ, q2_targ)
    gamma = dconfig.discount_factor
    backup = tf.stop_gradient(r_ph + gamma * (1 - d_ph) * min_q_targ + d_ph)

    # Objective function annealing
    if dconfig.obj_func_anneal_steps:
        progress = tf.minimum(1.0, timestep / dconfig.obj_func_anneal_steps)
        obj = progress * obj - (1 - progress) * q1_pi

    # TD3 losses
    pi_loss = -tf.reduce_mean(q1_pi * seq_mask)
    pi_obj_loss = tf.reduce_mean(obj * seq_mask)
    q1_loss = tf.reduce_mean((q1 - backup)**2 * seq_mask)
    q2_loss = tf.reduce_mean((q2 - backup)**2 * seq_mask)
    q_loss = q1_loss + q2_loss

    main_vars = sorted(get_vars('main', trainable_only=False), key=lambda v: v.name)
    target_vars = sorted(get_vars('target', trainable_only=False), key=lambda v: v.name)

    # Train policy directly using critic
    train_pi_op = self._clipped_minimize(pi_optimizer, pi_loss,
                                         get_vars('main/policy'),
                                         grad_name='ddpg_policy_grads')
    # Train policy using objective function
    train_pi_obj_op = self._clipped_minimize(pi_optimizer, pi_obj_loss,
                                             get_vars('main/policy'),
                                             grad_name='objective_policy_grads')
    # Train critic
    train_q_op = q_optimizer.minimize(q_loss, var_list=get_vars('main/critic'))

    tf.summary.histogram('policy_params', utils.flat(get_vars('main/policy')))

    # Objective function loss
    q1_obj = objective.future_policy_value(
        x_ph, a_ph, transition, lens_ph, seq_mask, agent, pi_optimizer,
        create_summary=dconfig.obj_func_enabled)
    obj_loss = -tf.reduce_mean(q1_obj)

    # Objective function optimization using ray (send gradients to ObjectiveServer)
    obj_vars = get_vars('objective')
    store_socket = utils.get_store_socket()
    shapes = [v.shape for v in obj_vars]
    plasma_var_oid = tf.placeholder(shape=[], dtype=tf.string, name="plasma_var_oid")
    retrieved_vars = utils.reverse_flat(
        plasma.tf_plasma_op.plasma_to_tensor(
            plasma_var_oid, dtype=tf.float32,
            plasma_store_socket_name=store_socket), shapes)
    # Op to read new objective parameters from ray object store
    plasma_read_vars = [
        var.assign(retrieved)
        for var, retrieved in zip(obj_vars, retrieved_vars)
    ]

    grads, vars = zip(*obj_optimizer.compute_gradients(obj_loss, obj_vars))
    grads, _ = tf.clip_by_global_norm(grads, clip_norm=dconfig.clip_gradient)
    tf.summary.histogram('objective_params', utils.flat(vars))
    tf.summary.histogram('objective_param_grads', utils.flat(grads))
    objective_grads = grads
    train_obj_op = obj_optimizer.apply_gradients(zip(objective_grads, vars))
    plasma_grad_oid = tf.placeholder(shape=[], dtype=tf.string, name="plasma_grad_oid")
    # Op to send gradients to ObjectiveServer
    plasma_write_grads = plasma.tf_plasma_op.tensor_to_plasma(
        [utils.flat(objective_grads)], plasma_grad_oid,
        plasma_store_socket_name=store_socket)

    # Print number of parameters
    print(f'''
===================================================================
Parameters
Policy    {np.sum(np.prod(v.shape) for v in get_vars('main/policy'))}
Critic    {np.sum(np.prod(v.shape) for v in get_vars('main/critic'))}
Objective {np.sum(np.prod(v.shape) for v in obj_vars)}
===================================================================
''')

    # Polyak averaging for target variables
    polyak = 1 - dconfig.target_network_update_speed
    target_update = tf.group([
        tf.assign(v_targ, polyak * v_targ + (1 - polyak) * v_main)
        for v_main, v_targ in zip(main_vars, target_vars)
    ])

    # Initializing target networks to match main variables
    target_init = tf.group([
        tf.assign(v_targ, v_main)
        for v_main, v_targ in zip(main_vars, target_vars)
    ])

    # Ops for copying and resetting the policy (currently not used)
    reset_policy = tf.variables_initializer(get_vars('main'))
    copy_policy = tf.group([
        tf.assign(v_targ, v_main)
        for v_main, v_targ in zip(get_vars('main'), get_vars('target'))
    ])

    # Summaries
    tflog_utils.log_scalars(policy_loss=pi_loss, q_loss=q_loss)
    if dconfig.obj_func_enabled:
        tflog_utils.log_scalars(policy_obj_loss=pi_obj_loss, objective_loss=obj_loss)

    self.restore_savers = self._create_restore_savers(dconfig)
    self.saver = tf.train.Saver(max_to_keep=1000, save_relative_paths=True)
    self.summary = tf.summary.merge_all()
    self.summary_writer = tf.summary.FileWriter(f'{logdir}_agent{self.worker_index}')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    self.sess.run(tf.global_variables_initializer())

    init_ops = [target_init]
    self.sess.run(init_ops)

    rb_handle, large_rb_handle = self.sess.run([
        replay_buffer_dataset_iterator.string_handle(),
        large_replay_buffer_dataset_iterator.string_handle()
    ])

    # Return all created tf ops
    return utils.DotDict(locals())
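# Because _setup returns DotDict(locals()), the training loop can address any
# graph op by name. A hedged usage sketch (matching the `self.locals`
# attribute initialized in AgentWorker.__init__ above; the feeds are
# illustrative):
#
#     self.locals = self._setup(self.dconfig, self.logdir)
#     ops = self.locals
#     self.sess.run([ops.train_q_op, ops.target_update],
#                   feed_dict={ops.handle: ops.rb_handle, ops.timestep: t})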
def test_init_nested_dict(self):
    nested = {'key': {'k': {'a': 'b'}}}
    dotdict = utils.DotDict(nested)
    self.assertIsInstance(dotdict.key.k, utils.DotDict)
def __init__(
        self,
        dset_name=None,
        cfg_path="./agent_motion_config.yaml",
        cfg_data=None,
        stage=None,
):
    print(f"Initializing LyftDataset {dset_name}...")
    if stage is not None:
        print('DEPRECATION WARNING! LyftDataset: the "stage=" argument is deprecated, '
              'use "dset_name=" instead')
        if dset_name is None:
            dset_name = stage
        else:
            raise ValueError('LyftDataset: please use only the "dset_name" argument')
    assert dset_name is not None

    self.dm = LocalDataManager(None)
    self.dset_name = dset_name
    if cfg_data is None:
        self.cfg = utils.DotDict(load_config_data(cfg_path))
    else:
        self.cfg = utils.DotDict(cfg_data)
    self.dset_cfg = self.cfg[LyftDataset.name_2_dataloader_key[dset_name]].copy()

    if self.cfg["raster_params"]["map_type"] == "py_satellite":
        print("WARNING! USING SLOW RASTERIZER!!! py_satellite")
    self.rasterizer = build_rasterizer(self.cfg, self.dm)
    self.rasterizer = build_custom_rasterizer(self.cfg, self.dm)  # overrides the default rasterizer

    if dset_name == LyftDataset.DSET_VALIDATION_CHOPPED:
        eval_base_path = Path(
            "/opt/data3/lyft_motion_prediction/prediction_dataset/scenes/validate_chopped_100")
        eval_zarr_path = str(Path(eval_base_path) /
                             Path(self.dm.require(self.dset_cfg["key"])).name)
        eval_mask_path = str(Path(eval_base_path) / "mask.npz")
        self.eval_gt_path = str(Path(eval_base_path) / "gt.csv")
        self.zarr_dataset = ChunkedDataset(eval_zarr_path).open(cached=False)
        self.agent_dataset = AgentDataset(
            self.cfg,
            self.zarr_dataset,
            self.rasterizer,
            agents_mask=np.load(eval_mask_path)["arr_0"],
        )
        self.val_chopped_gt = defaultdict(dict)
        for el in read_gt_csv(self.eval_gt_path):
            self.val_chopped_gt[el["track_id"] + el["timestamp"]] = el
    elif dset_name == LyftDataset.DSET_TEST:
        self.zarr_dataset = ChunkedDataset(
            self.dm.require(self.dset_cfg["key"])).open(cached=False)
        test_mask = np.load(f"{config.L5KIT_DATA_FOLDER}/scenes/mask.npz")["arr_0"]
        self.agent_dataset = AgentDataset(self.cfg, self.zarr_dataset,
                                          self.rasterizer, agents_mask=test_mask)
    else:
        zarr_path = self.dm.require(self.dset_cfg["key"])
        print(f"Opening Chunked Dataset {zarr_path}...")
        self.zarr_dataset = ChunkedDataset(zarr_path).open(cached=False)
        print("Creating Agent Dataset...")
        self.agent_dataset = AgentDataset(
            self.cfg,
            self.zarr_dataset,
            self.rasterizer,
            min_frame_history=0,
            min_frame_future=10,
        )
        print("Creating Agent Dataset... [OK]")

    if dset_name == LyftDataset.DSET_VALIDATION:
        # Keep only agents present at frame 100 of each scene
        mask_frame100 = np.zeros(shape=self.agent_dataset.agents_mask.shape,
                                 dtype=np.bool)
        for scene in self.agent_dataset.dataset.scenes:
            frame_interval = scene["frame_index_interval"]
            agent_index_interval = self.agent_dataset.dataset.frames[
                frame_interval[0] + 99]["agent_index_interval"]
            mask_frame100[agent_index_interval[0]:agent_index_interval[1]] = True

        prev_agents_num = np.sum(self.agent_dataset.agents_mask)
        self.agent_dataset.agents_mask = self.agent_dataset.agents_mask * mask_frame100
        print(f"nb agent: orig {prev_agents_num} "
              f"filtered {np.sum(self.agent_dataset.agents_mask)}")

        # store the valid agents indexes
        self.agent_dataset.agents_indices = np.nonzero(
            self.agent_dataset.agents_mask)[0]

    self.w, self.h = self.cfg["raster_params"]["raster_size"]
    self.add_agent_state = self.cfg["model_params"]["add_agent_state"]
    self.agent_state = None