def _get_model(input_dict, obs_space, num_outputs, options, state_in, seq_lens):
    """Pick a model for the given observation spec.

    A registered custom model (options["custom_model"]) takes priority;
    otherwise observations with rank > 1 (beyond the batch dimension) get a
    VisionNetwork and flat observations get a FullyConnectedNetwork.
    """
    custom = options.get("custom_model")
    if custom:
        logger.info("Using custom model {}".format(custom))
        model_cls = _global_registry.get(RLLIB_MODEL, custom)
        return model_cls(
            input_dict,
            obs_space,
            num_outputs,
            options,
            state_in=state_in,
            seq_lens=seq_lens)
    # Rank of a single observation, excluding the leading batch axis.
    obs_rank = len(input_dict["obs"].shape) - 1
    if obs_rank > 1:
        return VisionNetwork(input_dict, obs_space, num_outputs, options)
    return FullyConnectedNetwork(input_dict, obs_space, num_outputs, options)
def _get_model(inputs, num_outputs, options):
    """Select a model class for the given input tensor and options.

    Order of precedence: explicit custom model, multiagent FC network (when
    the hidden sizes option is a list), vision network for rank > 1
    observations, then the plain fully connected network.
    """
    if "custom_model" in options:
        custom = options["custom_model"]
        print("Using custom model {}".format(custom))
        return _global_registry.get(RLLIB_MODEL, custom)(
            inputs, num_outputs, options)
    obs_rank = len(inputs.shape) - 1
    multiagent_hiddens = options.get("custom_options", {}).get(
        "multiagent_fcnet_hiddens", 1)
    # num_outputs > 1 used to avoid hitting this with the value function
    if isinstance(multiagent_hiddens, list) and num_outputs > 1:
        return MultiAgentFullyConnectedNetwork(inputs, num_outputs, options)
    if obs_rank > 1:
        return VisionNetwork(inputs, num_outputs, options)
    return FullyConnectedNetwork(inputs, num_outputs, options)
def get_model(inputs, num_outputs, options=None):
    """Returns a suitable model conforming to given input and output specs.

    Args:
        inputs (Tensor): The input tensor to the model.
        num_outputs (int): The size of the output vector of the model.
        options (dict): Optional args to pass to the model constructor.

    Returns:
        model (Model): Neural network model.
    """
    opts = {} if options is None else options
    # Rank of one observation, not counting the batch dimension.
    if len(inputs.get_shape()) - 1 > 1:
        return VisionNetwork(inputs, num_outputs, opts)
    return FullyConnectedNetwork(inputs, num_outputs, opts)
def get_model(registry, inputs, num_outputs, options=None):
    """Returns a suitable model conforming to given input and output specs.

    Args:
        registry (obj): Registry of named objects (ray.tune.registry).
        inputs (Tensor): The input tensor to the model.
        num_outputs (int): The size of the output vector of the model.
        options (dict): Optional args to pass to the model constructor.

    Returns:
        model (Model): Neural network model.
    """
    # Fix: the original used a mutable default (options=dict()), which is
    # shared across calls; use the None sentinel instead.
    if options is None:
        options = {}
    if "custom_model" in options:
        model = options["custom_model"]
        print("Using custom model {}".format(model))
        return registry.get(RLLIB_MODEL, model)(inputs, num_outputs, options)

    obs_rank = len(inputs.shape) - 1

    # num_outputs > 1 used to avoid hitting this with the value function
    if isinstance(
            options.get("custom_options", {}).get(
                "multiagent_fcnet_hiddens", 1), list) and num_outputs > 1:
        return MultiAgentFullyConnectedNetwork(inputs, num_outputs, options)

    if obs_rank > 1:
        return VisionNetwork(inputs, num_outputs, options)

    # Use two-level network if the hidden sizes are a nested list
    if "hierarchical_fcnet_hiddens" in options.get("custom_options",
                                                   {}) and num_outputs > 1:
        return TwoLevelFCNetwork(inputs, num_outputs, options)

    return FullyConnectedNetwork(inputs, num_outputs, options)
def __init__(self, env_creator, config, is_ext_train=False):
    """Build the inverse-model policy graph.

    Args:
        env_creator (callable): Takes an env config dict and returns an
            environment; the result is wrapped by the model catalog
            preprocessor.
        config (dict): Run configuration; reads "summarize", "env_config",
            "model", "lr", and (when is_ext_train) "inverse_model" file
            paths.
        is_ext_train (bool): When True, inputs come from a tf.data pipeline
            built from external train/validation files; otherwise from
            placeholders.
    """
    self.local_steps = 0
    self.config = config
    self.summarize = config.get("summarize")
    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_creator(self.config["env_config"]), self.config["model"])
    if is_ext_train:
        # External training data: a reinitializable iterator shared by the
        # train and validation datasets.
        train_dataset = input_fn(
            self.config["inverse_model"]["ext_train_file_path"])
        valid_dataset = input_fn(
            self.config["inverse_model"]["ext_valid_file_path"])
        iterator = tf.data.Iterator.from_structure(
            train_dataset.output_types, train_dataset.output_shapes)
        next_element = iterator.get_next()
        self.x = next_element[0]
        self.ac = next_element[1]
        self.training_init_op = iterator.make_initializer(train_dataset)
        self.validation_init_op = iterator.make_initializer(valid_dataset)
    else:
        # Placeholder input: a pair of stacked observations, hence the
        # leading factor of 2 in the flattened shape.
        self.x = tf.placeholder(
            tf.float32,
            shape=[
                None,
                numpy.prod([2] + list(env.observation_space.shape))
            ])
        if isinstance(env.action_space, gym.spaces.Box):
            self.ac = tf.placeholder(
                tf.float32, [None] + list(env.action_space.shape), name="ac")
        elif isinstance(env.action_space, gym.spaces.Discrete):
            self.ac = tf.placeholder(tf.int64, [None], name="ac")
        else:
            # Fix: the original message concatenated without spaces
            # ("action space<type>currently not supported").
            raise NotImplementedError(
                "action space " + str(type(env.action_space)) +
                " currently not supported")

    # Setup graph
    dist_class, logit_dim = ModelCatalog.get_action_dist(
        env.action_space, self.config["model"])
    self._model = FullyConnectedNetwork(self.x, logit_dim, {})
    self.logits = self._model.outputs
    self.curr_dist = dist_class(self.logits)
    self.sample = self.curr_dist.sample()
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)

    # Setup loss: negative log-likelihood of the recorded actions.
    log_prob = self.curr_dist.logp(self.ac)
    self.pi_loss = -tf.reduce_sum(log_prob)
    self.loss = self.pi_loss
    self.optimizer = tf.train.AdamOptimizer(self.config["lr"]).minimize(
        self.loss)

    # Setup similarity -> cosine similarity between sampled and true actions.
    normalize_sample = tf.nn.l2_normalize(self.sample, 1)
    normalize_ac = tf.nn.l2_normalize(self.ac, 1)
    self.similarity = 1 - tf.losses.cosine_distance(
        normalize_sample, normalize_ac, dim=1)

    # Initialize
    self.initialize()