def __init__(self,
             env_spec,
             hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.relu,
             action_merge_layer=-2,
             output_nonlinearity=None,
             bn=False,
             dropout=.05):
    """Build an MLP Q-function over (observation, action) with dropout.

    The observation input is passed through one dense layer per entry of
    ``hidden_sizes``; the action input is concatenated onto the input of
    one chosen dense layer; a single-unit dense layer produces the output.
    Two functions are compiled from the same graph: ``_f_qval``
    (``deterministic=True``) and ``_f_qval_drop`` (``deterministic=False``).

    Args:
        env_spec: Provides ``observation_space.flat_dim`` and
            ``action_space.flat_dim``, used as the two input widths.
        hidden_sizes: Unit count of each hidden dense layer.
        hidden_nonlinearity: Nonlinearity of the hidden dense layers.
        action_merge_layer: Index of the dense layer whose input receives
            the action. Dense layers are numbered ``0..len(hidden_sizes)``
            (the last one is the output layer); the value is wrapped
            modulo ``len(hidden_sizes) + 1`` so negative indices count
            from the end.
        output_nonlinearity: Nonlinearity of the output layer.
        bn: If true, ``batch_norm`` is applied to the input of every
            hidden dense layer (before the action is concatenated).
        dropout: Second positional argument given to ``L.DropoutLayer``.
    """
    # Kept as the very first statement so that locals() contains nothing
    # but the constructor arguments when it is handed over.
    Serializable.quick_init(self, locals())

    l_obs = L.InputLayer(
        shape=(None, env_spec.observation_space.flat_dim), name="obs")
    l_action = L.InputLayer(
        shape=(None, env_spec.action_space.flat_dim), name="actions")

    n_hidden = len(hidden_sizes)
    # Python's % is already non-negative for a positive modulus, so this
    # wraps negative indices into 0..n_hidden. With no hidden layers the
    # result is always 0 == n_hidden, i.e. "merge into the output layer".
    merge_at = action_merge_layer % (n_hidden + 1)

    l_hidden = l_obs
    for idx, size in enumerate(hidden_sizes):
        if bn:
            l_hidden = batch_norm(l_hidden)
        if idx == merge_at:
            l_hidden = L.ConcatLayer([l_hidden, l_action])
        l_hidden = L.DenseLayer(
            l_hidden,
            num_units=size,
            nonlinearity=hidden_nonlinearity,
            name="h%d" % (idx + 1))
        # NOTE(review): dropout is assumed to follow every hidden dense
        # layer (rather than only the last one) -- confirm.
        l_hidden = L.DropoutLayer(l_hidden, dropout)

    # Index n_hidden is the output layer. The earlier check compared the
    # wrapped index against n_hidden + 1, which it can never reach once a
    # hidden layer exists, so e.g. action_merge_layer=-1 silently produced
    # a network that never saw the action.
    if merge_at == n_hidden:
        l_hidden = L.ConcatLayer([l_hidden, l_action])

    l_output = L.DenseLayer(
        l_hidden,
        num_units=1,
        nonlinearity=output_nonlinearity,
        name="output")

    output_var = L.get_output(l_output, deterministic=True)
    output_var_drop = L.get_output(l_output, deterministic=False)

    self._f_qval = tensor_utils.compile_function(
        [l_obs.input_var, l_action.input_var], output_var)
    self._f_qval_drop = tensor_utils.compile_function(
        [l_obs.input_var, l_action.input_var], output_var_drop)
    self._output_layer = l_output
    self._obs_layer = l_obs
    self._action_layer = l_action
    self._output_nonlinearity = output_nonlinearity

    LayersPowered.__init__(self, [l_output])
def _merge_finals(self, final_acts: Dict[UnboundAction, Any]) -> Any:
    """Concatenate per-schema output layers and reorder them by ground action.

    The layers in ``final_acts`` are concatenated along axis 1 in sorted
    schema order, the last axis is dropped, and the entries are gathered so
    that position ``k`` along axis 1 corresponds to the ``k``-th element of
    ``prob_meta.bound_acts_ordered``.

    Args:
        final_acts: Maps each action schema to its output layer.
            Presumably each layer is (batch, #ground acts of schema, 1);
            the ``[:, :, 0]`` indexing below requires rank 3 -- confirm
            the sizes against the caller.

    Returns:
        An ``L.OpLayer`` producing a rank-2 tensor with one column per
        ground action in ``prob_meta.bound_acts_ordered``.
    """
    prob_meta = self._prob_meta

    # we make a huge tensor of actions that we'll have to reorder
    sorted_final_acts = sorted(final_acts.items(), key=lambda t: t[0])

    # also get some metadata about which positions in tensor correspond to
    # which schemas
    unbound_to_super_ind = {
        schema: pos for pos, (schema, _) in enumerate(sorted_final_acts)
    }
    # indiv_sizes[i] is the number of bound acts associated with the i-th
    # schema
    indiv_sizes = [
        len(prob_meta.schema_to_acts(schema))
        for schema, _ in sorted_final_acts
    ]
    # cumul_sizes[i] is the sum of the number of ground actions associated
    # with each action schema *before* the i-th schema
    cumul_sizes = np.cumsum([0] + indiv_sizes)

    # this stores indices that we have to look up
    gather_list = []
    for ground_act in prob_meta.bound_acts_ordered:
        subact_ind = prob_meta.act_to_schema_subtensor_ind(ground_act)
        superact_ind = unbound_to_super_ind[ground_act.prototype]
        actual_ind = cumul_sizes[superact_ind] + subact_ind
        assert 0 <= actual_ind < prob_meta.num_acts, \
            "action index %d for %r out of range [0, %d)" \
            % (actual_ind, ground_act, prob_meta.num_acts)
        gather_list.append(actual_ind)

    # Build the index array once instead of on every invocation of the op.
    gather_inds = np.array(gather_list)
    num_gathered = len(gather_list)

    # now let's actually build and reorder our huge tensor of action
    # selection probs
    cat_super_acts = L.ConcatLayer(
        [layer for _, layer in sorted_final_acts],
        axis=1,
        name='merge_finals/cat')
    return L.OpLayer(
        incoming=cat_super_acts,
        # the [:, :, 0] drops the single dimension on the last axis
        op=lambda t: tf.gather(t[:, :, 0], gather_inds, axis=1),
        # The op returns a rank-2 tensor with one column per gathered
        # index; echoing the rank-3 input shape (as before) misreported
        # the output shape to any layer stacked on top.
        shape_op=lambda s: (s[0], num_gathered),
        name='merge_finals/reorder')
def __init__(self,
             env_spec,
             name='qnet',
             hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.relu,
             action_merge_layer=-2,
             output_nonlinearity=None,
             eqf_use_full_qf=False,
             eqf_sample_size=1,
             mqprop=False,
             bn=False):
    """Build an MLP Q-function over (observation, action).

    All layers are created under ``tf.variable_scope(name)``. The
    observation input is passed through one dense layer per entry of
    ``hidden_sizes``; the action input is concatenated onto the input of
    one chosen dense layer; a single-unit dense layer produces the output,
    which is reshaped to rank 1 before ``_f_qval`` is compiled.

    Args:
        env_spec: Provides ``observation_space.flat_dim`` and
            ``action_space.flat_dim``; its action space must not report
            ``is_discrete``.
        name: Variable scope name.
        hidden_sizes: Unit count of each hidden dense layer.
        hidden_nonlinearity: Nonlinearity of the hidden dense layers.
        action_merge_layer: Index of the dense layer whose input receives
            the action. Dense layers are numbered ``0..len(hidden_sizes)``
            (the last one is the output layer); the value is wrapped
            modulo ``len(hidden_sizes) + 1`` so negative indices count
            from the end.
        output_nonlinearity: Nonlinearity of the output layer.
        eqf_use_full_qf: Stored on the instance unchanged; not used here.
        eqf_sample_size: Stored on the instance unchanged; not used here.
        mqprop: Stored on the instance unchanged; not used here.
        bn: If true, ``batch_norm`` is applied to the input of every
            hidden dense layer (before the action is concatenated).
    """
    # Kept as the very first statement so that locals() contains nothing
    # but the constructor arguments when it is handed over.
    Serializable.quick_init(self, locals())

    assert not env_spec.action_space.is_discrete
    self._env_spec = env_spec

    with tf.variable_scope(name):
        l_obs = L.InputLayer(
            shape=(None, env_spec.observation_space.flat_dim), name="obs")
        l_action = L.InputLayer(
            shape=(None, env_spec.action_space.flat_dim), name="actions")

        n_hidden = len(hidden_sizes)
        # Python's % is already non-negative for a positive modulus, so
        # this wraps negative indices into 0..n_hidden. With no hidden
        # layers the result is always 0 == n_hidden, i.e. "merge into the
        # output layer".
        merge_at = action_merge_layer % (n_hidden + 1)

        l_hidden = l_obs
        for idx, size in enumerate(hidden_sizes):
            if bn:
                l_hidden = batch_norm(l_hidden)
            if idx == merge_at:
                l_hidden = L.ConcatLayer([l_hidden, l_action])
            l_hidden = L.DenseLayer(
                l_hidden,
                num_units=size,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % (idx + 1))

        # Index n_hidden is the output layer. The earlier check compared
        # the wrapped index against n_hidden + 1, which it can never reach
        # once a hidden layer exists, so e.g. action_merge_layer=-1
        # silently produced a network that never saw the action.
        if merge_at == n_hidden:
            l_hidden = L.ConcatLayer([l_hidden, l_action])

        l_output = L.DenseLayer(
            l_hidden,
            num_units=1,
            nonlinearity=output_nonlinearity,
            name="output")

        # Collapse the network output to a rank-1 tensor.
        output_var = tf.reshape(
            L.get_output(l_output, deterministic=True), (-1, ))

        self._f_qval = tensor_utils.compile_function(
            [l_obs.input_var, l_action.input_var], output_var)
        self._output_layer = l_output
        self._obs_layer = l_obs
        self._action_layer = l_action
        self._output_nonlinearity = output_nonlinearity

        self.eqf_use_full_qf = eqf_use_full_qf
        self.eqf_sample_size = eqf_sample_size
        self.mqprop = mqprop

        LayersPowered.__init__(self, [l_output])
def __init__(
        self,
        env_spec,
        name='qnet',
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.relu,
        action_merge_layer=-2,
        output_nonlinearity=None,
        hidden_W_init=L.XavierUniformInitializer(),
        hidden_b_init=tf.zeros_initializer,
        output_W_init=L.XavierUniformInitializer(),
        output_b_init=tf.zeros_initializer,
        bn=False):
    """Build an MLP Q-function over (observation, action).

    All layers are created under ``tf.variable_scope(name)``. The
    observation input is passed through one dense layer per entry of
    ``hidden_sizes``; the action input is concatenated onto the input of
    one chosen dense layer; a single-unit dense layer produces the output,
    which is reshaped to rank 1 before ``_f_qval`` is compiled.

    Args:
        env_spec: Provides ``observation_space.flat_dim`` and
            ``action_space.flat_dim``, used as the two input widths.
        name: Variable scope name.
        hidden_sizes: Unit count of each hidden dense layer.
        hidden_nonlinearity: Nonlinearity of the hidden dense layers.
        action_merge_layer: Index of the dense layer whose input receives
            the action. Dense layers are numbered ``0..len(hidden_sizes)``
            (the last one is the output layer); the value is wrapped
            modulo ``len(hidden_sizes) + 1`` so negative indices count
            from the end.
        output_nonlinearity: Nonlinearity of the output layer.
        hidden_W_init: Passed as ``W`` to every hidden dense layer.
        hidden_b_init: Passed as ``b`` to every hidden dense layer.
        output_W_init: Passed as ``W`` to the output dense layer.
        output_b_init: Passed as ``b`` to the output dense layer.
        bn: If true, ``L.batch_norm`` is applied to the input of every
            hidden dense layer (before the action is concatenated).
    """
    # Kept as the very first statement so that locals() contains nothing
    # but the constructor arguments when it is handed over.
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        l_obs = L.InputLayer(
            shape=(None, env_spec.observation_space.flat_dim), name="obs")
        l_action = L.InputLayer(
            shape=(None, env_spec.action_space.flat_dim), name="actions")

        n_hidden = len(hidden_sizes)
        # Python's % is already non-negative for a positive modulus, so
        # this wraps negative indices into 0..n_hidden. With no hidden
        # layers the result is always 0 == n_hidden, i.e. "merge into the
        # output layer".
        merge_at = action_merge_layer % (n_hidden + 1)

        l_hidden = l_obs
        for idx, size in enumerate(hidden_sizes):
            if bn:
                l_hidden = L.batch_norm(l_hidden)
            if idx == merge_at:
                l_hidden = L.ConcatLayer([l_hidden, l_action])
            l_hidden = L.DenseLayer(
                l_hidden,
                num_units=size,
                W=hidden_W_init,
                b=hidden_b_init,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % (idx + 1)
            )

        # Index n_hidden is the output layer. The earlier check compared
        # the wrapped index against n_hidden + 1, which it can never reach
        # once a hidden layer exists, so e.g. action_merge_layer=-1
        # silently produced a network that never saw the action.
        if merge_at == n_hidden:
            l_hidden = L.ConcatLayer([l_hidden, l_action])

        l_output = L.DenseLayer(
            l_hidden,
            num_units=1,
            W=output_W_init,
            b=output_b_init,
            nonlinearity=output_nonlinearity,
            name="output"
        )

        # Collapse the network output to a rank-1 tensor; tf.reshape is
        # used here in place of a ``.flatten()`` method call.
        output_var = tf.reshape(
            L.get_output(l_output, deterministic=True), (-1,))

        self._f_qval = tensor_utils.compile_function(
            [l_obs.input_var, l_action.input_var], output_var)
        self._output_layer = l_output
        self._obs_layer = l_obs
        self._action_layer = l_action
        self._output_nonlinearity = output_nonlinearity

        LayersPowered.__init__(self, [l_output])
def _make_action_module(self,
                        prev_dict: Dict[str, Any],
                        unbound_act: UnboundAction,
                        output_size: int,
                        layer_num: int,
                        l_in: L.Layer,
                        nonlinearity: Any = None,
                        dropout: float = 0.0,
                        norm_response=False,
                        extra_dict=None) -> Any:
    """Build the action module for one schema at one layer.

    For every predicate relevant to ``unbound_act`` the matching
    proposition entry of each ground action is picked out of the previous
    layer's tensors, the picks are stacked, and a width-1 convolution with
    the shared weights for ``(layer_num, unbound_act)`` is applied.

    Args:
        prev_dict: Previous-layer outputs, handed to ``_sort_inputs``.
        unbound_act: Action schema this module is built for.
        output_size: Number of filters of the convolution.
        layer_num: Layer index; selects the weights and, when 0, enables
            concatenation of the extra input data.
        l_in: Not used in this method body.
        nonlinearity: Activation; ``self.nonlinearity`` when ``None``.
        dropout: Dropout probability; a dropout layer is added only when
            this is greater than 0.
        norm_response: Add a ``ResponseNormLayer`` (only applied when
            ``output_size > 1``).
        extra_dict: Optional per-schema extra layers; only consulted for
            ``layer_num == 0``.

    Returns:
        The last layer of the module.
    """
    # TODO: can I do all of this index-futzing just once, instead of each
    # time I need to make an action module? Same applies to proposition
    # modules. Will make construction much faster (not that it's very
    # expensive at the moment...).
    module_name = 'act_mod_%s_%d' % (unbound_act.schema_name, layer_num)
    prob_meta = self._prob_meta
    dom_meta = self._weight_manager.dom_meta

    # sort input layers so we can pass them to Lasagne
    pred_to_tensor_idx, prev_inputs = self._sort_inputs(prev_dict)

    # One (tensor index, pools) entry per relevant predicate; the number of
    # entries determines how many channels the stacked input has.
    index_spec = []
    relevant_preds = dom_meta.rel_pred_names(unbound_act)
    for pred_pos, pred_name in enumerate(relevant_preds):
        # For each ground action, "pool" over exactly one element: the
        # pred_pos-th relevant proposition of that action.
        pools = [
            [prob_meta.prop_to_pred_subtensor_ind(
                prob_meta.rel_props(ground_act)[pred_pos])]
            for ground_act in prob_meta.schema_to_acts(unbound_act)
        ]
        # which tensor do we need to pick this out of?
        index_spec.append((pred_to_tensor_idx[pred_name], pools))

    module = PickPoolAndStack(
        prev_inputs, index_spec, name=module_name + '/cat')

    if layer_num == 0 and extra_dict is not None:
        # first action layer, so add in extra data
        module = L.ConcatLayer(
            [module, extra_dict[unbound_act]],
            axis=2,
            name=module_name + '/extra_cat')

    W, b = self._weight_manager.act_weights[layer_num][unbound_act]
    activation = self.nonlinearity if nonlinearity is None else nonlinearity
    module = L.Conv1DLayer(
        module,
        output_size,
        filter_size=1,
        stride=1,
        pad='VALID',
        W=W,
        b=b,
        nonlinearity=activation,
        name=module_name + '/conv')

    if dropout > 0:
        module = L.DropoutLayer(module, p=dropout, name=module_name + '/drop')
    if norm_response and output_size > 1:
        module = ResponseNormLayer(module, name=module_name + '/norm')
    # BN won't work because it's a mess to apply in a net like this
    return module
def __init__(self,
             name,
             input_dim,
             output_dim,
             hidden_sizes,
             hidden_nonlinearity,
             output_nonlinearity,
             vocab_size,
             embedding_size,
             hidden_W_init=L.xavier_init,
             hidden_b_init=tf.zeros_initializer,
             output_W_init=L.xavier_init,
             output_b_init=tf.zeros_initializer,
             has_other_input=True,
             input_var=None,
             input_layer=None,
             **kwargs):
    """Build an MLP whose trailing input columns go through an embedding.

    When ``has_other_input`` is true, the last ``vocab_size`` columns of
    the input are fed to a ``MeanPoolEmbeddingLayer`` and the result is
    concatenated with the remaining leading columns; otherwise the input
    is used as-is. The result passes through one dense layer per entry of
    ``hidden_sizes`` and a final dense layer with ``output_dim`` units.

    Args:
        name: Variable scope name.
        input_dim: Width of the input layer created when ``input_layer``
            is not given; also the upper bound of the embedding slice.
        output_dim: Unit count of the output layer.
        hidden_sizes: Unit count of each hidden dense layer.
        hidden_nonlinearity: Nonlinearity of the hidden dense layers.
        output_nonlinearity: Nonlinearity of the output layer.
        vocab_size: Number of trailing input columns sliced off for the
            embedding layer.
        embedding_size: Passed to ``MeanPoolEmbeddingLayer``.
        hidden_W_init: Passed as ``W`` to every hidden dense layer.
        hidden_b_init: Passed as ``b`` to every hidden dense layer.
        output_W_init: Passed as ``W`` to the output dense layer.
        output_b_init: Passed as ``b`` to the output dense layer.
        has_other_input: Whether to split the input as described above.
        input_var: Forwarded to the input layer created here, if any.
        input_layer: Existing layer to use as the network input.
        **kwargs: Not used in this method body.
    """
    # Kept as the very first statement so that locals() contains nothing
    # but the constructor arguments when it is handed over.
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        if input_layer is not None:
            l_in = input_layer
        else:
            l_in = L.InputLayer(
                shape=(None, input_dim), input_var=input_var, name="input")

        if not has_other_input:
            l_prev = l_in
        else:
            # Slice apart
            l_other_in = L.SliceLayer(
                l_in, "slice_other",
                slice(0, input_dim - vocab_size), axis=-1)
            l_emb_in = L.SliceLayer(
                l_in, "slice_emb",
                slice(input_dim - vocab_size, input_dim), axis=-1)

            # HACK: This is cheap with small embedding matrices but will
            # not scale well.. Find a better way to lookup from this
            # representation + mean-pool
            l_embs = MeanPoolEmbeddingLayer(
                l_emb_in, "embeddings", embedding_size)

            l_prev = L.ConcatLayer([l_other_in, l_embs], "merge")

        # Chain the hidden layers, each one consuming the previous layer.
        for layer_idx, n_units in enumerate(hidden_sizes):
            l_prev = L.DenseLayer(
                l_prev,
                num_units=n_units,
                nonlinearity=hidden_nonlinearity,
                name="hidden_%i" % layer_idx,
                W=hidden_W_init,
                b=hidden_b_init)

        l_out = L.DenseLayer(
            l_prev,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            name="output",
            W=output_W_init,
            b=output_b_init)

        self.input_layer = l_in
        self.input_var = l_in.input_var
        self.output_layer = l_out

        LayersPowered.__init__(self, l_out)