Example #1
    def __init__(self,
                 env_spec,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 action_merge_layer=-2,
                 output_nonlinearity=None,
                 bn=False,
                 dropout=.05):
        Serializable.quick_init(self, locals())

        l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim),
                             name="obs")
        l_action = L.InputLayer(shape=(None, env_spec.action_space.flat_dim),
                                name="actions")

        n_layers = len(hidden_sizes) + 1

        # map a possibly negative action_merge_layer index into [0, n_layers)
        if n_layers > 1:
            action_merge_layer = \
                (action_merge_layer % n_layers + n_layers) % n_layers
        else:
            action_merge_layer = 1

        l_hidden = l_obs

        for idx, size in enumerate(hidden_sizes):
            if bn:
                l_hidden = batch_norm(l_hidden)

            if idx == action_merge_layer:
                l_hidden = L.ConcatLayer([l_hidden, l_action])

            l_hidden = L.DenseLayer(l_hidden,
                                    num_units=size,
                                    nonlinearity=hidden_nonlinearity,
                                    name="h%d" % (idx + 1))
            l_hidden = L.DropoutLayer(l_hidden, dropout)

        if action_merge_layer == n_layers:
            l_hidden = L.ConcatLayer([l_hidden, l_action])

        l_output = L.DenseLayer(l_hidden,
                                num_units=1,
                                nonlinearity=output_nonlinearity,
                                name="output")

        # dropout is disabled in the deterministic pass and active in the stochastic one
        output_var = L.get_output(l_output, deterministic=True)
        output_var_drop = L.get_output(l_output, deterministic=False)

        # compile a Q-value function for each variant
        self._f_qval = tensor_utils.compile_function(
            [l_obs.input_var, l_action.input_var], output_var)
        self._f_qval_drop = tensor_utils.compile_function(
            [l_obs.input_var, l_action.input_var], output_var_drop)
        self._output_layer = l_output
        self._obs_layer = l_obs
        self._action_layer = l_action
        self._output_nonlinearity = output_nonlinearity

        LayersPowered.__init__(self, [l_output])
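
The double-modulo arithmetic above decides where the action tensor is concatenated into the hidden stack. A minimal plain-Python sketch, using the default arguments from this example, of how a negative action_merge_layer resolves to a concrete layer index:

hidden_sizes = (32, 32)
action_merge_layer = -2
n_layers = len(hidden_sizes) + 1                       # two hidden layers plus the output layer
merge_at = (action_merge_layer % n_layers + n_layers) % n_layers
print(merge_at)                                        # 1: actions are concatenated before the second hidden layer
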
Example #2
    def _merge_finals(self, final_acts: Dict[UnboundAction, Any]) -> Any:
        prob_meta = self._prob_meta
        # we make a huge tensor of actions that we'll have to reorder
        sorted_final_acts = sorted(final_acts.items(), key=lambda t: t[0])
        # also get some metadata about which positions in tensor correspond to
        # which schemas
        unbound_to_super_ind = {
            t[0]: idx
            for idx, t in enumerate(sorted_final_acts)
        }
        # indiv_sizes[i] is the number of bound acts associated with the i-th
        # schema
        indiv_sizes = [
            len(prob_meta.schema_to_acts(ub)) for ub, _ in sorted_final_acts
        ]
        # cumul_sizes[i] is the sum of the number of ground actions associated
        # with each action schema *before* the i-th schema
        cumul_sizes = np.cumsum([0] + indiv_sizes)
        # this stores indices that we have to look up
        gather_list = []
        for ground_act in prob_meta.bound_acts_ordered:
            subact_ind = prob_meta.act_to_schema_subtensor_ind(ground_act)
            superact_ind = unbound_to_super_ind[ground_act.prototype]
            actual_ind = cumul_sizes[superact_ind] + subact_ind
            assert 0 <= actual_ind < prob_meta.num_acts, \
                "action index %d for %r out of range [0, %d)" \
                % (actual_ind, ground_act, prob_meta.num_acts)
            gather_list.append(actual_ind)

        # now let's actually build and reorder our huge tensor of action
        # selection probs
        cat_super_acts = L.ConcatLayer(
            [t[1] for t in sorted_final_acts], axis=1, name='merge_finals/cat')
        rv = L.OpLayer(
            incoming=cat_super_acts,
            # the [:, :, 0] drops the single dimension on the last axis
            op=lambda t: tf.gather(t[:, :, 0], np.array(gather_list), axis=1),
            shape_op=lambda s: s,
            name='merge_finals/reorder')

        return rv
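
To see what the gather indices are doing, here is a small NumPy sketch of the same concatenate-then-reorder pattern; the sizes and the global ordering are invented for illustration, not taken from the source:

import numpy as np

indiv_sizes = [2, 3]                         # ground actions per schema
cumul_sizes = np.cumsum([0] + indiv_sizes)   # offset of each schema's block: [0, 2, 5]
# (schema index, sub-index within schema) for each ground action, in global order
global_order = [(1, 0), (0, 1), (1, 2), (0, 0), (1, 1)]
gather_list = [cumul_sizes[s] + sub for s, sub in global_order]
concatenated = np.arange(sum(indiv_sizes))   # stand-in for the concatenated per-schema probs
print(concatenated[gather_list])             # [2 1 4 0 3]
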
Example #3
    def __init__(self,
                 env_spec,
                 name='qnet',
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 action_merge_layer=-2,
                 output_nonlinearity=None,
                 eqf_use_full_qf=False,
                 eqf_sample_size=1,
                 mqprop=False,
                 bn=False):
        Serializable.quick_init(self, locals())

        assert not env_spec.action_space.is_discrete
        self._env_spec = env_spec

        with tf.variable_scope(name):
            l_obs = L.InputLayer(shape=(None,
                                        env_spec.observation_space.flat_dim),
                                 name="obs")
            l_action = L.InputLayer(shape=(None,
                                           env_spec.action_space.flat_dim),
                                    name="actions")

            n_layers = len(hidden_sizes) + 1

            if n_layers > 1:
                action_merge_layer = \
                    (action_merge_layer % n_layers + n_layers) % n_layers
            else:
                action_merge_layer = 1

            l_hidden = l_obs

            for idx, size in enumerate(hidden_sizes):
                if bn:
                    l_hidden = batch_norm(l_hidden)

                if idx == action_merge_layer:
                    l_hidden = L.ConcatLayer([l_hidden, l_action])

                l_hidden = L.DenseLayer(l_hidden,
                                        num_units=size,
                                        nonlinearity=hidden_nonlinearity,
                                        name="h%d" % (idx + 1))

            if action_merge_layer == n_layers:
                l_hidden = L.ConcatLayer([l_hidden, l_action])

            l_output = L.DenseLayer(l_hidden,
                                    num_units=1,
                                    nonlinearity=output_nonlinearity,
                                    name="output")

            output_var = L.get_output(l_output, deterministic=True)
            output_var = tf.reshape(output_var, (-1, ))

            self._f_qval = tensor_utils.compile_function(
                [l_obs.input_var, l_action.input_var], output_var)
            self._output_layer = l_output
            self._obs_layer = l_obs
            self._action_layer = l_action
            self._output_nonlinearity = output_nonlinearity

            self.eqf_use_full_qf = eqf_use_full_qf
            self.eqf_sample_size = eqf_sample_size
            self.mqprop = mqprop

            LayersPowered.__init__(self, [l_output])
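
The tf.reshape(output_var, (-1, )) is what lets the compiled function return one scalar Q-value per sample; a tiny NumPy sketch of the shape change, with an invented batch size:

import numpy as np

raw_output = np.random.randn(16, 1)   # stand-in for the (batch, 1) output of the final DenseLayer
q_values = raw_output.reshape(-1)     # shape (16,): one Q-value per (observation, action) pair
assert q_values.shape == (16,)
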
Example #4
    def __init__(
            self,
            env_spec,
            name='qnet',
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.relu,
            action_merge_layer=-2,
            output_nonlinearity=None,
            hidden_W_init=L.XavierUniformInitializer(),
            hidden_b_init=tf.zeros_initializer,
            output_W_init=L.XavierUniformInitializer(),
            output_b_init=tf.zeros_initializer,
            bn=False):
        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim), name="obs")
            l_action = L.InputLayer(shape=(None, env_spec.action_space.flat_dim), name="actions")

            n_layers = len(hidden_sizes) + 1

            if n_layers > 1:
                action_merge_layer = \
                    (action_merge_layer % n_layers + n_layers) % n_layers
            else:
                action_merge_layer = 1

            l_hidden = l_obs

            for idx, size in enumerate(hidden_sizes):
                if bn:
                    l_hidden = L.batch_norm(l_hidden)

                if idx == action_merge_layer:
                    l_hidden = L.ConcatLayer([l_hidden, l_action])

                l_hidden = L.DenseLayer(
                    l_hidden,
                    num_units=size,
                    W=hidden_W_init,
                    b=hidden_b_init,
                    nonlinearity=hidden_nonlinearity,
                    name="h%d" % (idx + 1)
                )

            if action_merge_layer == n_layers:
                l_hidden = L.ConcatLayer([l_hidden, l_action])

            l_output = L.DenseLayer(
                l_hidden,
                num_units=1,
                W=output_W_init,
                b=output_b_init,
                nonlinearity=output_nonlinearity,
                name="output"
            )

            #output_var = L.get_output(l_output, deterministic=True).flatten()
            output_var = tf.reshape(L.get_output(l_output, deterministic=True), (-1,))

            self._f_qval = tensor_utils.compile_function([l_obs.input_var, l_action.input_var], output_var)
            self._output_layer = l_output
            self._obs_layer = l_obs
            self._action_layer = l_action
            self._output_nonlinearity = output_nonlinearity

            LayersPowered.__init__(self, [l_output])
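
Example #4 differs mainly in exposing the weight and bias initializers. Assuming XavierUniformInitializer follows the usual Glorot-uniform rule, this NumPy sketch shows the sampling bound it would use for a 32x32 hidden layer:

import numpy as np

fan_in, fan_out = 32, 32
limit = np.sqrt(6.0 / (fan_in + fan_out))                     # Glorot-uniform bound
W = np.random.uniform(-limit, limit, size=(fan_in, fan_out))
print(limit, W.shape)
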
Example #5
    def _make_action_module(self,
                            prev_dict: Dict[str, Any],
                            unbound_act: UnboundAction,
                            output_size: int,
                            layer_num: int,
                            l_in: L.Layer,
                            nonlinearity: Any=None,
                            dropout: float=0.0,
                            norm_response=False,
                            extra_dict=None) -> Any:
        # TODO: can I do all of this index-futzing just once, instead of each
        # time I need to make an action module? Same applies to proposition
        # modules. Will make construction much faster (not that it's very
        # expensive at the moment...).
        name_pfx = 'act_mod_%s_%d' % (unbound_act.schema_name, layer_num)
        prob_meta = self._prob_meta
        dom_meta = self._weight_manager.dom_meta

        # sort input layers so we can pass them to Lasagne
        pred_to_tensor_idx, prev_inputs = self._sort_inputs(prev_dict)

        # this tells us how many channels our input will have to be
        index_spec = []
        dom_rel_preds = dom_meta.rel_pred_names(unbound_act)
        for act_pred_idx, arg_pred in enumerate(dom_rel_preds):
            pools = []
            for ground_act in prob_meta.schema_to_acts(unbound_act):
                # we're looking at the act_pred_idx-th relevant proposition
                bound_prop = prob_meta.rel_props(ground_act)[act_pred_idx]
                prop_idx = prob_meta.prop_to_pred_subtensor_ind(bound_prop)
                # we're only "pooling" over one element (the proposition
                # features)
                pools.append([prop_idx])

            # which tensor do we need to pick this out of?
            tensor_idx = pred_to_tensor_idx[arg_pred]
            index_spec.append((tensor_idx, pools))

        conv_input = PickPoolAndStack(
            prev_inputs, index_spec, name=name_pfx + '/cat')
        if layer_num == 0 and extra_dict is not None:
            # first action layer, so add in extra data
            act_data = extra_dict[unbound_act]
            conv_input = L.ConcatLayer(
                [conv_input, act_data], axis=2, name=name_pfx + '/extra_cat')
        W, b = self._weight_manager.act_weights[layer_num][unbound_act]
        if nonlinearity is None:
            nonlinearity = self.nonlinearity
        rv = L.Conv1DLayer(
            conv_input,
            output_size,
            filter_size=1,
            stride=1,
            pad='VALID',
            W=W,
            b=b,
            nonlinearity=nonlinearity,
            name=name_pfx + '/conv')
        if dropout > 0:
            rv = L.DropoutLayer(rv, p=dropout, name=name_pfx + '/drop')
        if norm_response and output_size > 1:
            rv = ResponseNormLayer(rv, name=name_pfx + '/norm')
        # BN won't work because it's a mess to apply in a net like this
        #     rv = L.BatchNormLayer(rv, center=True, scale=True)
        return rv
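
The filter_size=1 convolution applies the same dense transform independently at every action position. A small NumPy sketch of that idea; the shapes and layout are assumed for illustration, and a ReLU stands in for the module's nonlinearity:

import numpy as np

batch, n_actions, in_ch, out_ch = 4, 5, 8, 16
x = np.random.randn(batch, n_actions, in_ch)   # stacked per-action feature vectors
W = np.random.randn(in_ch, out_ch)
b = np.zeros(out_ch)
y = np.maximum(x @ W + b, 0.0)                 # shared weights applied at every position
assert y.shape == (batch, n_actions, out_ch)
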
Example #6
    def __init__(self,
                 name,
                 input_dim,
                 output_dim,
                 hidden_sizes,
                 hidden_nonlinearity,
                 output_nonlinearity,
                 vocab_size,
                 embedding_size,
                 hidden_W_init=L.xavier_init,
                 hidden_b_init=tf.zeros_initializer,
                 output_W_init=L.xavier_init,
                 output_b_init=tf.zeros_initializer,
                 has_other_input=True,
                 input_var=None,
                 input_layer=None,
                 **kwargs):
        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            if input_layer is None:
                input_layer = L.InputLayer(shape=(None, input_dim),
                                           input_var=input_var,
                                           name="input")
            l_in = input_layer

            if has_other_input:
                # Slice apart
                l_other_in = L.SliceLayer(l_in,
                                          "slice_other",
                                          slice(0, input_dim - vocab_size),
                                          axis=-1)
                l_emb_in = L.SliceLayer(l_in,
                                        "slice_emb",
                                        slice(input_dim - vocab_size,
                                              input_dim),
                                        axis=-1)

                # HACK: This is cheap with small embedding matrices but will not scale well.
                # Find a better way to look up from this representation + mean-pool.
                l_embs = MeanPoolEmbeddingLayer(l_emb_in, "embeddings",
                                                embedding_size)

                l_hidden_input = L.ConcatLayer([l_other_in, l_embs], "merge")
            else:
                l_hidden_input = l_in

            hidden_layers = [l_hidden_input]
            for i, hidden_size in enumerate(hidden_sizes):
                l_hid = L.DenseLayer(hidden_layers[-1],
                                     num_units=hidden_size,
                                     nonlinearity=hidden_nonlinearity,
                                     name="hidden_%i" % i,
                                     W=hidden_W_init,
                                     b=hidden_b_init)
                hidden_layers.append(l_hid)

            l_out = L.DenseLayer(hidden_layers[-1],
                                 num_units=output_dim,
                                 nonlinearity=output_nonlinearity,
                                 name="output",
                                 W=output_W_init,
                                 b=output_b_init)

            self.input_layer = l_in
            self.input_var = l_in.input_var
            self.output_layer = l_out

            LayersPowered.__init__(self, l_out)
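
MeanPoolEmbeddingLayer is only identified by name here; assuming it mean-pools the embedding rows selected by the multi-hot vocabulary slice, the idea looks like this NumPy sketch (all sizes invented):

import numpy as np

vocab_size, embedding_size = 10, 4
emb_matrix = np.random.randn(vocab_size, embedding_size)
bag_of_words = np.zeros(vocab_size)
bag_of_words[[2, 7]] = 1.0                                  # two vocabulary entries present
pooled = (bag_of_words @ emb_matrix) / max(bag_of_words.sum(), 1.0)
assert pooled.shape == (embedding_size,)
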