def update_model(self, states: str, actions: str,
                     q_vals_target: str) -> None:
        """
        Takes in states, actions, and target q values. Updates the model:

            Runs the forward pass, computing Q(states, actions).
                Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
            Comptutes Loss of Q(states, actions) with respect to q_vals_targets
            Updates Q Network's weights according to loss and optimizer

        :param states: Numpy array with shape (batch_size, state_dim). The ith
            row is a representation of the ith transition's state.
        :param actions: Numpy array with shape (batch_size, action_dim). The ith
            row is a representation of the ith transition's action.
        :param q_vals_targets: Numpy array with shape (batch_size, 1). The ith
            row is the label to train against for the data from the ith transition.
        """
        model = C2.model()
        q_vals_target = C2.StopGradient(q_vals_target)
        q_values = C2.NextBlob("train_output")
        state_action_pairs, _ = C2.Concat(states, actions, axis=1)
        self.ml_trainer.make_forward_pass_ops(model, state_action_pairs,
                                              q_values, False)

        self.loss_blob = self.ml_trainer.generateLossOps(
            model, q_values, q_vals_target)
        model.AddGradientOperators([self.loss_blob])
        for param in model.params:
            if param in model.param_to_grad:
                param_grad = model.param_to_grad[param]
                param_grad = C2.NanCheck(param_grad)
        self.ml_trainer.addParameterUpdateOps(model)
Ejemplo n.º 2
0
    def normalize_dense_matrix(
        self,
        input_matrix: str,
        features: List[str],
        normalization_parameters: Dict[str, NormalizationParameters],
        blobname_prefix: str,
    ) -> Tuple[str, List[str]]:
        """
        Normalizes inputs according to parameters. Expects a dense matrix whose ith
        column corresponds to feature i.

        Note that the Caffe2 BatchBoxCox operator isn't implemented on CUDA GPU so
        we need to use a CPU context.

        :param input_matrix: Input matrix to normalize.
        :param features: Array that maps feature ids to column indices.
        :param normalization_parameters: Mapping from feature names to
            NormalizationParameters.
        :param blobname_prefix: Prefix for input blobs to norm_net.
        :param num_output_features: The number of features in an output processed
            datapoint. If set to None, this function will compute it.
        """
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
            feature_starts = self._get_type_boundaries(
                features, normalization_parameters)

            normalized_input_blobs = []
            parameters: List[str] = []
            for i, feature_type in enumerate(FEATURE_TYPES):
                start_index = feature_starts[i]
                if (i + 1) == len(FEATURE_TYPES):
                    end_index = len(normalization_parameters)
                else:
                    end_index = feature_starts[i + 1]
                if start_index == end_index:
                    continue  # No features of this type
                sliced_input_features = self._get_input_blob(
                    blobname_prefix, feature_type)
                C2.net().Slice(
                    [input_matrix],
                    [sliced_input_features],
                    starts=[0, start_index],
                    ends=[-1, end_index],
                )
                normalized_input_blob, blob_parameters = self.preprocess_blob(
                    sliced_input_features,
                    [
                        normalization_parameters[x]
                        for x in features[start_index:end_index]
                    ],
                )
                parameters.extend(blob_parameters)
                normalized_input_blobs.append(normalized_input_blob)
            for i, inp in enumerate(normalized_input_blobs):
                logger.info("input# {}: {}".format(i, inp))
            concatenated_input_blob, concatenated_input_blob_dim = C2.Concat(
                *normalized_input_blobs, axis=1)
            concatenated_input_blob = C2.NanCheck(concatenated_input_blob)
            return concatenated_input_blob, parameters
Ejemplo n.º 3
0
    def update_model(
        self,
        states: str,
        actions: str,
        q_vals_target: str,
    ) -> None:
        """
        Takes in states, actions, and target q values. Updates the model:

            Runs the forward pass, computing Q(states, actions).
                Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
            Comptutes Loss of Q(states, actions) with respect to q_vals_targets
            Updates Q Network's weights according to loss and optimizer

        :param states: Numpy array with shape (batch_size, state_dim). The ith
            row is a representation of the ith transition's state.
        :param actions: Numpy array with shape (batch_size, action_dim). The ith
            row is a representation of the ith transition's action.
        :param q_vals_targets: Numpy array with shape (batch_size, 1). The ith
            row is the label to train against for the data from the ith transition.
        """
        model = C2.model()
        q_vals_target = C2.StopGradient(q_vals_target)
        q_values = C2.NextBlob("train_output")
        state_action_pairs, _ = C2.Concat(states, actions, axis=1)
        MakeForwardPassOps(
            model,
            self.model_id,
            state_action_pairs,
            q_values,
            self.weights,
            self.biases,
            self.activations,
            self.layers,
            self.dropout_ratio,
            False,
        )

        self.loss_blob = GenerateLossOps(
            model,
            q_values,
            q_vals_target,
        )
        model.AddGradientOperators([self.loss_blob])
        for param in model.params:
            if param in model.param_to_grad:
                param_grad = model.param_to_grad[param]
                param_grad = C2.NanCheck(param_grad)
        AddParameterUpdateOps(
            model,
            optimizer_input=self.optimizer,
            base_learning_rate=self.learning_rate,
            gamma=self.gamma,
            policy=self.lr_policy,
        )
Ejemplo n.º 4
0
 def concat_states_and_possible_next_actions(
     self,
     next_state_preprocessed_matrix_blob: str,
     possible_next_actions_blob: str,
     possible_next_actions_lengths_blob: str,
 ) -> str:
     stacked_states = C2.LengthsTile(next_state_preprocessed_matrix_blob,
                                     possible_next_actions_lengths_blob)
     state_action_pairs, _ = C2.Concat(stacked_states,
                                       possible_next_actions_blob,
                                       axis=1)
     return state_action_pairs
Ejemplo n.º 5
0
 def get_q_values(self, states: str, actions: str, use_target_network: bool) -> str:
     state_action_pairs, _ = C2.Concat(states, actions, axis=1)
     q_values = C2.NextBlob("q_values")
     if use_target_network:
         self.target_network.make_forward_pass_ops(
             C2.model(), state_action_pairs, q_values, True
         )
     else:
         self.ml_trainer.make_forward_pass_ops(
             C2.model(), state_action_pairs, q_values, True
         )
     return q_values
Ejemplo n.º 6
0
 def get_q_values(
     self,
     states: str,
     actions: str,
     use_target_network: bool,
 ) -> str:
     state_action_pairs, _ = C2.Concat(states, actions, axis=1)
     if use_target_network:
         return self.target_network.target_values(state_action_pairs)
     else:
         q_values = C2.NextBlob("q_values")
         MakeForwardPassOps(
             C2.model(),
             self.model_id,
             state_action_pairs,
             q_values,
             self.weights,
             self.biases,
             self.activations,
             self.layers,
             self.dropout_ratio,
             True,
         )
         return q_values
    def export(cls,
               trainer,
               actions,
               state_normalization_parameters,
               int_features=False):
        """ Creates a DiscreteActionPredictor from a DiscreteActionTrainer.

        :param trainer DiscreteActionTrainer
        :param actions list of action names
        :param state_normalization_parameters state NormalizationParameters
        :param int_features boolean indicating if int features blob will be present
        """

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob('input/image', np.zeros([1, 1, 1, 1],
                                                   dtype=np.int32))
        workspace.FeedBlob('input/float_features.lengths',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/float_features.keys',
                           np.zeros(1, dtype=np.int64))
        workspace.FeedBlob('input/float_features.values',
                           np.zeros(1, dtype=np.float32))

        input_feature_lengths = 'input_feature_lengths'
        input_feature_keys = 'input_feature_keys'
        input_feature_values = 'input_feature_values'

        if int_features:
            workspace.FeedBlob('input/int_features.lengths',
                               np.zeros(1, dtype=np.int32))
            workspace.FeedBlob('input/int_features.keys',
                               np.zeros(1, dtype=np.int64))
            workspace.FeedBlob('input/int_features.values',
                               np.zeros(1, dtype=np.int32))
            C2.net().Cast(['input/int_features.values'],
                          ['input/int_features.values_float'],
                          dtype=caffe2_pb2.TensorProto.FLOAT)
            C2.net().MergeMultiScalarFeatureTensors([
                'input/float_features.lengths', 'input/float_features.keys',
                'input/float_features.values', 'input/int_features.lengths',
                'input/int_features.keys', 'input/int_features.values_float'
            ], [
                input_feature_lengths, input_feature_keys, input_feature_values
            ])
        else:
            C2.net().Copy(['input/float_features.lengths'],
                          [input_feature_lengths])
            C2.net().Copy(['input/float_features.keys'], [input_feature_keys])
            C2.net().Copy(['input/float_features.values'],
                          [input_feature_values])

        parameters = []
        if state_normalization_parameters is not None:
            preprocessor = PreprocessorNet(net, True)
            parameters.extend(preprocessor.parameters)
            normalized_dense_matrix, new_parameters = \
                preprocessor.normalize_sparse_matrix(
                    input_feature_lengths,
                    input_feature_keys,
                    input_feature_values,
                    state_normalization_parameters,
                    'state_norm',
                )
            parameters.extend(new_parameters)
        else:
            # Image input.  Note: Currently this does the wrong thing if
            #   more than one image is passed at a time.
            normalized_dense_matrix = 'input/image'

        new_parameters, q_values = RLPredictor._forward_pass(
            model,
            trainer,
            normalized_dense_matrix,
            actions,
        )
        parameters.extend(new_parameters)

        # Get 1 x n action index tensor under the max_q policy
        max_q_act_idxs = 'max_q_policy_actions'
        C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
        shape_of_num_of_states = 'num_states_shape'
        C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
        num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

        # Get 1 x n action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32))
        tempered_q_values = C2.Div(q_values, "temperature", broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = 'softmax_act_idxs_nested'
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_idxs = 'softmax_policy_actions'
        C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

        # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
        # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_idxs,
                                 to=caffe2_pb2.TensorProto.INT32)
        softmax_act_blob = C2.Cast(softmax_act_idxs,
                                   to=caffe2_pb2.TensorProto.INT32)
        C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
        transposed_action_idxs = C2.Transpose(max_q_act_blob)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        output_values = 'output/string_single_categorical_features.values'
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
        C2.net().Gather(["action_names", flat_transposed_action_idxs],
                        [output_values])

        output_lengths = 'output/string_single_categorical_features.lengths'
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill([shape_of_num_of_states], [output_lengths],
                              value=2,
                              dtype=caffe2_pb2.TensorProto.INT32)

        output_keys = 'output/string_single_categorical_features.keys'
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1],
                            value=0,
                            dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1],
                            value=1,
                            dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
        C2.net().FlattenToVec([output_key_tile], [output_keys])

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        return DiscreteActionPredictor(net, parameters, int_features)
Ejemplo n.º 8
0
    def normalize_sparse_matrix(
        self,
        lengths_blob: str,
        keys_blob: str,
        values_blob: str,
        normalization_parameters: Dict[int, NormalizationParameters],
        blobname_prefix: str,
        split_sparse_to_dense: bool,
        split_expensive_feature_groups: bool,
        normalize: bool = True,
        sorted_features_override: List[int] = None,
    ) -> Tuple[str, List[str]]:
        if sorted_features_override:
            sorted_features = sorted_features_override
        else:
            sorted_features, _ = sort_features_by_normalization(
                normalization_parameters)
        int_features = [int(feature) for feature in sorted_features]

        preprocess_num_batches = 8 if split_sparse_to_dense else 1

        lengths_batch = []
        keys_batch = []
        values_batch = []
        for _ in range(preprocess_num_batches):
            lengths_batch.append(C2.NextBlob(blobname_prefix +
                                             "_length_batch"))
            keys_batch.append(C2.NextBlob(blobname_prefix + "_key_batch"))
            values_batch.append(C2.NextBlob(blobname_prefix + "_value_batch"))

        C2.net().Split([lengths_blob], lengths_batch, axis=0)
        total_lengths_batch = []
        for x in range(preprocess_num_batches):
            total_lengths_batch.append(
                C2.Reshape(C2.ReduceBackSum(lengths_batch[x],
                                            num_reduce_dims=1),
                           shape=[1])[0])
        total_lengths_batch_concat, _ = C2.Concat(*total_lengths_batch, axis=0)
        C2.net().Split([keys_blob, total_lengths_batch_concat],
                       keys_batch,
                       axis=0)
        C2.net().Split([values_blob, total_lengths_batch_concat],
                       values_batch,
                       axis=0)

        dense_input_fragments = []
        parameters: List[str] = []

        MISSING_SCALAR = self._store_parameter(
            parameters, "MISSING_SCALAR",
            np.array([MISSING_VALUE], dtype=np.float32))
        C2.net().GivenTensorFill([], [MISSING_SCALAR],
                                 shape=[],
                                 values=[MISSING_VALUE])

        for preprocess_batch in range(preprocess_num_batches):
            dense_input_fragment = C2.SparseToDenseMask(
                keys_batch[preprocess_batch],
                values_batch[preprocess_batch],
                MISSING_SCALAR,
                lengths_batch[preprocess_batch],
                mask=int_features,
            )[0]

            if normalize:
                normalized_fragment, p = self.normalize_dense_matrix(
                    dense_input_fragment,
                    sorted_features,
                    normalization_parameters,
                    blobname_prefix,
                    split_expensive_feature_groups,
                )
                dense_input_fragments.append(normalized_fragment)
                parameters.extend(p)
            else:
                dense_input_fragments.append(dense_input_fragment)

        dense_input = C2.NextBlob(blobname_prefix + "_dense_input")
        dense_input_dims = C2.NextBlob(blobname_prefix + "_dense_input_dims")
        C2.net().Concat(dense_input_fragments, [dense_input, dense_input_dims],
                        axis=0)

        return dense_input, parameters
    def export(
        cls,
        trainer,
        state_normalization_parameters,
        action_normalization_parameters,
        int_features=False,
        model_on_gpu=False,
    ):
        """Export caffe2 preprocessor net and pytorch DQN forward pass as one
        caffe2 net.

        :param trainer ParametricDQNTrainer
        :param state_normalization_parameters state NormalizationParameters
        :param action_normalization_parameters action NormalizationParameters
        :param int_features boolean indicating if int features blob will be present
        :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
        """

        input_dim = trainer.num_features
        if isinstance(trainer.q_network, DataParallel):
            trainer.q_network = trainer.q_network.module

        buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
            trainer.q_network, input_dim, model_on_gpu
        )
        qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
            buffer
        )
        torch_workspace = caffe2_netdef.workspace

        parameters = torch_workspace.Blobs()
        for blob_str in parameters:
            workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

        torch_init_net = core.Net(caffe2_netdef.init_net)
        torch_predict_net = core.Net(caffe2_netdef.predict_net)
        # While converting to metanetdef, the external_input of predict_net
        # will be recomputed. Add the real output of init_net to parameters
        # to make sure they will be counted.
        parameters.extend(
            set(caffe2_netdef.init_net.external_output)
            - set(caffe2_netdef.init_net.external_input)
        )

        # ensure state and action IDs have no intersection
        assert (
            len(
                set(state_normalization_parameters.keys())
                & set(action_normalization_parameters.keys())
            )
            == 0
        )

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            workspace.FeedBlob(
                "input/int_features.lengths", np.zeros(1, dtype=np.int32)
            )
            workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [input_feature_lengths, input_feature_keys, input_feature_values],
            )
        else:
            C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"], [input_feature_values])

        preprocessor = PreprocessorNet(True)
        sorted_state_features, _ = sort_features_by_normalization(
            state_normalization_parameters
        )
        state_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_state_features,
        )
        parameters.extend(new_parameters)
        state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
            state_dense_matrix,
            sorted_state_features,
            state_normalization_parameters,
            "state_norm",
            False,
        )
        parameters.extend(new_parameters)

        sorted_action_features, _ = sort_features_by_normalization(
            action_normalization_parameters
        )
        action_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_action_features,
        )
        parameters.extend(new_parameters)
        action_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
            action_dense_matrix,
            sorted_action_features,
            action_normalization_parameters,
            "action_norm",
            False,
        )
        parameters.extend(new_parameters)

        state_action_normalized = "state_action_normalized"
        state_action_normalized_dim = "state_action_normalized_dim"
        net.Concat(
            [state_normalized_dense_matrix, action_normalized_dense_matrix],
            [state_action_normalized, state_action_normalized_dim],
            axis=1,
        )

        net.Copy([state_action_normalized], [qnet_input_blob])

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(torch_init_net)

        net.AppendNet(torch_predict_net)

        new_parameters, q_values = RLPredictor._forward_pass(
            model, trainer, state_action_normalized, ["Q"], qnet_output_blob
        )
        parameters.extend(new_parameters)

        flat_q_values_key = (
            "output/string_weighted_multi_categorical_features.values.values"
        )
        num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
        q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

        # Get 1 x n (number of examples) action index tensor under the max_q policy
        max_q_act_idxs = "max_q_policy_actions"
        C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
        max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

        # Get 1 x n (number of examples) action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32)
        )
        tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = "softmax_act_idxs_nested"
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_blob = C2.Tile(
            C2.FlattenToVec(softmax_act_idxs_nested), num_examples, axis=0
        )

        # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
        # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
        softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
        max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
        softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
        C2.net().Append(
            [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
        )
        transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        output_values = "output/int_single_categorical_features.values"
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
        C2.net().Copy([flat_transposed_action_idxs], [output_values])

        output_lengths = "output/int_single_categorical_features.lengths"
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [flat_q_values_key],
            [output_lengths],
            value=2,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        output_keys = "output/int_single_categorical_features.keys"
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
        C2.net().FlattenToVec([output_key_tile], [output_keys])

        workspace.CreateNet(net)
        return ParametricDQNPredictor(net, torch_init_net, parameters, int_features)
Ejemplo n.º 10
0
    def export(
        cls,
        trainer,
        state_normalization_parameters,
        action_normalization_parameters,
    ):
        """ Creates ContinuousActionDQNPredictor from a list of action trainers

        :param trainer ContinuousActionDQNPredictor
        :param state_features list of state feature names
        :param action_features list of action feature names
        """
        # ensure state and action IDs have no intersection
        assert (len(
            set(state_normalization_parameters.keys())
            & set(action_normalization_parameters.keys())) == 0)

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob('input/float_features.lengths',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/float_features.keys',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/float_features.values',
                           np.zeros(1, dtype=np.float32))

        preprocessor = PreprocessorNet(net, True)
        parameters = []
        parameters.extend(preprocessor.parameters)
        state_normalized_dense_matrix, new_parameters = \
            preprocessor.normalize_sparse_matrix(
                'input/float_features.lengths',
                'input/float_features.keys',
                'input/float_features.values',
                state_normalization_parameters,
                'state_norm',
            )
        parameters.extend(new_parameters)
        action_normalized_dense_matrix, new_parameters = \
            preprocessor.normalize_sparse_matrix(
                'input/float_features.lengths',
                'input/float_features.keys',
                'input/float_features.values',
                action_normalization_parameters,
                'action_norm',
            )
        parameters.extend(new_parameters)
        state_action_normalized = 'state_action_normalized'
        state_action_normalized_dim = 'state_action_normalized_dim'
        net.Concat(
            [state_normalized_dense_matrix, action_normalized_dense_matrix],
            [state_action_normalized, state_action_normalized_dim],
            axis=1)
        new_parameters, q_values = RLPredictor._forward_pass(
            model,
            trainer,
            state_action_normalized,
            ['Q'],
        )
        parameters.extend(new_parameters)

        flat_q_values_key = \
            'output/string_weighted_multi_categorical_features.values.values'
        num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
        q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

        # Get 1 x n (number of examples) action index tensor under the max_q policy
        max_q_act_idxs = 'max_q_policy_actions'
        C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
        max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

        # Get 1 x n (number of examples) action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32))
        tempered_q_values = C2.Div(q_value_blob, "temperature", broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = 'softmax_act_idxs_nested'
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_blob = C2.Tile(C2.FlattenToVec(softmax_act_idxs_nested),
                                   num_examples,
                                   axis=0)

        # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
        # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_blob,
                                 to=caffe2_pb2.TensorProto.INT64)
        softmax_act_blob = C2.Cast(softmax_act_blob,
                                   to=caffe2_pb2.TensorProto.INT64)
        max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
        softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob,
                                                shape=[1, -1])
        C2.net().Append([max_q_act_blob_nested, softmax_act_blob_nested],
                        [max_q_act_blob_nested])
        transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        output_values = 'output/int_single_categorical_features.values'
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
        C2.net().Copy([flat_transposed_action_idxs], [output_values])

        output_lengths = 'output/int_single_categorical_features.lengths'
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill([flat_q_values_key], [output_lengths],
                              value=2,
                              dtype=caffe2_pb2.TensorProto.INT32)

        output_keys = 'output/int_single_categorical_features.keys'
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1],
                            value=0,
                            dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1],
                            value=1,
                            dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
        C2.net().FlattenToVec([output_key_tile], [output_keys])

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        return ContinuousActionDQNPredictor(net, parameters)
Ejemplo n.º 11
0
    def export(
        cls,
        trainer,
        actions,
        state_normalization_parameters,
        int_features=False,
        model_on_gpu=False,
        set_missing_value_to_zero=False,
    ):
        """Export caffe2 preprocessor net and pytorch DQN forward pass as one
        caffe2 net.

        :param trainer DQNTrainer
        :param state_normalization_parameters state NormalizationParameters
        :param int_features boolean indicating if int features blob will be present
        :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
        """

        input_dim = trainer.num_features

        q_network = (trainer.q_network.module if isinstance(
            trainer.q_network, DataParallel) else trainer.q_network)

        buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
            q_network, input_dim, model_on_gpu)
        qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
            buffer)
        torch_workspace = caffe2_netdef.workspace

        parameters = torch_workspace.Blobs()
        for blob_str in parameters:
            workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

        torch_init_net = core.Net(caffe2_netdef.init_net)
        torch_predict_net = core.Net(caffe2_netdef.predict_net)
        logger.info("Generated ONNX predict net:")
        logger.info(str(torch_predict_net.Proto()))
        # While converting to metanetdef, the external_input of predict_net
        # will be recomputed. Add the real output of init_net to parameters
        # to make sure they will be counted.
        parameters.extend(
            set(caffe2_netdef.init_net.external_output) -
            set(caffe2_netdef.init_net.external_input))

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1],
                                                   dtype=np.int32))
        workspace.FeedBlob("input/float_features.lengths",
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys",
                           np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values",
                           np.zeros(1, dtype=np.float32))

        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            workspace.FeedBlob("input/int_features.lengths",
                               np.zeros(1, dtype=np.int32))
            workspace.FeedBlob("input/int_features.keys",
                               np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values",
                               np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [
                    input_feature_lengths, input_feature_keys,
                    input_feature_values
                ],
            )
        else:
            C2.net().Copy(["input/float_features.lengths"],
                          [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"],
                          [input_feature_values])

        if state_normalization_parameters is not None:
            sorted_feature_ids = sort_features_by_normalization(
                state_normalization_parameters)[0]
            dense_matrix, new_parameters = sparse_to_dense(
                input_feature_lengths,
                input_feature_keys,
                input_feature_values,
                sorted_feature_ids,
                set_missing_value_to_zero=set_missing_value_to_zero,
            )
            parameters.extend(new_parameters)
            preprocessor_net = PreprocessorNet()
            state_normalized_dense_matrix, new_parameters = preprocessor_net.normalize_dense_matrix(
                dense_matrix,
                sorted_feature_ids,
                state_normalization_parameters,
                "state_norm_",
                True,
            )
            parameters.extend(new_parameters)
        else:
            # Image input.  Note: Currently this does the wrong thing if
            #   more than one image is passed at a time.
            state_normalized_dense_matrix = "input/image"

        net.Copy([state_normalized_dense_matrix], [qnet_input_blob])

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(torch_init_net)

        net.AppendNet(torch_predict_net)

        new_parameters, q_values = RLPredictor._forward_pass(
            model, trainer, state_normalized_dense_matrix, actions,
            qnet_output_blob)
        parameters.extend(new_parameters)

        # Get 1 x n action index tensor under the max_q policy
        max_q_act_idxs = "max_q_policy_actions"
        C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
        shape_of_num_of_states = "num_states_shape"
        C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
        num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

        # Get 1 x n action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32))
        tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = "softmax_act_idxs_nested"
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_idxs = "softmax_policy_actions"
        C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

        action_names = C2.NextBlob("action_names")
        parameters.append(action_names)
        workspace.FeedBlob(action_names, np.array(actions))

        # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
        # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_idxs,
                                 to=caffe2_pb2.TensorProto.INT32)
        softmax_act_blob = C2.Cast(softmax_act_idxs,
                                   to=caffe2_pb2.TensorProto.INT32)
        C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
        transposed_action_idxs = C2.Transpose(max_q_act_blob)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME,
                           np.zeros(1, dtype=np.int64))
        C2.net().Gather([action_names, flat_transposed_action_idxs],
                        [OUTPUT_SINGLE_CAT_VALS_NAME])

        workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME,
                           np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [shape_of_num_of_states],
            [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
            value=2,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME,
                           np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1],
                            value=0,
                            dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1],
                            value=1,
                            dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
        C2.net().FlattenToVec([output_key_tile], [OUTPUT_SINGLE_CAT_KEYS_NAME])

        workspace.CreateNet(net)
        return DQNPredictor(net, torch_init_net, parameters, int_features)
    def export(
        cls,
        trainer,
        state_normalization_parameters,
        action_normalization_parameters,
        int_features=False,
    ):
        """ Creates a ContinuousActionDQNPredictor from a ContinuousActionDQNTrainer.

        :param trainer ContinuousActionDQNTrainer
        :param state_normalization_parameters state NormalizationParameters
        :param action_normalization_parameters action NormalizationParameters
        :param int_features boolean indicating if int features blob will be present
        """
        # ensure state and action IDs have no intersection
        assert (
            len(
                set(state_normalization_parameters.keys())
                & set(action_normalization_parameters.keys())
            )
            == 0
        )

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            workspace.FeedBlob(
                "input/int_features.lengths", np.zeros(1, dtype=np.int32)
            )
            workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [input_feature_lengths, input_feature_keys, input_feature_values],
            )
        else:
            C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"], [input_feature_values])

        parameters = []
        state_normalized_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            state_normalization_parameters,
            None,
        )
        parameters.extend(new_parameters)
        action_normalized_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            action_normalization_parameters,
            None,
        )
        parameters.extend(new_parameters)
        state_action_normalized = "state_action_normalized"
        state_action_normalized_dim = "state_action_normalized_dim"
        net.Concat(
            [state_normalized_dense_matrix, action_normalized_dense_matrix],
            [state_action_normalized, state_action_normalized_dim],
            axis=1,
        )
        new_parameters, q_values = RLPredictor._forward_pass(
            model, trainer, state_action_normalized, ["Q"]
        )
        parameters.extend(new_parameters)

        flat_q_values_key = (
            "output/string_weighted_multi_categorical_features.values.values"
        )
        num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
        q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

        # Get 1 x n (number of examples) action index tensor under the max_q policy
        max_q_act_idxs = "max_q_policy_actions"
        C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
        max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

        # Get 1 x n (number of examples) action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32)
        )
        tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = "softmax_act_idxs_nested"
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_blob = C2.Tile(
            C2.FlattenToVec(softmax_act_idxs_nested), num_examples, axis=0
        )

        # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
        # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
        softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
        max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
        softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
        C2.net().Append(
            [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
        )
        transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        output_values = "output/int_single_categorical_features.values"
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
        C2.net().Copy([flat_transposed_action_idxs], [output_values])

        output_lengths = "output/int_single_categorical_features.lengths"
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [flat_q_values_key],
            [output_lengths],
            value=2,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        output_keys = "output/int_single_categorical_features.keys"
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
        C2.net().FlattenToVec([output_key_tile], [output_keys])

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        return ContinuousActionDQNPredictor(net, parameters, int_features)