Code example #1
    def process(
            self, sparse_data: StackedAssociativeArray
    ) -> Tuple[str, str, List[str]]:
        lengths_blob = sparse_data.lengths
        keys_blob = sparse_data.keys
        values_blob = sparse_data.values

        MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
        missing_value = 0.0 if self.set_missing_value_to_zero else MISSING_VALUE
        workspace.FeedBlob(MISSING_SCALAR,
                           np.array([missing_value], dtype=np.float32))
        C2.net().GivenTensorFill([], [MISSING_SCALAR],
                                 shape=[],
                                 values=[missing_value])

        parameters: List[str] = [MISSING_SCALAR]

        assert len(self.sorted_features) > 0, "Sorted features is empty"
        dense_input = C2.NextBlob("dense_input")
        dense_input_presence = C2.NextBlob("dense_input_presence")
        C2.net().SparseToDenseMask(
            [keys_blob, values_blob, MISSING_SCALAR, lengths_blob],
            [dense_input, dense_input_presence],
            mask=self.sorted_features,
            return_presence_mask=True,
        )

        if self.set_missing_value_to_zero:
            # Missing features were filled with 0.0 above, so recompute
            # presence as "the value is not (numerically) zero".
            dense_input_presence = C2.Not(
                C2.And(
                    C2.GT(dense_input, -1e-4, broadcast=1),
                    C2.LT(dense_input, 1e-4, broadcast=1),
                )
            )

        return dense_input, dense_input_presence, parameters
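For intuition, here is a minimal pure-NumPy sketch of what the SparseToDenseMask op computes in the example above. The helper name and the toy data are illustrative only, not part of the Horizon API:

import numpy as np

def sparse_to_dense_mask(keys, values, lengths, mask, missing=-1.0):
    # One row per example (len(lengths)), one column per feature id in `mask`;
    # features absent from an example are filled with the `missing` scalar.
    col = {fid: j for j, fid in enumerate(mask)}
    dense = np.full((len(lengths), len(mask)), missing, dtype=np.float32)
    presence = np.zeros(dense.shape, dtype=bool)
    offset = 0
    for i, n in enumerate(lengths):
        for k, v in zip(keys[offset:offset + n], values[offset:offset + n]):
            if k in col:  # keys outside the mask are dropped
                dense[i, col[k]] = v
                presence[i, col[k]] = True
        offset += n
    return dense, presence

dense, presence = sparse_to_dense_mask(
    keys=[2, 1, 1], values=[3.0, 4.0, 5.0], lengths=[2, 1], mask=[1, 2]
)
# dense    == [[4.0, 3.0], [5.0, -1.0]]
# presence == [[True, True], [True, False]]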
Code example #2
    def update_model(self, states: str, actions: str, q_vals_target: str) -> None:
        """
        Takes in states, actions, and target q values. Updates the model:

            Runs the forward pass, computing Q(states, actions).
                Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
            Computes the loss of Q(states, actions) with respect to q_vals_target
            Updates the Q Network's weights according to the loss and optimizer

        :param states: Numpy array with shape (batch_size, state_dim). The ith
            row is a representation of the ith transition's state.
        :param actions: Numpy array with shape (batch_size, action_dim). The ith
            row contains the one-hotted representation of the ith action.
        :param q_vals_target: Numpy array with shape (batch_size, 1). The ith
            row is the label to train against for the data from the ith transition.
        """
        model = C2.model()
        q_vals_target = C2.StopGradient(q_vals_target)
        output_blob = C2.NextBlob("train_output")
        if self.conv_ml_trainer is not None:
            conv_output_blob = C2.NextBlob("conv_output")
            self.conv_ml_trainer.make_conv_pass_ops(model, states, conv_output_blob)
            states = conv_output_blob

        self.ml_trainer.make_forward_pass_ops(model, states, output_blob, False)
        q_val_select = C2.ReduceBackSum(C2.Mul(output_blob, actions))
        q_values = C2.ExpandDims(q_val_select, dims=[1])

        self.loss_blob = self.ml_trainer.generateLossOps(model, q_values, q_vals_target)
        model.AddGradientOperators([self.loss_blob])
        for param in model.params:
            if param in model.param_to_grad:
                param_grad = model.param_to_grad[param]
                # NanCheck adds an op that asserts the gradient is free of
                # NaNs at run time; its output blob is intentionally unused.
                param_grad = C2.NanCheck(param_grad)
        self.ml_trainer.addParameterUpdateOps(model)
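The Mul / ReduceBackSum / ExpandDims chain above picks out, for each row, the Q-value of the action that was actually taken. A NumPy equivalent with toy data (illustration only):

import numpy as np

q_all = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)    # Q(s, .) per row
actions = np.array([[0.0, 1.0], [1.0, 0.0]], dtype=np.float32)  # one-hot actions
q_selected = (q_all * actions).sum(axis=1)     # Mul + ReduceBackSum -> [2., 3.]
q_values = np.expand_dims(q_selected, axis=1)  # ExpandDims -> shape (2, 1)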
Code example #3
    def _forward_pass(
        cls, model, trainer, normalized_dense_matrix, actions, qnet_output_blob
    ):
        C2.set_model(model)

        parameters = []
        q_values = "q_values"
        C2.net().Copy([qnet_output_blob], [q_values])

        action_names = C2.NextBlob("action_names")
        parameters.append(action_names)
        workspace.FeedBlob(action_names, np.array(actions))
        action_range = C2.NextBlob("action_range")
        parameters.append(action_range)
        workspace.FeedBlob(action_range, np.array(list(range(len(actions)))))

        output_shape = C2.Shape(q_values)
        output_shape_row_count = C2.Slice(output_shape, starts=[0], ends=[1])

        output_row_shape = C2.Slice(q_values, starts=[0, 0], ends=[-1, 1])

        output_feature_keys = "output/string_weighted_multi_categorical_features.keys"
        workspace.FeedBlob(output_feature_keys, np.zeros(1, dtype=np.int64))
        output_feature_keys_matrix = C2.ConstantFill(
            output_row_shape, value=0, dtype=caffe2_pb2.TensorProto.INT64
        )
        # Note: sometimes we need to use an explicit output name, so we call
        #  C2.net().Fn(...)
        C2.net().FlattenToVec([output_feature_keys_matrix], [output_feature_keys])

        output_feature_lengths = (
            "output/string_weighted_multi_categorical_features.lengths"
        )
        workspace.FeedBlob(output_feature_lengths, np.zeros(1, dtype=np.int32))
        output_feature_lengths_matrix = C2.ConstantFill(
            output_row_shape, value=1, dtype=caffe2_pb2.TensorProto.INT32
        )
        C2.net().FlattenToVec([output_feature_lengths_matrix], [output_feature_lengths])

        output_keys = "output/string_weighted_multi_categorical_features.values.keys"
        workspace.FeedBlob(output_keys, np.array(["a"]))
        C2.net().Tile([action_names, output_shape_row_count], [output_keys], axis=0)

        output_lengths_matrix = C2.ConstantFill(
            output_row_shape, value=len(actions), dtype=caffe2_pb2.TensorProto.INT32
        )
        output_lengths = (
            "output/string_weighted_multi_categorical_features.values.lengths"
        )
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().FlattenToVec([output_lengths_matrix], [output_lengths])

        output_values = (
            "output/string_weighted_multi_categorical_features.values.values"
        )
        workspace.FeedBlob(output_values, np.array([1.0]))
        C2.net().FlattenToVec([q_values], [output_values])
        return parameters, q_values
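The blob bookkeeping above wraps the raw q_values matrix in a ragged keys/lengths/values output schema. A rough NumPy picture of the shapes involved, using made-up data:

import numpy as np

q_values = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
row_shape = q_values[:, :1]                    # Slice -> one entry per row
feature_keys = np.zeros(row_shape.shape, dtype=np.int64).flatten()   # ConstantFill + FlattenToVec
feature_lengths = np.ones(row_shape.shape, dtype=np.int32).flatten()
value_lengths = np.full(row_shape.shape, q_values.shape[1], dtype=np.int32).flatten()
output_values = q_values.flatten()             # FlattenToVec of q_values itself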
Code example #4
    def process(
        self,
        sorted_features: List[int],
        sparse_data: StackedAssociativeArray,
        set_missing_value_to_zero: bool = False,
    ) -> Tuple[str, List[str]]:
        lengths_blob = sparse_data.lengths
        keys_blob = sparse_data.keys
        values_blob = sparse_data.values

        MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
        missing_value = 0.0 if set_missing_value_to_zero else MISSING_VALUE
        workspace.FeedBlob(MISSING_SCALAR,
                           np.array([missing_value], dtype=np.float32))
        C2.net().GivenTensorFill([], [MISSING_SCALAR],
                                 shape=[],
                                 values=[missing_value])

        parameters: List[str] = [MISSING_SCALAR]

        assert len(sorted_features) > 0, "Sorted features is empty"
        dense_input = C2.SparseToDenseMask(keys_blob,
                                           values_blob,
                                           MISSING_SCALAR,
                                           lengths_blob,
                                           mask=sorted_features)[0]

        return dense_input, parameters
Code example #5
    def _sum_deterministic_policy(self, model_names, path):
        net = core.Net('DeterministicPolicy')
        C2.set_net(net)
        output = 'ActionProbabilities'
        workspace.FeedBlob(output, np.array([1.0]))
        model_outputs = []
        for model in model_names:
            model_output = '{}_Output'.format(model)
            workspace.FeedBlob(model_output, np.array([1.0], dtype=np.float32))
            model_outputs.append(model_output)
        max_action = C2.FlattenToVec(
            C2.ArgMax(C2.Transpose(C2.Sum(*model_outputs)))
        )
        one_blob = C2.NextBlob('one')
        workspace.FeedBlob(one_blob, np.array([1.0], dtype=np.float32))
        C2.net().SparseToDense(
            [max_action, one_blob, model_outputs[0]],
            [output],
        )
        meta = PredictorExportMeta(
            net,
            [one_blob],
            model_outputs,
            [output],
        )
        save_to_db('minidb', path, meta)
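In NumPy terms, the net built above computes the following (toy model outputs; the real blobs are filled at serving time):

import numpy as np

# Each model emits a column of scores, one per action
model_outputs = [
    np.array([[0.2], [0.9], [0.1]], dtype=np.float32),
    np.array([[0.3], [0.1], [0.4]], dtype=np.float32),
]
summed = sum(model_outputs).T       # Sum + Transpose -> shape (1, n_actions)
best = summed.argmax(axis=1)        # ArgMax + FlattenToVec
probs = np.zeros_like(summed)
probs[0, best] = 1.0                # SparseToDense scatters a single 1.0
# probs == [[0., 1., 0.]]: a deterministic one-hot action distribution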
Code example #6
    def update_model(self, states: str, actions: str,
                     q_vals_target: str) -> None:
        """
        Takes in states, actions, and target q values. Updates the model:

            Runs the forward pass, computing Q(states, actions).
                Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
            Computes the loss of Q(states, actions) with respect to q_vals_target
            Updates the Q Network's weights according to the loss and optimizer

        :param states: Numpy array with shape (batch_size, state_dim). The ith
            row is a representation of the ith transition's state.
        :param actions: Numpy array with shape (batch_size, action_dim). The ith
            row is a representation of the ith transition's action.
        :param q_vals_target: Numpy array with shape (batch_size, 1). The ith
            row is the label to train against for the data from the ith transition.
        """
        model = C2.model()
        q_vals_target = C2.StopGradient(q_vals_target)
        q_values = C2.NextBlob("train_output")
        state_action_pairs, _ = C2.Concat(states, actions, axis=1)
        self.ml_trainer.make_forward_pass_ops(model, state_action_pairs,
                                              q_values, False)

        self.loss_blob = self.ml_trainer.generateLossOps(
            model, q_values, q_vals_target)
        model.AddGradientOperators([self.loss_blob])
        for param in model.params:
            if param in model.param_to_grad:
                param_grad = model.param_to_grad[param]
                # NanCheck adds an op that asserts the gradient is free of
                # NaNs at run time; its output blob is intentionally unused.
                param_grad = C2.NanCheck(param_grad)
        self.ml_trainer.addParameterUpdateOps(model)
Code example #7
    def update_model(
        self,
        states: str,
        actions: str,
        q_vals_target: str,
    ) -> None:
        """
        Takes in states, actions, and target q values. Updates the model:

            Runs the forward pass, computing Q(states, actions).
                Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
            Computes the loss of Q(states, actions) with respect to q_vals_target
            Updates the Q Network's weights according to the loss and optimizer

        :param states: Numpy array with shape (batch_size, state_dim). The ith
            row is a representation of the ith transition's state.
        :param actions: Numpy array with shape (batch_size, action_dim). The ith
            row contains the one-hotted representation of the ith action.
        :param q_vals_target: Numpy array with shape (batch_size, 1). The ith
            row is the label to train against for the data from the ith transition.
        """
        model = C2.model()
        q_vals_target = C2.StopGradient(q_vals_target)
        output_blob = C2.NextBlob("train_output")
        MakeForwardPassOps(
            model,
            self.model_id,
            states,
            output_blob,
            self.weights,
            self.biases,
            self.activations,
            self.layers,
            self.dropout_ratio,
            False,
        )
        q_val_select = C2.ReduceBackSum(C2.Mul(output_blob, actions))
        q_values = C2.ExpandDims(q_val_select, dims=[1])

        self.loss_blob = GenerateLossOps(
            model,
            q_values,
            q_vals_target,
        )
        model.AddGradientOperators([self.loss_blob])
        for param in model.params:
            if param in model.param_to_grad:
                param_grad = model.param_to_grad[param]
                # NanCheck adds an op that asserts the gradient is free of
                # NaNs at run time; its output blob is intentionally unused.
                param_grad = C2.NanCheck(param_grad)
        AddParameterUpdateOps(
            model,
            optimizer_input=self.optimizer,
            base_learning_rate=self.learning_rate,
            gamma=self.gamma,
            policy=self.lr_policy,
        )
Code example #8
    def get_q_values(self, states: str, actions: str, use_target_network: bool) -> str:
        state_action_pairs, _ = C2.Concat(states, actions, axis=1)
        q_values = C2.NextBlob("q_values")
        if use_target_network:
            self.target_network.make_forward_pass_ops(
                C2.model(), state_action_pairs, q_values, True
            )
        else:
            self.ml_trainer.make_forward_pass_ops(
                C2.model(), state_action_pairs, q_values, True
            )
        return q_values
Code example #9
    def get_q_values_all_actions(self, states: str, use_target_network: bool) -> str:
        """
        Takes in a set of states and runs the test Q Network on them.

        Creates Q(states, actions), a blob with shape (batch_size, action_dim).
        Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
        Note that action_j takes on every possible action (of which there are
        self.action_dim_. Stores blob in self.output_blob and returns its value.

        :param states: Numpy array with shape (batch_size, state_dim). Each row
            contains a representation of a state.
        :param possible_next_actions: Numpy array with shape (batch_size, action_dim).
            possible_next_actions[i][j] = 1 iff the agent can take action j from
            state i.
        :use_target_network: Boolean that indicates whether or not to use this
            trainer's TargetNetwork to compute Q values.
        """
        all_q_values = C2.NextBlob("all_q_values")
        if use_target_network:
            if self.conv_target_network is not None:
                conv_output_blob = C2.NextBlob("conv_output")
                self.conv_target_network.make_conv_pass_ops(
                    C2.model(), states, conv_output_blob
                )
                states = conv_output_blob
            self.target_network.make_forward_pass_ops(
                C2.model(), states, all_q_values, True
            )
        else:
            if self.conv_ml_trainer is not None:
                conv_output_blob = C2.NextBlob("conv_output")
                self.conv_ml_trainer.make_conv_pass_ops(
                    C2.model(), states, conv_output_blob
                )
                states = conv_output_blob
            self.ml_trainer.make_forward_pass_ops(
                C2.model(), states, all_q_values, True
            )
        return all_q_values
Code example #10
    def test_normalize_dense_matrix_enum(self):
        normalization_parameters = {
            1: NormalizationParameters(
                identify_types.ENUM,
                None,
                None,
                None,
                None,
                [12, 4, 2],
                None,
                None,
                None,
            ),
            2: NormalizationParameters(
                identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None
            ),
            3: NormalizationParameters(
                identify_types.ENUM, None, None, None, None, [15, 3], None, None, None
            ),
        }
        norm_net = core.Net("net")
        C2.set_net(norm_net)
        preprocessor = PreprocessorNet()

        inputs = np.zeros([4, 3], dtype=np.float32)
        feature_ids = [2, 1, 3]  # Sorted according to feature type
        inputs[:, feature_ids.index(1)] = [12, 4, 2, 2]
        inputs[:, feature_ids.index(2)] = [1.0, 2.0, 3.0, 3.0]
        inputs[:, feature_ids.index(3)] = [
            15, 3, 15, normalization.MISSING_VALUE
        ]
        input_blob = C2.NextBlob("input_blob")
        workspace.FeedBlob(input_blob, np.array([0], dtype=np.float32))
        normalized_output_blob, _ = preprocessor.normalize_dense_matrix(
            input_blob, feature_ids, normalization_parameters, "", False)
        workspace.FeedBlob(input_blob, inputs)
        workspace.RunNetOnce(norm_net)
        normalized_feature_matrix = workspace.FetchBlob(normalized_output_blob)

        np.testing.assert_allclose(
            np.array([
                [1.0, 1, 0, 0, 1, 0],
                [2.0, 0, 1, 0, 0, 1],
                [3.0, 0, 0, 1, 1, 0],
                [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
            ]),
            normalized_feature_matrix,
        )
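The expected matrix shows how ENUM features are one-hot expanded: the CONTINUOUS feature keeps one column, while each ENUM feature gets one column per possible value, and a missing entry maps to an all-zero row. A hedged NumPy sketch of that one-hot step (the sentinel below is a stand-in for normalization.MISSING_VALUE, whose exact value is assumed):

import numpy as np

MISSING = -1e9  # stand-in sentinel, not the real normalization.MISSING_VALUE
possible_values = np.array([15, 3], dtype=np.float32)
raw = np.array([15, 3, 15, MISSING], dtype=np.float32)
one_hot = (raw[:, None] == possible_values).astype(np.float32)
# -> [[1, 0], [0, 1], [1, 0], [0, 0]]; the missing entry matches nothing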
Code example #11
File: preprocessor_net.py Project: ozzie00/Horizon-1
    def _store_parameter(self, parameters, name, value):
        c2_name = C2.NextBlob(name)
        if C2.init_net():
            C2.init_net().GivenTensorFill(
                [],
                c2_name,
                shape=value.shape,
                values=value.flatten(),
                dtype=schema.data_type_for_dtype(value.dtype),
            )
            C2.init_net().AddExternalOutput(c2_name)
        else:
            workspace.FeedBlob(c2_name, value)
            parameters.append(c2_name)
        return c2_name
Code example #12
def save_sum_deterministic_policy(model_names, path, db_type):
    net = core.Net("DeterministicPolicy")
    C2.set_net(net)
    output = "ActionProbabilities"
    workspace.FeedBlob(output, np.array([1.0]))
    model_outputs = []
    for model in model_names:
        model_output = "{}_Output".format(model)
        workspace.FeedBlob(model_output, np.array([[1.0]], dtype=np.float32))
        model_outputs.append(model_output)
    max_action = C2.FlattenToVec(C2.ArgMax(C2.Transpose(C2.Sum(*model_outputs))))
    one_blob = C2.NextBlob("one")
    workspace.FeedBlob(one_blob, np.array([1.0], dtype=np.float32))
    C2.net().SparseToDense([max_action, one_blob, model_outputs[0]], [output])
    meta = PredictorExportMeta(net, [one_blob], model_outputs, [output])
    save_to_db(db_type, path, meta)
Code example #13
def sparse_to_dense(lengths_blob: str, keys_blob: str, values_blob: str,
                    sorted_features: List[int]) -> Tuple[str, List[str]]:
    MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
    workspace.FeedBlob(MISSING_SCALAR,
                       np.array([MISSING_VALUE], dtype=np.float32))
    C2.net().GivenTensorFill([], [MISSING_SCALAR],
                             shape=[],
                             values=[MISSING_VALUE])

    parameters: List[str] = [MISSING_SCALAR]

    assert len(sorted_features) > 0, "Sorted features is empty"
    dense_input = C2.SparseToDenseMask(keys_blob,
                                       values_blob,
                                       MISSING_SCALAR,
                                       lengths_blob,
                                       mask=sorted_features)[0]

    return dense_input, parameters
Code example #14
File: target_network.py Project: chinpeng/BlueWhale
    def target_values(self, input_blob: str) -> str:
        """ Estimates the values for the given inputs using the target network

        :param input_blob The blob holding the inputs
        """
        output_blob = C2.NextBlob("output_blob")
        MakeForwardPassOps(
            C2.model(),
            self.tn_model_id,
            input_blob,
            output_blob,
            self._weights,
            self._biases,
            self._trainer.activations,
            self._trainer.layers,
            self._trainer.dropout_ratio,
            True,
        )
        return output_blob
Code example #15
    def get_q_values_all_actions(
        self,
        states: str,
        use_target_network: bool,
    ) -> str:
        """
        Takes in a set of states and runs the test Q Network on them.

        Creates Q(states, actions), a blob with shape (batch_size, action_dim).
        Q(states, actions)[i][j] is an approximation of Q*(states[i], action_j).
        Note that action_j takes on every possible action (of which there are
        self.action_dim_. Stores blob in self.output_blob and returns its value.

        :param states: Numpy array with shape (batch_size, state_dim). Each row
            contains a representation of a state.
        :param possible_next_actions: Numpy array with shape (batch_size, action_dim).
            possible_next_actions[i][j] = 1 iff the agent can take action j from
            state i.
        :use_target_network: Boolean that indicates whether or not to use this
            trainer's TargetNetwork to compute Q values.
        """
        if use_target_network:
            return self.target_network.target_values(states)
        else:
            all_q_values = C2.NextBlob("all_q_values")
            MakeForwardPassOps(
                C2.model(),
                self.model_id + "_score",
                states,
                all_q_values,
                self.weights,
                self.biases,
                self.activations,
                self.layers,
                self.dropout_ratio,
                True,
            )
            return all_q_values
Code example #16
    def get_q_values(
        self,
        states: str,
        actions: str,
        use_target_network: bool,
    ) -> str:
        state_action_pairs, _ = C2.Concat(states, actions, axis=1)
        if use_target_network:
            return self.target_network.target_values(state_action_pairs)
        else:
            q_values = C2.NextBlob("q_values")
            MakeForwardPassOps(
                C2.model(),
                self.model_id,
                state_action_pairs,
                q_values,
                self.weights,
                self.biases,
                self.activations,
                self.layers,
                self.dropout_ratio,
                True,
            )
            return q_values
Code example #17
    def generate_train_net(
        cls,
        trainer,
        model,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        model_on_gpu,
    ):
        input_dim = trainer.state_dim
        if isinstance(trainer.actor, DataParallel):
            trainer.actor = trainer.actor.module

        buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
            trainer.actor, input_dim, model_on_gpu
        )
        actor_input_blob, actor_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
            buffer
        )
        torch_workspace = caffe2_netdef.workspace
        torch_parameters = torch_workspace.Blobs()

        parameters = []
        for blob_str in torch_parameters:
            if str(blob_str) == str(actor_input_blob):
                continue
            workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
            parameters.append(str(blob_str))

        torch_init_net = core.Net(caffe2_netdef.init_net)
        torch_predict_net = core.Net(caffe2_netdef.predict_net)
        # While converting to metanetdef, the external_input of predict_net
        # will be recomputed. Add the real output of init_net to parameters
        # to make sure they will be counted.
        parameters.extend(
            set(caffe2_netdef.init_net.external_output)
            - set(caffe2_netdef.init_net.external_input)
        )

        # Feed action scaling tensors for serving
        min_action_serving_blob = C2.NextBlob("min_action_range_tensor_serving")
        workspace.FeedBlob(
            min_action_serving_blob, min_action_range_tensor_serving.cpu().data.numpy()
        )
        parameters.append(str(min_action_serving_blob))

        max_action_serving_blob = C2.NextBlob("max_action_range_tensor_serving")
        workspace.FeedBlob(
            max_action_serving_blob, max_action_range_tensor_serving.cpu().data.numpy()
        )
        parameters.append(str(max_action_serving_blob))

        # Feed action scaling tensors for training [-1, 1] due to tanh actor
        min_vals_training = trainer.min_action_range_tensor_training.cpu().data.numpy()
        min_action_training_blob = C2.NextBlob("min_action_range_tensor_training")
        workspace.FeedBlob(min_action_training_blob, min_vals_training)
        parameters.append(str(min_action_training_blob))

        max_vals_training = trainer.max_action_range_tensor_training.cpu().data.numpy()
        max_action_training_blob = C2.NextBlob("max_action_range_tensor_training")
        workspace.FeedBlob(max_action_training_blob, max_vals_training)
        parameters.append(str(max_action_training_blob))

        return (
            torch_init_net,
            torch_predict_net,
            parameters,
            actor_input_blob,
            actor_output_blob,
            min_action_training_blob,
            max_action_training_blob,
            min_action_serving_blob,
            max_action_serving_blob,
        )
Code example #18
    def export(cls,
               trainer,
               actions,
               state_normalization_parameters,
               int_features=False):
        """ Creates a DiscreteActionPredictor from a DiscreteActionTrainer.

        :param trainer DiscreteActionTrainer
        :param actions list of action names
        :param state_normalization_parameters state NormalizationParameters
        :param int_features boolean indicating if int features blob will be present
        """

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob('input/image', np.zeros([1, 1, 1, 1],
                                                   dtype=np.int32))
        workspace.FeedBlob('input/float_features.lengths',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/float_features.keys',
                           np.zeros(1, dtype=np.int64))
        workspace.FeedBlob('input/float_features.values',
                           np.zeros(1, dtype=np.float32))

        input_feature_lengths = 'input_feature_lengths'
        input_feature_keys = 'input_feature_keys'
        input_feature_values = 'input_feature_values'

        if int_features:
            workspace.FeedBlob('input/int_features.lengths',
                               np.zeros(1, dtype=np.int32))
            workspace.FeedBlob('input/int_features.keys',
                               np.zeros(1, dtype=np.int64))
            workspace.FeedBlob('input/int_features.values',
                               np.zeros(1, dtype=np.int32))
            C2.net().Cast(['input/int_features.values'],
                          ['input/int_features.values_float'],
                          dtype=caffe2_pb2.TensorProto.FLOAT)
            C2.net().MergeMultiScalarFeatureTensors([
                'input/float_features.lengths', 'input/float_features.keys',
                'input/float_features.values', 'input/int_features.lengths',
                'input/int_features.keys', 'input/int_features.values_float'
            ], [
                input_feature_lengths, input_feature_keys, input_feature_values
            ])
        else:
            C2.net().Copy(['input/float_features.lengths'],
                          [input_feature_lengths])
            C2.net().Copy(['input/float_features.keys'], [input_feature_keys])
            C2.net().Copy(['input/float_features.values'],
                          [input_feature_values])

        parameters = []
        if state_normalization_parameters is not None:
            preprocessor = PreprocessorNet(net, True)
            parameters.extend(preprocessor.parameters)
            normalized_dense_matrix, new_parameters = \
                preprocessor.normalize_sparse_matrix(
                    input_feature_lengths,
                    input_feature_keys,
                    input_feature_values,
                    state_normalization_parameters,
                    'state_norm',
                )
            parameters.extend(new_parameters)
        else:
            # Image input.  Note: Currently this does the wrong thing if
            #   more than one image is passed at a time.
            normalized_dense_matrix = 'input/image'

        new_parameters, q_values = RLPredictor._forward_pass(
            model,
            trainer,
            normalized_dense_matrix,
            actions,
        )
        parameters.extend(new_parameters)

        # Get 1 x n action index tensor under the max_q policy
        max_q_act_idxs = 'max_q_policy_actions'
        C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
        shape_of_num_of_states = 'num_states_shape'
        C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
        num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

        # Get 1 x n action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32))
        tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = 'softmax_act_idxs_nested'
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_idxs = 'softmax_policy_actions'
        C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

        # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
        # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_idxs,
                                 to=caffe2_pb2.TensorProto.INT32)
        softmax_act_blob = C2.Cast(softmax_act_idxs,
                                   to=caffe2_pb2.TensorProto.INT32)
        C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
        transposed_action_idxs = C2.Transpose(max_q_act_blob)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        output_values = 'output/string_single_categorical_features.values'
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
        C2.net().Gather(["action_names", flat_transposed_action_idxs],
                        [output_values])

        output_lengths = 'output/string_single_categorical_features.lengths'
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill([shape_of_num_of_states], [output_lengths],
                              value=2,
                              dtype=caffe2_pb2.TensorProto.INT32)

        output_keys = 'output/string_single_categorical_features.keys'
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1],
                            value=0,
                            dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1],
                            value=1,
                            dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
        C2.net().FlattenToVec([output_key_tile], [output_keys])

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        return DiscreteActionPredictor(net, parameters, int_features)
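The softmax branch divides the Q-values by a temperature before sampling an action; in NumPy terms (toy values, with np.random standing in for Caffe2's WeightedSample):

import numpy as np

rng = np.random.default_rng(0)
q = np.array([1.0, 2.0, 0.5])
temperature = 0.5
logits = q / temperature          # Div with broadcast
p = np.exp(logits - logits.max())
p /= p.sum()                      # Softmax
action = rng.choice(len(p), p=p)  # WeightedSample equivalent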
Code example #19
File: preprocessor_net.py Project: hyzcn/Horizon
    def preprocess_blob(self, blob, normalization_parameters):
        """
        Takes in a blob and its normalization parameters. Outputs a tuple
        whose first element is a blob containing the normalized input blob
        and whose second element contains all the parameter blobs used to
        create it.

        Call this from a CPU context and ensure the input blob exists in it.
        """

        parameters: List[str] = []

        ZERO = self._store_parameter(parameters, "ZERO",
                                     np.array([0], dtype=np.float32))

        MISSING_U = self._store_parameter(
            parameters, "MISSING_U",
            np.array([MISSING_VALUE + 1e-4], dtype=np.float32))
        MISSING_L = self._store_parameter(
            parameters, "MISSING_L",
            np.array([MISSING_VALUE - 1e-4], dtype=np.float32))

        is_empty_l = C2.GT(blob, MISSING_L, broadcast=1)
        is_empty_u = C2.LT(blob, MISSING_U, broadcast=1)
        is_empty = C2.And(is_empty_l, is_empty_u)

        for i in range(len(normalization_parameters) - 1):
            if (normalization_parameters[i].feature_type !=
                    normalization_parameters[i + 1].feature_type):
                raise Exception(
                    "Only one feature type is allowed per call to preprocess_blob!"
                )
        feature_type = normalization_parameters[0].feature_type
        if feature_type == identify_types.BINARY:
            TOLERANCE = self._store_parameter(parameters, "TOLERANCE",
                                              np.array(1e-3, dtype=np.float32))
            is_gt_zero = C2.GT(blob,
                               C2.Add(ZERO, TOLERANCE, broadcast=1),
                               broadcast=1)
            is_lt_zero = C2.LT(blob,
                               C2.Sub(ZERO, TOLERANCE, broadcast=1),
                               broadcast=1)
            bool_blob = C2.Or(is_gt_zero, is_lt_zero)
            blob = C2.Cast(bool_blob, to=caffe2_pb2.TensorProto.FLOAT)
        elif feature_type == identify_types.PROBABILITY:
            blob = C2.Logit(C2.Clip(blob, min=0.01, max=0.99))
        elif feature_type == identify_types.ENUM:
            for parameter in normalization_parameters:
                possible_values = parameter.possible_values
                for x in possible_values:
                    if x < 0:
                        logger.fatal(
                            "Invalid enum possible value for feature: " +
                            str(x) + " " + str(parameter.possible_values))
                        raise Exception(
                            "Invalid enum possible value for feature " + blob +
                            ": " + str(x) + " " +
                            str(parameter.possible_values))

            int_blob = C2.Cast(blob, to=core.DataType.INT32)

            # Batch one hot transform with MISSING_VALUE as a possible value
            feature_lengths = [
                len(p.possible_values) + 1 for p in normalization_parameters
            ]
            feature_lengths_blob = self._store_parameter(
                parameters,
                "feature_lengths_blob",
                np.array(feature_lengths, dtype=np.int32),
            )

            feature_values = [
                x for p in normalization_parameters
                for x in p.possible_values + [int(MISSING_VALUE)]
            ]
            feature_values_blob = self._store_parameter(
                parameters,
                "feature_values_blob",
                np.array(feature_values, dtype=np.int32),
            )

            one_hot_output = C2.BatchOneHot(int_blob, feature_lengths_blob,
                                            feature_values_blob)
            flattened_one_hot = C2.FlattenToVec(one_hot_output)

            # Remove missing values with a mask
            cols_to_include = [[1] * len(p.possible_values) + [0]
                               for p in normalization_parameters]
            cols_to_include = [x for col in cols_to_include for x in col]
            mask = self._store_parameter(
                parameters, "mask", np.array(cols_to_include, dtype=np.int32))

            zero_vec = C2.ConstantFill(one_hot_output,
                                       value=0,
                                       dtype=caffe2_pb2.TensorProto.INT32)

            repeated_mask_bool = C2.Cast(C2.Add(zero_vec, mask, broadcast=1),
                                         to=core.DataType.BOOL)

            flattened_repeated_mask = C2.FlattenToVec(repeated_mask_bool)

            flattened_one_hot_proc = C2.NextBlob("flattened_one_hot_proc")
            flattened_one_hot_proc_indices = C2.NextBlob(
                "flattened_one_hot_proc_indices")
            C2.net().BooleanMask(
                [flattened_one_hot, flattened_repeated_mask],
                [flattened_one_hot_proc, flattened_one_hot_proc_indices],
            )

            one_hot_shape = C2.Shape(one_hot_output)

            shape_delta = self._store_parameter(
                parameters,
                "shape_delta",
                np.array([0, len(normalization_parameters)], dtype=np.int64),
            )

            target_shape = C2.Sub(one_hot_shape, shape_delta, broadcast=1)
            output_int_blob = C2.NextBlob("output_int_blob")
            output_int_blob_old_shape = C2.NextBlob(
                "output_int_blob_old_shape")
            C2.net().Reshape(
                [flattened_one_hot_proc, target_shape],
                [output_int_blob, output_int_blob_old_shape],
            )

            output_blob = C2.Cast(output_int_blob, to=core.DataType.FLOAT)

            return output_blob, parameters
        elif feature_type == identify_types.QUANTILE:
            # This transformation replaces a set of values with their quantile.
            # The quantile boundaries are provided in the normalization params.

            quantile_sizes = [
                len(norm.quantiles) for norm in normalization_parameters
            ]
            num_boundaries_blob = self._store_parameter(
                parameters,
                "num_boundaries_blob",
                np.array(quantile_sizes, dtype=np.int32),
            )

            quantile_values = np.array([], dtype=np.float32)
            quantile_labels = np.array([], dtype=np.float32)
            for norm in normalization_parameters:
                quantile_values = np.append(
                    quantile_values, np.array(norm.quantiles,
                                              dtype=np.float32))
                # TODO: Fix this: the np.unique is making this part not true.
                quantile_labels = np.append(
                    quantile_labels,
                    np.arange(len(norm.quantiles), dtype=np.float32) /
                    float(len(norm.quantiles)),
                )
            quantiles = np.vstack([quantile_values, quantile_labels]).T
            quantiles_blob = self._store_parameter(parameters,
                                                   "quantiles_blob", quantiles)

            quantile_blob = C2.Percentile(blob, quantiles_blob,
                                          num_boundaries_blob)
            blob = quantile_blob
        elif (feature_type == identify_types.CONTINUOUS
              or feature_type == identify_types.BOXCOX):
            boxcox_shifts = []
            boxcox_lambdas = []
            means = []
            stddevs = []

            for norm in normalization_parameters:
                if feature_type == identify_types.BOXCOX:
                    assert (norm.boxcox_shift is not None
                            and norm.boxcox_lambda is not None)
                    boxcox_shifts.append(norm.boxcox_shift)
                    boxcox_lambdas.append(norm.boxcox_lambda)
                means.append(norm.mean)
                stddevs.append(norm.stddev)

            if feature_type == identify_types.BOXCOX:
                boxcox_shift_blob = self._store_parameter(
                    parameters,
                    "boxcox_shift",
                    np.array(boxcox_shifts, dtype=np.float32),
                )
                boxcox_lambda_blob = self._store_parameter(
                    parameters,
                    "boxcox_shift",
                    np.array(boxcox_lambdas, dtype=np.float32),
                )

                blob = C2.BatchBoxCox(blob, boxcox_lambda_blob,
                                      boxcox_shift_blob)

            means_blob = self._store_parameter(
                parameters, "means_blob", np.array([means], dtype=np.float32))
            stddevs_blob = self._store_parameter(
                parameters, "stddevs_blob",
                np.array([stddevs], dtype=np.float32))

            blob = C2.Sub(blob, means_blob, broadcast=1, axis=0)
            blob = C2.Div(blob, stddevs_blob, broadcast=1, axis=0)
            if self.clip_anomalies:
                blob = C2.Clip(blob, min=-3.0, max=3.0)
        else:
            raise NotImplementedError(
                "Invalid feature type: {}".format(feature_type))

        zeros = C2.ConstantFill(blob, value=0.)
        output_blob = C2.Where(is_empty, zeros, blob)

        return output_blob, parameters
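For the CONTINUOUS branch, the Sub / Div / Clip sequence is ordinary standardization; in NumPy (illustrative numbers):

import numpy as np

x = np.array([[0.5, 10.0]], dtype=np.float32)
means = np.array([[0.0, 8.0]], dtype=np.float32)
stddevs = np.array([[1.0, 2.0]], dtype=np.float32)
z = (x - means) / stddevs  # Sub + Div, broadcast across the batch
z = np.clip(z, -3.0, 3.0)  # Clip applied when clip_anomalies is set
# z == [[0.5, 1.0]]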
Code example #20
File: preprocessor_net.py Project: hyzcn/Horizon
    def normalize_sparse_matrix(
        self,
        lengths_blob: str,
        keys_blob: str,
        values_blob: str,
        normalization_parameters: Dict[int, NormalizationParameters],
        blobname_prefix: str,
        split_sparse_to_dense: bool,
        split_expensive_feature_groups: bool,
        normalize: bool = True,
        sorted_features_override: Optional[List[int]] = None,
    ) -> Tuple[str, List[str]]:
        if sorted_features_override:
            sorted_features = sorted_features_override
        else:
            sorted_features, _ = sort_features_by_normalization(
                normalization_parameters)
        int_features = [int(feature) for feature in sorted_features]

        preprocess_num_batches = 8 if split_sparse_to_dense else 1

        lengths_batch = []
        keys_batch = []
        values_batch = []
        for _ in range(preprocess_num_batches):
            lengths_batch.append(C2.NextBlob(blobname_prefix +
                                             "_length_batch"))
            keys_batch.append(C2.NextBlob(blobname_prefix + "_key_batch"))
            values_batch.append(C2.NextBlob(blobname_prefix + "_value_batch"))

        C2.net().Split([lengths_blob], lengths_batch, axis=0)
        total_lengths_batch = []
        for x in range(preprocess_num_batches):
            total_lengths_batch.append(
                C2.Reshape(C2.ReduceBackSum(lengths_batch[x],
                                            num_reduce_dims=1),
                           shape=[1])[0])
        total_lengths_batch_concat, _ = C2.Concat(*total_lengths_batch, axis=0)
        C2.net().Split([keys_blob, total_lengths_batch_concat],
                       keys_batch,
                       axis=0)
        C2.net().Split([values_blob, total_lengths_batch_concat],
                       values_batch,
                       axis=0)

        dense_input_fragments = []
        parameters: List[str] = []

        MISSING_SCALAR = self._store_parameter(
            parameters, "MISSING_SCALAR",
            np.array([MISSING_VALUE], dtype=np.float32))
        C2.net().GivenTensorFill([], [MISSING_SCALAR],
                                 shape=[],
                                 values=[MISSING_VALUE])

        for preprocess_batch in range(preprocess_num_batches):
            dense_input_fragment = C2.SparseToDenseMask(
                keys_batch[preprocess_batch],
                values_batch[preprocess_batch],
                MISSING_SCALAR,
                lengths_batch[preprocess_batch],
                mask=int_features,
            )[0]

            if normalize:
                normalized_fragment, p = self.normalize_dense_matrix(
                    dense_input_fragment,
                    sorted_features,
                    normalization_parameters,
                    blobname_prefix,
                    split_expensive_feature_groups,
                )
                dense_input_fragments.append(normalized_fragment)
                parameters.extend(p)
            else:
                dense_input_fragments.append(dense_input_fragment)

        dense_input = C2.NextBlob(blobname_prefix + "_dense_input")
        dense_input_dims = C2.NextBlob(blobname_prefix + "_dense_input_dims")
        C2.net().Concat(dense_input_fragments, [dense_input, dense_input_dims],
                        axis=0)

        return dense_input, parameters
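The batched Split bookkeeping above is easier to follow in NumPy: the lengths are split first, and the per-batch length sums then drive the key/value splits (toy data):

import numpy as np

lengths = np.array([2, 1, 3, 2], dtype=np.int32)  # 4 examples
keys = np.arange(lengths.sum())                   # 8 sparse entries
length_batches = np.split(lengths, 2)             # Split on lengths, axis=0
totals = [int(b.sum()) for b in length_batches]   # ReduceBackSum per batch -> [3, 5]
key_batches = np.split(keys, np.cumsum(totals)[:-1])  # Split keys by those totals
# key_batches == [array([0, 1, 2]), array([3, 4, 5, 6, 7])]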
Code example #21
File: preprocessor_net.py Project: hyzcn/Horizon
    def _store_parameter(self, parameters, name, value):
        c2_name = C2.NextBlob(name)
        workspace.FeedBlob(c2_name, value)
        parameters.append(c2_name)
        return c2_name
Code example #22
    def export(
        cls,
        trainer,
        state_normalization_parameters,
        action_normalization_parameters,
        int_features=False,
        model_on_gpu=False,
    ):
        """Export caffe2 preprocessor net and pytorch DQN forward pass as one
        caffe2 net.

        :param trainer ParametricDQNTrainer
        :param state_normalization_parameters state NormalizationParameters
        :param action_normalization_parameters action NormalizationParameters
        :param int_features boolean indicating if int features blob will be present
        :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
        """

        input_dim = trainer.num_features
        if isinstance(trainer.q_network, DataParallel):
            trainer.q_network = trainer.q_network.module

        buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
            trainer.q_network, input_dim, model_on_gpu
        )
        qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
            buffer
        )
        torch_workspace = caffe2_netdef.workspace

        parameters = torch_workspace.Blobs()
        for blob_str in parameters:
            workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

        torch_init_net = core.Net(caffe2_netdef.init_net)
        torch_predict_net = core.Net(caffe2_netdef.predict_net)
        # While converting to metanetdef, the external_input of predict_net
        # will be recomputed. Add the real output of init_net to parameters
        # to make sure they will be counted.
        parameters.extend(
            set(caffe2_netdef.init_net.external_output)
            - set(caffe2_netdef.init_net.external_input)
        )

        # ensure state and action IDs have no intersection
        assert (
            len(
                set(state_normalization_parameters.keys())
                & set(action_normalization_parameters.keys())
            )
            == 0
        )

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            workspace.FeedBlob(
                "input/int_features.lengths", np.zeros(1, dtype=np.int32)
            )
            workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [input_feature_lengths, input_feature_keys, input_feature_values],
            )
        else:
            C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"], [input_feature_values])

        preprocessor = PreprocessorNet(True)
        sorted_state_features, _ = sort_features_by_normalization(
            state_normalization_parameters
        )
        state_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_state_features,
        )
        parameters.extend(new_parameters)
        state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
            state_dense_matrix,
            sorted_state_features,
            state_normalization_parameters,
            "state_norm",
            False,
        )
        parameters.extend(new_parameters)

        sorted_action_features, _ = sort_features_by_normalization(
            action_normalization_parameters
        )
        action_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_action_features,
        )
        parameters.extend(new_parameters)
        action_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
            action_dense_matrix,
            sorted_action_features,
            action_normalization_parameters,
            "action_norm",
            False,
        )
        parameters.extend(new_parameters)

        state_action_normalized = "state_action_normalized"
        state_action_normalized_dim = "state_action_normalized_dim"
        net.Concat(
            [state_normalized_dense_matrix, action_normalized_dense_matrix],
            [state_action_normalized, state_action_normalized_dim],
            axis=1,
        )

        net.Copy([state_action_normalized], [qnet_input_blob])

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(torch_init_net)

        net.AppendNet(torch_predict_net)

        new_parameters, q_values = RLPredictor._forward_pass(
            model, trainer, state_action_normalized, ["Q"], qnet_output_blob
        )
        parameters.extend(new_parameters)

        flat_q_values_key = (
            "output/string_weighted_multi_categorical_features.values.values"
        )
        num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
        q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

        # Get 1 x n (number of examples) action index tensor under the max_q policy
        max_q_act_idxs = "max_q_policy_actions"
        C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
        max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

        # Get 1 x n (number of examples) action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32)
        )
        tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = "softmax_act_idxs_nested"
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_blob = C2.Tile(
            C2.FlattenToVec(softmax_act_idxs_nested), num_examples, axis=0
        )

        # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
        # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
        softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
        max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
        softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
        C2.net().Append(
            [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
        )
        transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        output_values = "output/int_single_categorical_features.values"
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
        C2.net().Copy([flat_transposed_action_idxs], [output_values])

        output_lengths = "output/int_single_categorical_features.lengths"
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [flat_q_values_key],
            [output_lengths],
            value=2,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        output_keys = "output/int_single_categorical_features.keys"
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
        C2.net().FlattenToVec([output_key_tile], [output_keys])

        workspace.CreateNet(net)
        return ParametricDQNPredictor(net, torch_init_net, parameters, int_features)
Code example #23
    def export(
        cls,
        trainer,
        state_normalization_parameters,
        action_normalization_parameters,
    ):
        """ Creates ContinuousActionDQNPredictor from a list of action trainers

        :param trainer ContinuousActionDQNPredictor
        :param state_features list of state feature names
        :param action_features list of action feature names
        """
        # ensure state and action IDs have no intersection
        assert (len(
            set(state_normalization_parameters.keys())
            & set(action_normalization_parameters.keys())) == 0)

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob('input/float_features.lengths',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/float_features.keys',
                           np.zeros(1, dtype=np.int64))
        workspace.FeedBlob('input/float_features.values',
                           np.zeros(1, dtype=np.float32))

        preprocessor = PreprocessorNet(net, True)
        parameters = []
        parameters.extend(preprocessor.parameters)
        state_normalized_dense_matrix, new_parameters = \
            preprocessor.normalize_sparse_matrix(
                'input/float_features.lengths',
                'input/float_features.keys',
                'input/float_features.values',
                state_normalization_parameters,
                'state_norm',
            )
        parameters.extend(new_parameters)
        action_normalized_dense_matrix, new_parameters = \
            preprocessor.normalize_sparse_matrix(
                'input/float_features.lengths',
                'input/float_features.keys',
                'input/float_features.values',
                action_normalization_parameters,
                'action_norm',
            )
        parameters.extend(new_parameters)
        state_action_normalized = 'state_action_normalized'
        state_action_normalized_dim = 'state_action_normalized_dim'
        net.Concat(
            [state_normalized_dense_matrix, action_normalized_dense_matrix],
            [state_action_normalized, state_action_normalized_dim],
            axis=1)
        new_parameters, q_values = RLPredictor._forward_pass(
            model,
            trainer,
            state_action_normalized,
            ['Q'],
        )
        parameters.extend(new_parameters)

        flat_q_values_key = \
            'output/string_weighted_multi_categorical_features.values.values'
        num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
        q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

        # Get 1 x n (number of examples) action index tensor under the max_q policy
        max_q_act_idxs = 'max_q_policy_actions'
        C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
        max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

        # Get 1 x n (number of examples) action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32))
        tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = 'softmax_act_idxs_nested'
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_blob = C2.Tile(C2.FlattenToVec(softmax_act_idxs_nested),
                                   num_examples,
                                   axis=0)

        # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
        # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_blob,
                                 to=caffe2_pb2.TensorProto.INT64)
        softmax_act_blob = C2.Cast(softmax_act_blob,
                                   to=caffe2_pb2.TensorProto.INT64)
        max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
        softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob,
                                                shape=[1, -1])
        C2.net().Append([max_q_act_blob_nested, softmax_act_blob_nested],
                        [max_q_act_blob_nested])
        transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        output_values = 'output/int_single_categorical_features.values'
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
        C2.net().Copy([flat_transposed_action_idxs], [output_values])

        output_lengths = 'output/int_single_categorical_features.lengths'
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill([flat_q_values_key], [output_lengths],
                              value=2,
                              dtype=caffe2_pb2.TensorProto.INT32)

        output_keys = 'output/int_single_categorical_features.keys'
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1],
                            value=0,
                            dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1],
                            value=1,
                            dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
        C2.net().FlattenToVec([output_key_tile], [output_keys])

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        return ContinuousActionDQNPredictor(net, parameters)
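
The Append / Transpose / FlattenToVec sequence above is an interleaving trick: stacking the two policies' action-index rows into a 2 x n tensor, transposing to n x 2, and flattening yields [a_maxq, a_softmax, a_maxq, a_softmax, ...]. A minimal NumPy sketch of the same idea (hypothetical action indices, not part of the original net):

    import numpy as np

    max_q_actions = np.array([2, 0, 1])    # per-example action index, max-Q policy
    softmax_actions = np.array([1, 2, 1])  # per-example action index, softmax policy

    stacked = np.stack([max_q_actions, softmax_actions])  # shape (2, n)
    interleaved = stacked.T.flatten()                     # pairs per example
    print(interleaved)  # [2 1 0 2 1 1]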
Code example #24
    def export_actor(
        cls,
        trainer,
        state_normalization_parameters,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        int_features=False,
        model_on_gpu=False,
    ):
        """Export caffe2 preprocessor net and pytorch actor forward pass as one
        caffe2 net.

        :param trainer DDPGTrainer
        :param state_normalization_parameters state NormalizationParameters
        :param min_action_range_tensor_serving pytorch tensor that specifies
            min action value for each dimension
        :param max_action_range_tensor_serving pytorch tensor that specifies
            max action value for each dimension
        :param int_features boolean indicating if int features blob will be present
        :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
        """
        input_dim = trainer.state_dim
        buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
            trainer.actor, input_dim, model_on_gpu
        )
        actor_input_blob, actor_output_blob, caffe2_netdef = (
            PytorchCaffe2Converter.buffer_to_caffe2_netdef(buffer)
        )
        torch_workspace = caffe2_netdef.workspace

        parameters = torch_workspace.Blobs()
        for blob_str in parameters:
            workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

        torch_init_net = core.Net(caffe2_netdef.init_net)
        torch_predict_net = core.Net(caffe2_netdef.predict_net)

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        # Feed action scaling tensors for serving
        min_action_serving_blob = C2.NextBlob("min_action_range_tensor_serving")
        workspace.FeedBlob(
            min_action_serving_blob, min_action_range_tensor_serving.cpu().data.numpy()
        )
        parameters.append(str(min_action_serving_blob))

        max_action_serving_blob = C2.NextBlob("max_action_range_tensor_serving")
        workspace.FeedBlob(
            max_action_serving_blob, max_action_range_tensor_serving.cpu().data.numpy()
        )
        parameters.append(str(max_action_serving_blob))

        # Feed action scaling tensors for training [-1, 1] due to tanh actor
        min_vals_training = trainer.min_action_range_tensor_training.cpu().data.numpy()
        min_action_training_blob = C2.NextBlob("min_action_range_tensor_training")
        workspace.FeedBlob(min_action_training_blob, min_vals_training)
        parameters.append(str(min_action_training_blob))

        max_vals_training = trainer.max_action_range_tensor_training.cpu().data.numpy()
        max_action_training_blob = C2.NextBlob("max_action_range_tensor_training")
        workspace.FeedBlob(max_action_training_blob, max_vals_training)
        parameters.append(str(max_action_training_blob))

        workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            workspace.FeedBlob(
                "input/int_features.lengths", np.zeros(1, dtype=np.int32)
            )
            workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [input_feature_lengths, input_feature_keys, input_feature_values],
            )
        else:
            C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"], [input_feature_values])

        preprocessor = PreprocessorNet(True)
        state_normalized_dense_matrix, new_parameters = preprocessor.normalize_sparse_matrix(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            state_normalization_parameters,
            "state_norm",
            False,
            False,
        )
        parameters.extend(new_parameters)
        net.Copy([state_normalized_dense_matrix], [actor_input_blob])

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(torch_init_net)

        net.AppendNet(torch_predict_net)

        output_lengths = "output/float_features.lengths"
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [C2.FlattenToVec(C2.ArgMax(actor_output_blob))],
            [output_lengths],
            value=trainer.actor.layers[-1].out_features,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        output_keys = "output/float_features.keys"
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int32))
        C2.net().LengthsRangeFill([output_lengths], [output_keys])

        output_values = "output/float_features.values"
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
        # Scale actor's actions from [-1, 1] to the serving range
        prev_range = C2.Sub(max_action_training_blob, min_action_training_blob)
        new_range = C2.Sub(max_action_serving_blob, min_action_serving_blob)
        subtract_prev_min = C2.Sub(actor_output_blob, min_action_training_blob)
        div_by_prev_range = C2.Div(subtract_prev_min, prev_range)
        scaled_for_serving_actions = C2.Add(
            C2.Mul(div_by_prev_range, new_range), min_action_serving_blob
        )
        C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])

        workspace.CreateNet(net)
        return DDPGPredictor(net, parameters, int_features)
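
The Sub / Div / Mul / Add chain above is a per-dimension linear rescale from the tanh training range to the serving range: serving = (a - min_train) / (max_train - min_train) * (max_serve - min_serve) + min_serve. A minimal NumPy sketch with assumed ranges (the real ranges come from the trainer and the serving tensors):

    import numpy as np

    min_train, max_train = -1.0, 1.0         # tanh actor output range
    min_serve, max_serve = 0.0, 10.0         # assumed serving range
    actor_output = np.array([-1.0, 0.0, 1.0])

    scaled = (actor_output - min_train) / (max_train - min_train) \
        * (max_serve - min_serve) + min_serve
    print(scaled)  # [ 0.  5. 10.]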
Code example #25
File: dqn_predictor.py Project: wenqingchu/Horizon
    def export(
        cls,
        trainer,
        actions,
        state_normalization_parameters,
        int_features=False,
        model_on_gpu=False,
        set_missing_value_to_zero=False,
    ):
        """Export caffe2 preprocessor net and pytorch DQN forward pass as one
        caffe2 net.

        :param trainer DQNTrainer
        :param actions list of action names
        :param state_normalization_parameters state NormalizationParameters
        :param int_features boolean indicating if int features blob will be present
        :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
        :param set_missing_value_to_zero boolean indicating if missing features
            should be imputed as zero instead of MISSING_VALUE
        """

        input_dim = trainer.num_features

        q_network = (trainer.q_network.module if isinstance(
            trainer.q_network, DataParallel) else trainer.q_network)

        buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
            q_network, input_dim, model_on_gpu)
        qnet_input_blob, qnet_output_blob, caffe2_netdef = (
            PytorchCaffe2Converter.buffer_to_caffe2_netdef(buffer)
        )
        torch_workspace = caffe2_netdef.workspace

        parameters = torch_workspace.Blobs()
        for blob_str in parameters:
            workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

        torch_init_net = core.Net(caffe2_netdef.init_net)
        torch_predict_net = core.Net(caffe2_netdef.predict_net)
        logger.info("Generated ONNX predict net:")
        logger.info(str(torch_predict_net.Proto()))
        # While converting to metanetdef, the external_input of predict_net
        # will be recomputed. Add the real output of init_net to parameters
        # to make sure they will be counted.
        parameters.extend(
            set(caffe2_netdef.init_net.external_output) -
            set(caffe2_netdef.init_net.external_input))

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1],
                                                   dtype=np.int32))
        workspace.FeedBlob("input/float_features.lengths",
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys",
                           np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values",
                           np.zeros(1, dtype=np.float32))

        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            workspace.FeedBlob("input/int_features.lengths",
                               np.zeros(1, dtype=np.int32))
            workspace.FeedBlob("input/int_features.keys",
                               np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values",
                               np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [
                    input_feature_lengths, input_feature_keys,
                    input_feature_values
                ],
            )
        else:
            C2.net().Copy(["input/float_features.lengths"],
                          [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"],
                          [input_feature_values])

        if state_normalization_parameters is not None:
            sorted_feature_ids = sort_features_by_normalization(
                state_normalization_parameters)[0]
            dense_matrix, new_parameters = sparse_to_dense(
                input_feature_lengths,
                input_feature_keys,
                input_feature_values,
                sorted_feature_ids,
                set_missing_value_to_zero=set_missing_value_to_zero,
            )
            parameters.extend(new_parameters)
            preprocessor_net = PreprocessorNet()
            state_normalized_dense_matrix, new_parameters = preprocessor_net.normalize_dense_matrix(
                dense_matrix,
                sorted_feature_ids,
                state_normalization_parameters,
                "state_norm_",
                True,
            )
            parameters.extend(new_parameters)
        else:
            # Image input.  Note: Currently this does the wrong thing if
            #   more than one image is passed at a time.
            state_normalized_dense_matrix = "input/image"

        net.Copy([state_normalized_dense_matrix], [qnet_input_blob])

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(torch_init_net)

        net.AppendNet(torch_predict_net)

        new_parameters, q_values = RLPredictor._forward_pass(
            model, trainer, state_normalized_dense_matrix, actions,
            qnet_output_blob)
        parameters.extend(new_parameters)

        # Get 1 x n action index tensor under the max_q policy
        max_q_act_idxs = "max_q_policy_actions"
        C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
        shape_of_num_of_states = "num_states_shape"
        C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
        num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

        # Get 1 x n action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32))
        tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = "softmax_act_idxs_nested"
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_idxs = "softmax_policy_actions"
        C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

        action_names = C2.NextBlob("action_names")
        parameters.append(action_names)
        workspace.FeedBlob(action_names, np.array(actions))

        # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
        # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_idxs,
                                 to=caffe2_pb2.TensorProto.INT32)
        softmax_act_blob = C2.Cast(softmax_act_idxs,
                                   to=caffe2_pb2.TensorProto.INT32)
        C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
        transposed_action_idxs = C2.Transpose(max_q_act_blob)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME,
                           np.zeros(1, dtype=np.int64))
        C2.net().Gather([action_names, flat_transposed_action_idxs],
                        [OUTPUT_SINGLE_CAT_VALS_NAME])

        workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME,
                           np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [shape_of_num_of_states],
            [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
            value=2,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME,
                           np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1],
                            value=0,
                            dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1],
                            value=1,
                            dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
        C2.net().FlattenToVec([output_key_tile], [OUTPUT_SINGLE_CAT_KEYS_NAME])

        workspace.CreateNet(net)
        return DQNPredictor(net, torch_init_net, parameters, int_features)
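
The softmax policy above samples an action from a temperature-scaled softmax over the Q-values: lower temperatures concentrate probability on the arg-max action, higher temperatures flatten the distribution. A minimal NumPy sketch of the same computation (assumed Q-values and temperature):

    import numpy as np

    q_values = np.array([1.0, 2.0, 3.0])  # one row of Q-values (assumed)
    temperature = 0.5                     # plays the role of trainer.rl_temperature

    tempered = q_values / temperature
    exp_q = np.exp(tempered - tempered.max())  # shift for numerical stability
    probs = exp_q / exp_q.sum()

    rng = np.random.default_rng(0)
    action = rng.choice(len(q_values), p=probs)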
Code example #26
    def export(
        cls,
        trainer,
        state_normalization_parameters,
        action_normalization_parameters,
        int_features=False,
    ):
        """ Creates a ContinuousActionDQNPredictor from a ContinuousActionDQNTrainer.

        :param trainer ContinuousActionDQNTrainer
        :param state_normalization_parameters state NormalizationParameters
        :param action_normalization_parameters action NormalizationParameters
        :param int_features boolean indicating if int features blob will be present
        """
        # ensure state and action IDs have no intersection
        assert (
            len(
                set(state_normalization_parameters.keys())
                & set(action_normalization_parameters.keys())
            )
            == 0
        )

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)

        workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            workspace.FeedBlob(
                "input/int_features.lengths", np.zeros(1, dtype=np.int32)
            )
            workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [input_feature_lengths, input_feature_keys, input_feature_values],
            )
        else:
            C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"], [input_feature_values])

        parameters = []
        state_normalized_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            state_normalization_parameters,
            None,
        )
        parameters.extend(new_parameters)
        action_normalized_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            action_normalization_parameters,
            None,
        )
        parameters.extend(new_parameters)
        state_action_normalized = "state_action_normalized"
        state_action_normalized_dim = "state_action_normalized_dim"
        net.Concat(
            [state_normalized_dense_matrix, action_normalized_dense_matrix],
            [state_action_normalized, state_action_normalized_dim],
            axis=1,
        )
        new_parameters, q_values = RLPredictor._forward_pass(
            model, trainer, state_action_normalized, ["Q"]
        )
        parameters.extend(new_parameters)

        flat_q_values_key = (
            "output/string_weighted_multi_categorical_features.values.values"
        )
        num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
        q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

        # Get 1 x n (number of examples) action index tensor under the max_q policy
        max_q_act_idxs = "max_q_policy_actions"
        C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
        max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

        # Get 1 x n (number of examples) action index tensor under the softmax policy
        temperature = C2.NextBlob("temperature")
        parameters.append(temperature)
        workspace.FeedBlob(
            temperature, np.array([trainer.rl_temperature], dtype=np.float32)
        )
        tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
        softmax_values = C2.Softmax(tempered_q_values)
        softmax_act_idxs_nested = "softmax_act_idxs_nested"
        C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
        softmax_act_blob = C2.Tile(
            C2.FlattenToVec(softmax_act_idxs_nested), num_examples, axis=0
        )

        # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
        # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
        max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
        softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
        max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
        softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
        C2.net().Append(
            [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
        )
        transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
        flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
        output_values = "output/int_single_categorical_features.values"
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
        C2.net().Copy([flat_transposed_action_idxs], [output_values])

        output_lengths = "output/int_single_categorical_features.lengths"
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [flat_q_values_key],
            [output_lengths],
            value=2,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        output_keys = "output/int_single_categorical_features.keys"
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        output_keys_tensor, _ = C2.Concat(
            C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
            C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
            axis=0,
        )
        output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
        C2.net().FlattenToVec([output_key_tile], [output_keys])

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        return ContinuousActionDQNPredictor(net, parameters, int_features)
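
The three output blobs above encode, per example, a flattened "single categorical" record: lengths says each example contributes two entries, keys tags them (0 = max-Q policy, 1 = softmax policy), and values carries the interleaved action indices. A hedged illustration of the layout for n = 2 examples (the action indices are made up):

    n = 2
    lengths = [2] * n       # two policy decisions per example
    keys    = [0, 1] * n    # 0 = max-Q action, 1 = softmax action
    values  = [3, 1, 0, 0]  # [a_maxq, a_softmax, a_maxq, a_softmax]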
Code example #27
    def export_actor(
        cls,
        trainer,
        state_normalization_parameters,
        action_feature_ids,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        int_features=False,
        model_on_gpu=False,
    ):
        """Export caffe2 preprocessor net and pytorch actor forward pass as one
        caffe2 net.

        :param trainer DDPGTrainer
        :param state_normalization_parameters state NormalizationParameters
        :param action_feature_ids list of action feature ids
        :param min_action_range_tensor_serving pytorch tensor that specifies
            min action value for each dimension
        :param max_action_range_tensor_serving pytorch tensor that specifies
            max action value for each dimension
        :param int_features boolean indicating if int features blob will be present
        :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
        """
        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        C2.set_model(model)
        parameters: List[str] = []

        workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

        input_feature_lengths = "input_feature_lengths"
        input_feature_keys = "input_feature_keys"
        input_feature_values = "input_feature_values"

        if int_features:
            workspace.FeedBlob(
                "input/int_features.lengths", np.zeros(1, dtype=np.int32)
            )
            workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
            workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
            C2.net().Cast(
                ["input/int_features.values"],
                ["input/int_features.values_float"],
                dtype=caffe2_pb2.TensorProto.FLOAT,
            )
            C2.net().MergeMultiScalarFeatureTensors(
                [
                    "input/float_features.lengths",
                    "input/float_features.keys",
                    "input/float_features.values",
                    "input/int_features.lengths",
                    "input/int_features.keys",
                    "input/int_features.values_float",
                ],
                [input_feature_lengths, input_feature_keys, input_feature_values],
            )
        else:
            C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
            C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
            C2.net().Copy(["input/float_features.values"], [input_feature_values])

        preprocessor = PreprocessorNet()
        sorted_features, _ = sort_features_by_normalization(
            state_normalization_parameters
        )
        state_dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_features,
        )
        parameters.extend(new_parameters)
        state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
            state_dense_matrix,
            sorted_features,
            state_normalization_parameters,
            "state_norm",
            False,
        )
        parameters.extend(new_parameters)

        (
            torch_init_net, torch_predict_net, new_parameters,
            actor_input_blob, actor_output_blob,
            min_action_training_blob, max_action_training_blob,
            min_action_serving_blob, max_action_serving_blob,
        ) = DDPGPredictor.generate_train_net(
            trainer,
            model,
            min_action_range_tensor_serving,
            max_action_range_tensor_serving,
            model_on_gpu,
        )
        parameters.extend(new_parameters)
        net.Copy([state_normalized_dense_matrix], [actor_input_blob])

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(torch_init_net)

        net.AppendNet(torch_predict_net)

        # Scale actor's actions from [-1, 1] to the serving range
        prev_range = C2.Sub(max_action_training_blob, min_action_training_blob)
        new_range = C2.Sub(max_action_serving_blob, min_action_serving_blob)
        subtract_prev_min = C2.Sub(actor_output_blob, min_action_training_blob)
        div_by_prev_range = C2.Div(subtract_prev_min, prev_range)
        scaled_for_serving_actions = C2.Add(
            C2.Mul(div_by_prev_range, new_range), min_action_serving_blob
        )

        output_lengths = "output/float_features.lengths"
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        C2.net().ConstantFill(
            [C2.FlattenToVec(C2.ArgMax(actor_output_blob))],
            [output_lengths],
            value=trainer.actor.layers[-1].out_features,
            dtype=caffe2_pb2.TensorProto.INT32,
        )

        action_feature_ids_blob = C2.NextBlob("action_feature_ids")
        workspace.FeedBlob(
            action_feature_ids_blob, np.array(action_feature_ids, dtype=np.int64)
        )
        parameters.append(action_feature_ids_blob)

        output_keys = "output/float_features.keys"
        workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
        num_examples, _ = C2.Reshape(C2.Size("input/float_features.lengths"), shape=[1])
        C2.net().Tile([action_feature_ids_blob, num_examples], [output_keys], axis=1)

        output_values = "output/float_features.values"
        workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
        C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])

        workspace.CreateNet(net)
        return DDPGPredictor(net, torch_init_net, parameters, int_features)
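
The Tile / FlattenToVec pairing at the end lines the flattened action values up with their feature ids: action_feature_ids is repeated once per example so output_keys[i] names the action dimension that output_values[i] belongs to. A minimal NumPy sketch with assumed ids and actions (not values from the source):

    import numpy as np

    action_feature_ids = np.array([100, 101, 102], dtype=np.int64)  # assumed ids
    scaled_actions = np.array([[0.1, 0.5, 0.9],
                               [0.2, 0.4, 0.8]])  # n x action_dim (assumed)

    num_examples = scaled_actions.shape[0]
    output_keys = np.tile(action_feature_ids, num_examples)  # ids repeated per example
    output_values = scaled_actions.flatten()                 # aligned with output_keys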