def test_preprocessing_network(self):
        """Check the Caffe2 preprocessing net against the numpy reference."""
        feature_value_map = preprocessing_util.read_data()
        # One normalization spec per feature, identified from its raw values.
        normalization_parameters = {
            name: normalization.identify_parameter(values)
            for name, values in feature_value_map.items()
        }
        test_features = self.preprocess(
            feature_value_map, normalization_parameters
        )

        # Build a net with one preprocessing op per feature.
        net = core.Net("PreprocessingTestNet")
        preprocessor = PreprocessorNet(net, False)
        for name in feature_value_map:
            workspace.FeedBlob(name, np.array([0], dtype=np.int32))
            preprocessor.preprocess_blob(name, normalization_parameters[name])

        workspace.CreateNet(net)

        # Feed the real feature values and run the net once.
        for name, values in feature_value_map.items():
            workspace.FeedBlob(name, values)
        workspace.RunNetOnce(net)

        # Compare the net output with the numpy reference preprocessing.
        for name in feature_value_map:
            produced = workspace.FetchBlob(name + "_preprocessed")
            expected = test_features[name]
            # At the limit, boxcox has some numerical instability
            tolerance = 0.1 if name == 'boxcox' else 0.01
            matches = np.isclose(
                produced, expected, rtol=tolerance, atol=tolerance
            )
            mismatched = np.where(np.logical_not(matches))
            self.assertTrue(
                np.all(matches),
                '{} does not match: {} {}'.format(
                    name, produced[mismatched].tolist(),
                    expected[mismatched].tolist()
                )
            )
# ---- Esempio n. 2 (example separator from scraped listing; stray "0" was a vote count) ----
    def test_preprocessing_network(self):
        """Check the Caffe2 preprocessing net against the numpy reference,
        with feature u'186' fed as a missing value, running the net twice."""
        feature_value_map = preprocessing_util.read_data()
        types = identify_types.identify_types_dict(feature_value_map)
        normalization_parameters = normalization.identify_parameters(
            feature_value_map, types)
        test_features = self.preprocess(feature_value_map,
                                        normalization_parameters)
        test_features[u'186'] = 0

        # Build a net with one preprocessing op per feature.
        net = core.Net("PreprocessingTestNet")
        preprocessor = PreprocessorNet(net, False)
        for name in feature_value_map:
            workspace.FeedBlob(name, np.array([0], dtype=np.int32))
            preprocessor.preprocess_blob(
                name, normalization_parameters[name])

        workspace.CreateNet(net)

        def feed_inputs():
            # Feature u'186' is deliberately fed as a missing value.
            for name in feature_value_map:
                if name == u'186':
                    blob_value = (
                        normalization.MISSING_VALUE *
                        np.ones(1, dtype=np.float32))
                else:
                    blob_value = feature_value_map[name].astype(np.float32)
                workspace.FeedBlob(name, blob_value)

        def check_outputs():
            for name in feature_value_map:
                produced = workspace.FetchBlob(name + "_preprocessed")
                self.assertTrue(
                    np.all(np.isclose(produced, test_features[name])))

        # Feed/run/check twice — presumably to verify the output is stable
        # across reruns of the same net.
        for _ in range(2):
            feed_inputs()
            workspace.RunNetOnce(net)
            check_outputs()
# ---- Esempio n. 3 (example separator from scraped listing; stray "0" was a vote count) ----
    def test_preprocessing_network(self):
        """Check the Caffe2 preprocessing net against the numpy reference
        implementation (NumpyFeatureProcessor)."""
        feature_value_map = read_data()

        # One normalization spec per feature, with an optional type override.
        normalization_parameters = {
            name: normalization.identify_parameter(
                name, values, feature_type=self._feature_type_override(name))
            for name, values in feature_value_map.items()
        }
        test_features = NumpyFeatureProcessor.preprocess(
            feature_value_map, normalization_parameters)

        # Build a net with one preprocessing op per feature and remember the
        # output blob each feature maps to.
        net = core.Net("PreprocessingTestNet")
        C2.set_net(net)
        preprocessor = PreprocessorNet()
        name_preprocessed_blob_map = {}
        for feature_name in feature_value_map:
            workspace.FeedBlob(str(feature_name),
                               np.array([0], dtype=np.int32))
            preprocessed_blob, _ = preprocessor.preprocess_blob(
                str(feature_name), [normalization_parameters[feature_name]])
            name_preprocessed_blob_map[feature_name] = preprocessed_blob

        workspace.CreateNet(net)

        # Feed each feature as a column vector and run the net once.
        for feature_name, feature_value in six.iteritems(feature_value_map):
            workspace.FeedBlob(str(feature_name),
                               np.expand_dims(feature_value, -1))
        workspace.RunNetOnce(net)

        for feature_name in feature_value_map:
            produced = workspace.FetchBlob(
                name_preprocessed_blob_map[feature_name])
            # Enum features keep their extra dimension; everything else is
            # squeezed back to 1-D for comparison.
            if feature_name != ENUM_FEATURE_ID:
                produced = np.squeeze(produced, -1)
            expected = test_features[feature_name]

            # At the limit, boxcox has some numerical instability
            tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01
            matches = np.isclose(
                produced, expected, rtol=tolerance, atol=tolerance)
            mismatched = np.where(np.logical_not(matches))
            self.assertTrue(
                np.all(matches),
                "{} does not match: {} {}".format(
                    feature_name,
                    produced[mismatched].tolist(),
                    expected[mismatched].tolist(),
                ),
            )
# ---- Esempio n. 4 (example separator from scraped listing; stray "0" was a vote count) ----
    def from_trainers(cls, trainer, state_features, action_features,
                      state_normalization_parameters,
                      action_normalization_parameters):
        """ Creates DiscreteActionPredictor from a list of action trainers

        :param trainer DiscreteActionTrainer
        :param state_features list of state feature names
        :param action_features list of action feature names
        :param state_normalization_parameters dict of state feature name ->
            normalization parameters
        :param action_normalization_parameters dict of action feature name ->
            normalization parameters
        """
        # ensure state and action IDs have no intersection
        assert (len(set(state_features) & set(action_features)) == 0)
        # Merging the two dicts is safe because the key sets are disjoint
        # (asserted above).
        normalization_parameters = dict(
            list(state_normalization_parameters.items()) +
            list(action_normalization_parameters.items()))
        input_blobs = state_features + action_features

        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        normalizer = PreprocessorNet(net, True)
        parameters = list(normalizer.parameters[:])
        normalized_input_blobs = []
        # Constant zero blob used by the preprocessing ops; recorded as a
        # parameter so it is exported with the predictor.
        zero = "ZERO_from_trainers"
        workspace.FeedBlob(zero, np.array(0))
        parameters.append(zero)
        for input_blob in input_blobs:
            # Seed each input with MISSING_VALUE so the net can be created
            # before real data is fed.
            workspace.FeedBlob(input_blob,
                               MISSING_VALUE * np.ones(1, dtype=np.float32))
            # Reshape each input to a column vector before normalization.
            reshaped_input_blob = input_blob + "_reshaped"
            net.Reshape([input_blob],
                        [reshaped_input_blob, input_blob + "_original_shape"],
                        shape=[-1, 1])
            normalized_input_blob, blob_parameters = normalizer.preprocess_blob(
                reshaped_input_blob, normalization_parameters[input_blob])
            parameters.extend(blob_parameters)
            normalized_input_blobs.append(normalized_input_blob)

        # Concatenate all normalized features into the single predictor input.
        concatenated_input_blob = "PredictorInput"
        output_dim = "PredictorOutputDim"
        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        net.Concat(normalized_input_blobs,
                   [concatenated_input_blob, output_dim],
                   axis=1)

        # The trainer appends the scoring ops that produce the Q blob.
        q_values = "Q"
        workspace.FeedBlob(q_values, np.zeros(1, dtype=np.float32))
        trainer.build_predictor(model, concatenated_input_blob, q_values)
        parameters.extend(model.GetAllParams())

        # Overwrite the inputs with MISSING_VALUE at the end of each run —
        # presumably so stale values are not reused on the next invocation
        # (TODO confirm against the serving code).
        for input_blob in input_blobs:
            net.ConstantFill([input_blob], [input_blob],
                             value=MISSING_VALUE,
                             dtype=core.DataType.FLOAT)

        output_blobs = [q_values]

        # Initialize parameters, instantiate the net, and wrap everything in
        # a predictor bound to the current workspace.
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        predictor = cls(net, input_blobs, output_blobs, parameters,
                        workspace.CurrentWorkspace())
        return predictor
# ---- Esempio n. 5 (example separator from scraped listing; stray "0" was a vote count) ----
    def test_preprocessing_network(self):
        """Verify the Caffe2 preprocessing net matches NumpyFeatureProcessor."""
        feature_value_map = read_data()

        normalization_parameters = {}
        for name, values in feature_value_map.items():
            # Optional per-feature type override hook for subclasses.
            normalization_parameters[name] = normalization.identify_parameter(
                name, values, feature_type=self._feature_type_override(name)
            )
        test_features = NumpyFeatureProcessor.preprocess(
            feature_value_map, normalization_parameters
        )

        # One preprocessing op per feature; keep the feature -> output-blob map.
        net = core.Net("PreprocessingTestNet")
        C2.set_net(net)
        preprocessor = PreprocessorNet()
        name_preprocessed_blob_map = {}
        for feature_name in feature_value_map:
            workspace.FeedBlob(str(feature_name), np.array([0], dtype=np.int32))
            blob, _ = preprocessor.preprocess_blob(
                str(feature_name), [normalization_parameters[feature_name]]
            )
            name_preprocessed_blob_map[feature_name] = blob

        workspace.CreateNet(net)

        # Feed every feature as a column vector, then run once.
        for feature_name, feature_value in six.iteritems(feature_value_map):
            workspace.FeedBlob(
                str(feature_name), np.expand_dims(feature_value, -1)
            )
        workspace.RunNetOnce(net)

        for feature_name in feature_value_map:
            produced = workspace.FetchBlob(
                name_preprocessed_blob_map[feature_name]
            )
            # Enum features keep their extra dimension; others are squeezed.
            if feature_name != ENUM_FEATURE_ID:
                produced = np.squeeze(produced, -1)
            expected = test_features[feature_name]

            # At the limit, boxcox has some numerical instability
            tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01
            matches = np.isclose(
                produced, expected, rtol=tolerance, atol=tolerance
            )
            mismatched = np.where(np.logical_not(matches))
            self.assertTrue(
                np.all(matches),
                "{} does not match: {} {}".format(
                    feature_name,
                    produced[mismatched].tolist(),
                    expected[mismatched].tolist(),
                ),
            )
# ---- Esempio n. 6 (example separator from scraped listing; stray "0" was a vote count) ----
    def from_trainers(cls, trainer, features, actions,
                      normalization_parameters):
        """ Creates DiscreteActionPredictor from a list of action trainers

        :param trainer DiscreteActionTrainer
        :param features list of state feature names
        :param actions list of action names
        :param normalization_parameters dict of feature name ->
            normalization parameters
        """
        int_features = [int(feature) for feature in features]
        # The predictor consumes sparse float features in the standard
        # lengths/keys/values record format.
        inputs = [
            'input/float_features.lengths', 'input/float_features.keys',
            'input/float_features.values'
        ]
        # Seed the input blobs so the net can be instantiated before any
        # real request data is fed.
        workspace.FeedBlob('input/float_features.lengths',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/float_features.keys',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/float_features.values',
                           np.zeros(1, dtype=np.float32))
        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        dense_input = net.NextBlob('dense_input')
        workspace.FeedBlob(dense_input, np.zeros(1, dtype=np.float32))
        # Scalar fill value used for features absent from a request.
        default_input_value = net.NextBlob('default_input_value')
        workspace.FeedBlob(default_input_value,
                           np.array([MISSING_VALUE], dtype=np.float32))
        net.GivenTensorFill([], [default_input_value],
                            shape=[],
                            values=[MISSING_VALUE])
        # Densify the sparse input; missing features become MISSING_VALUE.
        net.SparseToDenseMask([
            'input/float_features.keys',
            'input/float_features.values',
            default_input_value,
            'input/float_features.lengths',
        ], [dense_input],
                              mask=int_features)
        # Slice each feature column out of the dense matrix into its own
        # named blob for per-feature normalization.
        for i, feature in enumerate(features):
            net.Slice(
                [dense_input],
                [feature],
                starts=[0, i],
                ends=[-1, (i + 1)],
            )
        normalizer = PreprocessorNet(net, True)
        parameters = list(normalizer.parameters[:])
        parameters.append(default_input_value)
        normalized_input_blobs = []
        # Constant zero blob used by the preprocessing ops; exported with
        # the predictor's parameters.
        zero = "ZERO_from_trainers"
        workspace.FeedBlob(zero, np.array(0))
        parameters.append(zero)
        for feature in features:
            normalized_input_blob, blob_parameters = normalizer.preprocess_blob(
                feature,
                normalization_parameters[feature],
            )
            parameters.extend(blob_parameters)
            normalized_input_blobs.append(normalized_input_blob)

        # Concatenate the normalized features into one predictor input, and
        # fail fast on NaNs produced by preprocessing.
        concatenated_input_blob = "PredictorInput"
        output_dim = "PredictorOutputDim"
        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        net.Concat(normalized_input_blobs,
                   [concatenated_input_blob, output_dim],
                   axis=1)
        net.NanCheck(concatenated_input_blob, concatenated_input_blob)

        # The trainer appends the scoring ops producing a (batch x actions)
        # q-value matrix.
        q_values = "q_values"
        workspace.FeedBlob(q_values, np.zeros(1, dtype=np.float32))
        trainer.build_predictor(model, concatenated_input_blob, q_values)
        parameters.extend(model.GetAllParams())

        # Action names/indices are constants stored with the predictor so
        # the output record is self-describing.
        action_names = net.NextBlob("action_names")
        parameters.append(action_names)
        workspace.FeedBlob(action_names, np.array(actions))
        action_range = net.NextBlob("action_range")
        parameters.append(action_range)
        workspace.FeedBlob(action_range, np.array(list(range(len(actions)))))

        # Derive the batch row count from the shape of q_values.
        output_shape = net.NextBlob("output_shape")
        workspace.FeedBlob(output_shape, np.zeros(1, dtype=np.int64))
        net.Shape([q_values], [output_shape])
        output_shape_row_count = net.NextBlob("output_shape_row_count")
        net.Slice([output_shape], [output_shape_row_count],
                  starts=[0],
                  ends=[1])

        # One-column slice of q_values: a per-row template used below to
        # ConstantFill per-row keys/lengths of the right length.
        output_row_shape = net.NextBlob("output_row_shape")
        workspace.FeedBlob(output_row_shape, np.zeros(1, dtype=np.int64))
        net.Slice([q_values], [output_row_shape], starts=[0, 0], ends=[-1, 1])

        # Build the output record in string_weighted_multi_categorical
        # format: outer feature key 0 with length 1 per row.
        output_feature_keys = 'output/string_weighted_multi_categorical_features.keys'
        workspace.FeedBlob(output_feature_keys, np.zeros(1, dtype=np.int64))
        output_feature_keys_matrix = net.NextBlob('output_feature_keys_matrix')
        net.ConstantFill([output_row_shape], [output_feature_keys_matrix],
                         value=0,
                         dtype=caffe2_pb2.TensorProto.INT64)
        net.FlattenToVec(
            [output_feature_keys_matrix],
            [output_feature_keys],
        )

        output_feature_lengths = \
            'output/string_weighted_multi_categorical_features.lengths'
        workspace.FeedBlob(output_feature_lengths, np.zeros(1, dtype=np.int32))
        output_feature_lengths_matrix = net.NextBlob(
            'output_feature_lengths_matrix')
        net.ConstantFill([output_row_shape], [output_feature_lengths_matrix],
                         value=1,
                         dtype=caffe2_pb2.TensorProto.INT32)
        net.FlattenToVec(
            [output_feature_lengths_matrix],
            [output_feature_lengths],
        )

        # Inner values: action names tiled once per row as keys ...
        output_keys = 'output/string_weighted_multi_categorical_features.values.keys'
        workspace.FeedBlob(output_keys, np.array(['a']))
        net.Tile([action_names, output_shape_row_count], [output_keys], axis=1)

        # ... with len(actions) entries per row ...
        output_lengths_matrix = net.NextBlob('output_lengths_matrix')
        net.ConstantFill([output_row_shape], [output_lengths_matrix],
                         value=len(actions),
                         dtype=caffe2_pb2.TensorProto.INT32)
        output_lengths = \
            'output/string_weighted_multi_categorical_features.values.lengths'
        workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
        net.FlattenToVec(
            [output_lengths_matrix],
            [output_lengths],
        )

        # ... and the flattened q-value matrix as the weights.
        output_values = \
            'output/string_weighted_multi_categorical_features.values.values'
        workspace.FeedBlob(output_values, np.array([1.0]))
        net.FlattenToVec([q_values], [output_values])

        output_blobs = [
            output_feature_keys,
            output_feature_lengths,
            output_keys,
            output_lengths,
            output_values,
        ]
        # Initialize parameters, instantiate the net, and wrap it all in a
        # predictor bound to the current workspace.
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        predictor = cls(net, inputs, output_blobs, parameters,
                        workspace.CurrentWorkspace())
        return predictor
    def from_trainers(cls, trainer, features, actions,
                      normalization_parameters):
        """ Creates DiscreteActionPredictor from a list of action trainers

        :param trainer DiscreteActionTrainer
        :param features list of state feature names
        :param actions list of action names
        :param normalization_parameters dict of feature name ->
            normalization parameters
        """
        # Each feature name is fed directly as its own input blob.
        input_blobs = features[:]
        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        normalizer = PreprocessorNet(net, True)
        parameters = list(normalizer.parameters[:])
        normalized_input_blobs = []
        # Constant zero blob used by the preprocessing ops; exported with
        # the predictor's parameters.
        zero = "ZERO_from_trainers"
        workspace.FeedBlob(zero, np.array(0))
        parameters.append(zero)
        for input_blob in input_blobs:
            # Seed each input with MISSING_VALUE so the net can be created
            # before real data is fed.
            workspace.FeedBlob(input_blob,
                               MISSING_VALUE * np.ones(1, dtype=np.float32))
            # Reshape each input to a column vector before normalization.
            reshaped_input_blob = input_blob + "_reshaped"
            net.Reshape([input_blob],
                        [reshaped_input_blob, input_blob + "_original_shape"],
                        shape=[-1, 1])
            normalized_input_blob, blob_parameters = normalizer.preprocess_blob(
                reshaped_input_blob, normalization_parameters[input_blob])
            parameters.extend(blob_parameters)
            normalized_input_blobs.append(normalized_input_blob)

        # Concatenate the normalized features into one predictor input, and
        # fail fast on NaNs produced by preprocessing.
        concatenated_input_blob = "PredictorInput"
        output_dim = "PredictorOutputDim"
        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        net.Concat(normalized_input_blobs,
                   [concatenated_input_blob, output_dim],
                   axis=1)
        net.NanCheck(concatenated_input_blob, concatenated_input_blob)

        # The trainer appends the scoring ops producing a (batch x actions)
        # q-value matrix.
        q_values = "q_values"
        workspace.FeedBlob(q_values, np.zeros(1, dtype=np.float32))
        trainer.build_predictor(model, concatenated_input_blob, q_values)
        parameters.extend(model.GetAllParams())

        # Slice one output blob (column) per action out of the q-value
        # matrix, named after the action.
        output_blobs = []
        for i, action_output in enumerate(actions):
            workspace.FeedBlob(action_output, np.zeros(1, dtype=np.float32))
            net.Slice(q_values,
                      action_output,
                      starts=np.array([0, i], dtype=np.int32),
                      ends=np.array([-1, i + 1], dtype=np.int32))
            output_blobs.append(action_output)
        # Overwrite the inputs with MISSING_VALUE at the end of each run —
        # presumably so stale values are not reused on the next invocation
        # (TODO confirm against the serving code).
        for input_blob in input_blobs:
            net.ConstantFill([input_blob], [input_blob],
                             value=MISSING_VALUE,
                             dtype=core.DataType.FLOAT)

        # Initialize parameters, instantiate the net, and wrap it all in a
        # predictor bound to the current workspace.
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        predictor = cls(net, input_blobs, output_blobs, parameters,
                        workspace.CurrentWorkspace())
        return predictor
# ---- Esempio n. 8 (example separator from scraped listing; stray "0" was a vote count) ----
    def from_trainers(cls, trainer, state_features, action_features,
                      state_normalization_parameters,
                      action_normalization_parameters):
        """ Creates DiscreteActionPredictor from a list of action trainers

        :param trainer DiscreteActionTrainer
        :param state_features list of state feature names
        :param action_features list of action feature names
        :param state_normalization_parameters dict of state feature name ->
            normalization parameters
        :param action_normalization_parameters dict of action feature name ->
            normalization parameters
        """
        # ensure state and action IDs have no intersection
        assert (len(set(state_features) & set(action_features)) == 0)
        # Merging the two dicts is safe because the key sets are disjoint
        # (asserted above).
        normalization_parameters = dict(
            list(state_normalization_parameters.items()) +
            list(action_normalization_parameters.items()))
        features = state_features + action_features

        int_features = [int(feature) for feature in features]
        # The predictor consumes sparse float features in the standard
        # lengths/keys/values record format.
        inputs = [
            'input/float_features.lengths', 'input/float_features.keys',
            'input/float_features.values'
        ]
        # Seed the input blobs so the net can be instantiated before any
        # real request data is fed.
        workspace.FeedBlob('input/float_features.lengths',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/float_features.keys',
                           np.zeros(1, dtype=np.int64))
        workspace.FeedBlob('input/float_features.values',
                           np.zeros(1, dtype=np.float32))
        model = model_helper.ModelHelper(name="predictor")
        net = model.net
        dense_input = net.NextBlob('dense_input')
        # Scalar fill value used for features absent from a request.
        default_input_value = net.NextBlob('default_input_value')
        net.GivenTensorFill([], [default_input_value],
                            shape=[],
                            values=[MISSING_VALUE])
        # Densify the sparse input; missing features become MISSING_VALUE.
        net.SparseToDenseMask([
            'input/float_features.keys',
            'input/float_features.values',
            default_input_value,
            'input/float_features.lengths',
        ], [dense_input],
                              mask=int_features)
        # Slice each feature column out of the dense matrix into its own
        # named blob for per-feature normalization.
        for i, feature in enumerate(features):
            net.Slice(
                [dense_input],
                [feature],
                starts=[0, i],
                ends=[-1, (i + 1)],
            )
        normalizer = PreprocessorNet(net, True)
        parameters = list(normalizer.parameters[:])
        normalized_input_blobs = []
        # Constant zero blob used by the preprocessing ops; exported with
        # the predictor's parameters.
        zero = "ZERO_from_trainers"
        workspace.FeedBlob(zero, np.array(0))
        parameters.append(zero)
        for feature in features:
            normalized_input_blob, blob_parameters = normalizer.preprocess_blob(
                feature,
                normalization_parameters[feature],
            )
            parameters.extend(blob_parameters)
            normalized_input_blobs.append(normalized_input_blob)

        # Concatenate the normalized features into one predictor input, and
        # fail fast on NaNs produced by preprocessing.
        concatenated_input_blob = "PredictorInput"
        output_dim = "PredictorOutputDim"
        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        net.Concat(normalized_input_blobs,
                   [concatenated_input_blob, output_dim],
                   axis=1)
        net.NanCheck(concatenated_input_blob, concatenated_input_blob)

        # Output record in string_weighted_multi_categorical format; the
        # trainer fills q_values_matrix with the scores.
        q_lengths = "output/string_weighted_multi_categorical_features.values.lengths"
        workspace.FeedBlob(q_lengths, np.array([1], dtype=np.int32))
        q_keys = "output/string_weighted_multi_categorical_features.values.keys"
        workspace.FeedBlob(q_keys, np.array(['a']))
        q_values_matrix = net.NextBlob('q_values_matrix')
        trainer.build_predictor(model, concatenated_input_blob,
                                q_values_matrix)
        parameters.extend(model.GetAllParams())

        # Flatten the q-value matrix to the record's values vector, with
        # constant key "Q" and length 1 per entry.
        q_values = 'output/string_weighted_multi_categorical_features.values.values'
        workspace.FeedBlob(q_values, np.array([1.0]))
        net.FlattenToVec([q_values_matrix], [q_values])
        net.ConstantFill([q_values], [q_keys],
                         value="Q",
                         dtype=caffe2_pb2.TensorProto.STRING)
        net.ConstantFill([q_values], [q_lengths],
                         value=1,
                         dtype=caffe2_pb2.TensorProto.INT32)

        # Outer record layer: feature key 0 with length 1 per entry.
        q_feature_lengths = "output/string_weighted_multi_categorical_features.lengths"
        workspace.FeedBlob(q_feature_lengths, np.array([1], dtype=np.int32))
        net.ConstantFill([q_values], [q_feature_lengths],
                         value=1,
                         dtype=caffe2_pb2.TensorProto.INT32)
        q_feature_keys = "output/string_weighted_multi_categorical_features.keys"
        workspace.FeedBlob(q_feature_keys, np.array([0], dtype=np.int64))
        net.ConstantFill([q_values], [q_feature_keys],
                         value=0,
                         dtype=caffe2_pb2.TensorProto.INT64)

        output_blobs = [
            q_feature_lengths,
            q_feature_keys,
            q_lengths,
            q_keys,
            q_values,
        ]

        # Initialize parameters, instantiate the net, and wrap it all in a
        # predictor bound to the current workspace.
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(net)
        predictor = cls(net, inputs, output_blobs, parameters,
                        workspace.CurrentWorkspace())
        return predictor