def test_preprocessing_network(self):
    """End-to-end check of the caffe2 preprocessing net.

    Identifies normalization parameters from the test data, builds a
    PreprocessorNet op per feature, runs the net once, and verifies each
    "<name>_preprocessed" blob matches the reference values produced by
    self.preprocess().
    """
    feature_value_map = preprocessing_util.read_data()
    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            values
        )
    test_features = self.preprocess(
        feature_value_map, normalization_parameters
    )

    net = core.Net("PreprocessingTestNet")
    preprocessor = PreprocessorNet(net, False)
    for feature_name in feature_value_map:
        # Placeholder feed so the blob exists while the net is built.
        workspace.FeedBlob(feature_name, np.array([0], dtype=np.int32))
        preprocessor.preprocess_blob(
            feature_name, normalization_parameters[feature_name]
        )
    workspace.CreateNet(net)

    # Feed the real values and execute the whole net once.
    for feature_name in feature_value_map:
        workspace.FeedBlob(feature_name, feature_value_map[feature_name])
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        normalized_features = workspace.FetchBlob(
            feature_name + "_preprocessed"
        )
        tolerance = 0.01
        if feature_name == 'boxcox':
            # At the limit, boxcox has some numerical instability
            tolerance = 0.1
        # Fix: evaluate the elementwise comparison once and reuse it for
        # both the failure report and the assertion (the original called
        # np.isclose twice on the same arrays).
        close = np.isclose(
            normalized_features,
            test_features[feature_name],
            rtol=tolerance,
            atol=tolerance,
        )
        non_matching = np.where(np.logical_not(close))
        self.assertTrue(
            np.all(close),
            '{} does not match: {} {}'.format(
                feature_name,
                normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist()
            )
        )
def test_preprocessing_network(self):
    """End-to-end check of the caffe2 preprocessing net.

    Builds a PreprocessorNet op per feature, then feeds data, runs the
    net, and verifies the outputs — twice, to confirm a second execution
    of the same net produces identical results.
    """
    feature_value_map = preprocessing_util.read_data()
    types = identify_types.identify_types_dict(feature_value_map)
    normalization_parameters = normalization.identify_parameters(
        feature_value_map, types)
    test_features = self.preprocess(feature_value_map,
                                    normalization_parameters)
    # Feature u'186' is fed as MISSING_VALUE below, so its expected
    # preprocessed output is 0.
    test_features[u'186'] = 0

    net = core.Net("PreprocessingTestNet")
    preprocessor = PreprocessorNet(net, False)
    for feature_name in feature_value_map:
        # Placeholder feed so the blob exists while the net is built.
        workspace.FeedBlob(feature_name, np.array([0], dtype=np.int32))
        preprocessor.preprocess_blob(
            feature_name, normalization_parameters[feature_name])
    workspace.CreateNet(net)

    def feed_run_and_check():
        # Feed real values (MISSING_VALUE for u'186'), run the net, and
        # compare every preprocessed blob with the reference output.
        for feature_name in feature_value_map:
            if feature_name != u'186':
                workspace.FeedBlob(
                    feature_name,
                    feature_value_map[feature_name].astype(np.float32))
            else:
                workspace.FeedBlob(
                    feature_name,
                    normalization.MISSING_VALUE *
                    np.ones(1, dtype=np.float32))
        workspace.RunNetOnce(net)
        for feature_name in feature_value_map:
            normalized_features = workspace.FetchBlob(feature_name +
                                                      "_preprocessed")
            self.assertTrue(
                np.all(
                    np.isclose(normalized_features,
                               test_features[feature_name])))

    # Fix: the original duplicated this whole section verbatim; keep both
    # executions (the second verifies re-running the net is deterministic)
    # but share one implementation.
    feed_run_and_check()
    feed_run_and_check()
def test_preprocessing_network(self):
    """End-to-end check of the caffe2 preprocessing net.

    Identifies per-feature normalization parameters, builds one
    preprocess_blob op per feature, runs the net once, and compares each
    preprocessed blob against NumpyFeatureProcessor's reference output.
    """
    feature_value_map = read_data()
    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, feature_type=self._feature_type_override(name))
    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters)

    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet()
    name_preprocessed_blob_map = {}
    for feature_name in feature_value_map:
        # Placeholder feed so the blob exists while the net is built.
        workspace.FeedBlob(str(feature_name), np.array([0], dtype=np.int32))
        preprocessed_blob, _ = preprocessor.preprocess_blob(
            str(feature_name), [normalization_parameters[feature_name]])
        name_preprocessed_blob_map[feature_name] = preprocessed_blob
    workspace.CreateNet(net)

    for feature_name, feature_value in six.iteritems(feature_value_map):
        # preprocess_blob expects a column vector per feature.
        feature_value = np.expand_dims(feature_value, -1)
        workspace.FeedBlob(str(feature_name), feature_value)
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        normalized_features = workspace.FetchBlob(
            name_preprocessed_blob_map[feature_name])
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)
        tolerance = 0.01
        if feature_name == BOXCOX_FEATURE_ID:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5
        # Fix: evaluate the elementwise comparison once and reuse it for
        # both the failure report and the assertion (the original called
        # np.isclose twice on the same arrays).
        close = np.isclose(
            normalized_features,
            test_features[feature_name],
            rtol=tolerance,
            atol=tolerance,
        )
        non_matching = np.where(np.logical_not(close))
        self.assertTrue(
            np.all(close),
            "{} does not match: {} {}".format(
                feature_name,
                normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist(),
            ),
        )
def from_trainers(cls, trainer, state_features, action_features,
                  state_normalization_parameters,
                  action_normalization_parameters):
    """ Creates DiscreteActionPredictor from a list of action trainers

    Builds a caffe2 predictor net that normalizes every state/action
    input blob, concatenates them, and runs the trainer's Q head; the
    single output blob is "Q".

    :param trainer DiscreteActionTrainer
    :param state_features list of state feature names
    :param action_features list of action feature names
    """
    # ensure state and action IDs have no intersection
    assert (len(set(state_features) & set(action_features)) == 0)
    # Merge the two parameter dicts; safe because the ID sets are disjoint.
    normalization_parameters = dict(
        list(state_normalization_parameters.items()) +
        list(action_normalization_parameters.items()))
    input_blobs = state_features + action_features
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    normalizer = PreprocessorNet(net, True)
    # Track every blob the predictor depends on so it can be serialized.
    parameters = list(normalizer.parameters[:])
    normalized_input_blobs = []
    zero = "ZERO_from_trainers"
    workspace.FeedBlob(zero, np.array(0))
    parameters.append(zero)
    for input_blob in input_blobs:
        # Seed each input with MISSING_VALUE — presumably so the blob
        # exists with a valid shape before CreateNet; TODO confirm.
        workspace.FeedBlob(input_blob,
                           MISSING_VALUE * np.ones(1, dtype=np.float32))
        # Reshape to a column vector, which preprocess_blob operates on.
        reshaped_input_blob = input_blob + "_reshaped"
        net.Reshape([input_blob],
                    [reshaped_input_blob, input_blob + "_original_shape"],
                    shape=[-1, 1])
        normalized_input_blob, blob_parameters = normalizer.preprocess_blob(
            reshaped_input_blob, normalization_parameters[input_blob])
        parameters.extend(blob_parameters)
        normalized_input_blobs.append(normalized_input_blob)
    concatenated_input_blob = "PredictorInput"
    output_dim = "PredictorOutputDim"
    for i, inp in enumerate(normalized_input_blobs):
        logger.info("input# {}: {}".format(i, inp))
    net.Concat(normalized_input_blobs,
               [concatenated_input_blob, output_dim],
               axis=1)
    q_values = "Q"
    workspace.FeedBlob(q_values, np.zeros(1, dtype=np.float32))
    # Let the trainer attach its scoring subgraph to the concatenated input.
    trainer.build_predictor(model, concatenated_input_blob, q_values)
    parameters.extend(model.GetAllParams())
    for input_blob in input_blobs:
        # Appended after the Q head, so each run ends by resetting the raw
        # inputs back to MISSING_VALUE.
        net.ConstantFill([input_blob], [input_blob],
                         value=MISSING_VALUE,
                         dtype=core.DataType.FLOAT)
    output_blobs = [q_values]
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    predictor = cls(net, input_blobs, output_blobs, parameters,
                    workspace.CurrentWorkspace())
    return predictor
def test_preprocessing_network(self):
    """End-to-end check of the caffe2 preprocessing net.

    Mirrors NumpyFeatureProcessor.preprocess(): every feature goes
    through its own preprocess_blob op and the fetched result must match
    the numpy reference within a per-feature tolerance.
    """
    feature_value_map = read_data()
    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, feature_type=self._feature_type_override(name)
        )
    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet()
    name_preprocessed_blob_map = {}
    for feature_name in feature_value_map:
        # Placeholder feed so the blob exists while the net is built.
        workspace.FeedBlob(str(feature_name), np.array([0], dtype=np.int32))
        preprocessed_blob, _ = preprocessor.preprocess_blob(
            str(feature_name), [normalization_parameters[feature_name]]
        )
        name_preprocessed_blob_map[feature_name] = preprocessed_blob
    workspace.CreateNet(net)

    for feature_name, feature_value in six.iteritems(feature_value_map):
        # preprocess_blob consumes column vectors.
        feature_value = np.expand_dims(feature_value, -1)
        workspace.FeedBlob(str(feature_name), feature_value)
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        normalized_features = workspace.FetchBlob(
            name_preprocessed_blob_map[feature_name]
        )
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)
        tolerance = 0.01
        if feature_name == BOXCOX_FEATURE_ID:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5
        # Fix: compute np.isclose once instead of twice (the original
        # evaluated the identical expression for the mismatch report and
        # again inside the assertion).
        close = np.isclose(
            normalized_features,
            test_features[feature_name],
            rtol=tolerance,
            atol=tolerance,
        )
        non_matching = np.where(np.logical_not(close))
        self.assertTrue(
            np.all(close),
            "{} does not match: {} {}".format(
                feature_name,
                normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist(),
            ),
        )
def from_trainers(cls, trainer, features, actions, normalization_parameters):
    """ Creates DiscreteActionPredictor from a list of action trainers

    Builds a caffe2 predictor net that densifies the sparse
    "input/float_features.*" record, normalizes each feature, runs the
    trainer's Q head, and emits per-action Q values in the
    "output/string_weighted_multi_categorical_features.*" record format.

    :param trainer DiscreteActionTrainer
    :param features list of state feature names
    :param actions list of action names
    """
    # SparseToDenseMask takes integer feature IDs.
    int_features = [int(feature) for feature in features]
    inputs = [
        'input/float_features.lengths', 'input/float_features.keys',
        'input/float_features.values'
    ]
    # Seed the input record blobs so they exist before CreateNet.
    workspace.FeedBlob('input/float_features.lengths',
                       np.zeros(1, dtype=np.int32))
    workspace.FeedBlob('input/float_features.keys',
                       np.zeros(1, dtype=np.int32))
    workspace.FeedBlob('input/float_features.values',
                       np.zeros(1, dtype=np.float32))
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    dense_input = net.NextBlob('dense_input')
    workspace.FeedBlob(dense_input, np.zeros(1, dtype=np.float32))
    # Features absent from the sparse input are filled with MISSING_VALUE.
    default_input_value = net.NextBlob('default_input_value')
    workspace.FeedBlob(default_input_value,
                       np.array([MISSING_VALUE], dtype=np.float32))
    net.GivenTensorFill([], [default_input_value],
                        shape=[], values=[MISSING_VALUE])
    net.SparseToDenseMask([
        'input/float_features.keys',
        'input/float_features.values',
        default_input_value,
        'input/float_features.lengths',
    ], [dense_input], mask=int_features)
    # Slice the dense matrix back into one column blob per feature name.
    for i, feature in enumerate(features):
        net.Slice(
            [dense_input],
            [feature],
            starts=[0, i],
            ends=[-1, (i + 1)],
        )
    normalizer = PreprocessorNet(net, True)
    # Track every blob the predictor depends on so it can be serialized.
    parameters = list(normalizer.parameters[:])
    parameters.append(default_input_value)
    normalized_input_blobs = []
    zero = "ZERO_from_trainers"
    workspace.FeedBlob(zero, np.array(0))
    parameters.append(zero)
    for feature in features:
        normalized_input_blob, blob_parameters = normalizer.preprocess_blob(
            feature,
            normalization_parameters[feature],
        )
        parameters.extend(blob_parameters)
        normalized_input_blobs.append(normalized_input_blob)
    concatenated_input_blob = "PredictorInput"
    output_dim = "PredictorOutputDim"
    for i, inp in enumerate(normalized_input_blobs):
        logger.info("input# {}: {}".format(i, inp))
    net.Concat(normalized_input_blobs,
               [concatenated_input_blob, output_dim], axis=1)
    net.NanCheck(concatenated_input_blob, concatenated_input_blob)
    q_values = "q_values"
    workspace.FeedBlob(q_values, np.zeros(1, dtype=np.float32))
    # Let the trainer attach its scoring subgraph; q_values is one row per
    # example with one column per action — TODO confirm against trainer.
    trainer.build_predictor(model, concatenated_input_blob, q_values)
    parameters.extend(model.GetAllParams())
    # Action names/indices are constants shipped with the predictor.
    action_names = net.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))
    action_range = net.NextBlob("action_range")
    parameters.append(action_range)
    workspace.FeedBlob(action_range, np.array(list(range(len(actions)))))
    # Derive the batch size (row count) from q_values' shape at run time.
    output_shape = net.NextBlob("output_shape")
    workspace.FeedBlob(output_shape, np.zeros(1, dtype=np.int64))
    net.Shape([q_values], [output_shape])
    output_shape_row_count = net.NextBlob("output_shape_row_count")
    net.Slice([output_shape], [output_shape_row_count], starts=[0],
              ends=[1])
    # One-column slice of q_values: a template with one entry per example,
    # used below to ConstantFill per-example key/length vectors.
    output_row_shape = net.NextBlob("output_row_shape")
    workspace.FeedBlob(output_row_shape, np.zeros(1, dtype=np.int64))
    net.Slice([q_values], [output_row_shape], starts=[0, 0], ends=[-1, 1])
    # Record-level keys: constant 0 per example.
    output_feature_keys = \
        'output/string_weighted_multi_categorical_features.keys'
    workspace.FeedBlob(output_feature_keys, np.zeros(1, dtype=np.int64))
    output_feature_keys_matrix = net.NextBlob('output_feature_keys_matrix')
    net.ConstantFill([output_row_shape], [output_feature_keys_matrix],
                     value=0, dtype=caffe2_pb2.TensorProto.INT64)
    net.FlattenToVec(
        [output_feature_keys_matrix],
        [output_feature_keys],
    )
    # Record-level lengths: constant 1 per example.
    output_feature_lengths = \
        'output/string_weighted_multi_categorical_features.lengths'
    workspace.FeedBlob(output_feature_lengths, np.zeros(1, dtype=np.int32))
    output_feature_lengths_matrix = net.NextBlob(
        'output_feature_lengths_matrix')
    net.ConstantFill([output_row_shape], [output_feature_lengths_matrix],
                     value=1, dtype=caffe2_pb2.TensorProto.INT32)
    net.FlattenToVec(
        [output_feature_lengths_matrix],
        [output_feature_lengths],
    )
    # Value keys: the action-name list tiled once per example row.
    output_keys = \
        'output/string_weighted_multi_categorical_features.values.keys'
    workspace.FeedBlob(output_keys, np.array(['a']))
    net.Tile([action_names, output_shape_row_count], [output_keys], axis=1)
    # Value lengths: len(actions) per example.
    output_lengths_matrix = net.NextBlob('output_lengths_matrix')
    net.ConstantFill([output_row_shape], [output_lengths_matrix],
                     value=len(actions),
                     dtype=caffe2_pb2.TensorProto.INT32)
    output_lengths = \
        'output/string_weighted_multi_categorical_features.values.lengths'
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    net.FlattenToVec(
        [output_lengths_matrix],
        [output_lengths],
    )
    # Values: q_values flattened row-major to line up with the tiled keys.
    output_values = \
        'output/string_weighted_multi_categorical_features.values.values'
    workspace.FeedBlob(output_values, np.array([1.0]))
    net.FlattenToVec([q_values], [output_values])
    output_blobs = [
        output_feature_keys,
        output_feature_lengths,
        output_keys,
        output_lengths,
        output_values,
    ]
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    predictor = cls(net, inputs, output_blobs, parameters,
                    workspace.CurrentWorkspace())
    return predictor
def from_trainers(cls, trainer, features, actions, normalization_parameters):
    """ Creates DiscreteActionPredictor from a list of action trainers

    :param trainer DiscreteActionTrainer
    :param features list of state feature names
    :param actions list of action names
    """
    feature_blobs = features[:]
    model = model_helper.ModelHelper(name="predictor")
    net = model.net

    # Normalization stage: reshape every raw feature blob into a column
    # vector, preprocess it, and collect every blob the normalizer adds.
    normalizer = PreprocessorNet(net, True)
    param_blobs = list(normalizer.parameters[:])
    zero_blob = "ZERO_from_trainers"
    workspace.FeedBlob(zero_blob, np.array(0))
    param_blobs.append(zero_blob)
    normalized_blobs = []
    for raw_blob in feature_blobs:
        workspace.FeedBlob(raw_blob,
                           MISSING_VALUE * np.ones(1, dtype=np.float32))
        column_blob = raw_blob + "_reshaped"
        net.Reshape([raw_blob],
                    [column_blob, raw_blob + "_original_shape"],
                    shape=[-1, 1])
        normed_blob, extra_params = normalizer.preprocess_blob(
            column_blob, normalization_parameters[raw_blob])
        param_blobs.extend(extra_params)
        normalized_blobs.append(normed_blob)

    concatenated_input_blob = "PredictorInput"
    output_dim = "PredictorOutputDim"
    for idx, blob in enumerate(normalized_blobs):
        logger.info("input# {}: {}".format(idx, blob))
    net.Concat(normalized_blobs,
               [concatenated_input_blob, output_dim],
               axis=1)
    net.NanCheck(concatenated_input_blob, concatenated_input_blob)

    # Q head: the trainer fills in the scoring subgraph; each action's
    # column is then sliced out into its own named output blob.
    q_values = "q_values"
    workspace.FeedBlob(q_values, np.zeros(1, dtype=np.float32))
    trainer.build_predictor(model, concatenated_input_blob, q_values)
    param_blobs.extend(model.GetAllParams())
    output_blobs = []
    for idx, action_output in enumerate(actions):
        workspace.FeedBlob(action_output, np.zeros(1, dtype=np.float32))
        net.Slice(q_values,
                  action_output,
                  starts=np.array([0, idx], dtype=np.int32),
                  ends=np.array([-1, idx + 1], dtype=np.int32))
        output_blobs.append(action_output)

    # Appended last: every run finishes by resetting the raw inputs back
    # to MISSING_VALUE.
    for raw_blob in feature_blobs:
        net.ConstantFill([raw_blob], [raw_blob],
                         value=MISSING_VALUE,
                         dtype=core.DataType.FLOAT)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    return cls(net, feature_blobs, output_blobs, param_blobs,
               workspace.CurrentWorkspace())
def from_trainers(cls, trainer, state_features, action_features,
                  state_normalization_parameters,
                  action_normalization_parameters):
    """ Creates DiscreteActionPredictor from a list of action trainers

    Builds a caffe2 predictor net that densifies the sparse
    "input/float_features.*" record, normalizes the state and action
    features, runs the trainer's Q head, and emits a single "Q" value per
    example in the "output/string_weighted_multi_categorical_features.*"
    record format.

    :param trainer DiscreteActionTrainer
    :param state_features list of state feature names
    :param action_features list of action feature names
    """
    # ensure state and action IDs have no intersection
    assert (len(set(state_features) & set(action_features)) == 0)
    # Merge the two parameter dicts; safe because the ID sets are disjoint.
    normalization_parameters = dict(
        list(state_normalization_parameters.items()) +
        list(action_normalization_parameters.items()))
    features = state_features + action_features
    # SparseToDenseMask takes integer feature IDs.
    int_features = [int(feature) for feature in features]
    inputs = [
        'input/float_features.lengths', 'input/float_features.keys',
        'input/float_features.values'
    ]
    # Seed the input record blobs so they exist before CreateNet.
    workspace.FeedBlob('input/float_features.lengths',
                       np.zeros(1, dtype=np.int32))
    workspace.FeedBlob('input/float_features.keys',
                       np.zeros(1, dtype=np.int64))
    workspace.FeedBlob('input/float_features.values',
                       np.zeros(1, dtype=np.float32))
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    dense_input = net.NextBlob('dense_input')
    # Features absent from the sparse input are filled with MISSING_VALUE.
    default_input_value = net.NextBlob('default_input_value')
    net.GivenTensorFill([], [default_input_value],
                        shape=[], values=[MISSING_VALUE])
    net.SparseToDenseMask([
        'input/float_features.keys',
        'input/float_features.values',
        default_input_value,
        'input/float_features.lengths',
    ], [dense_input], mask=int_features)
    # Slice the dense matrix back into one column blob per feature name.
    for i, feature in enumerate(features):
        net.Slice(
            [dense_input],
            [feature],
            starts=[0, i],
            ends=[-1, (i + 1)],
        )
    normalizer = PreprocessorNet(net, True)
    # Track every blob the predictor depends on so it can be serialized.
    parameters = list(normalizer.parameters[:])
    normalized_input_blobs = []
    zero = "ZERO_from_trainers"
    workspace.FeedBlob(zero, np.array(0))
    parameters.append(zero)
    for feature in features:
        normalized_input_blob, blob_parameters = normalizer.preprocess_blob(
            feature,
            normalization_parameters[feature],
        )
        parameters.extend(blob_parameters)
        normalized_input_blobs.append(normalized_input_blob)
    concatenated_input_blob = "PredictorInput"
    output_dim = "PredictorOutputDim"
    for i, inp in enumerate(normalized_input_blobs):
        logger.info("input# {}: {}".format(i, inp))
    net.Concat(normalized_input_blobs,
               [concatenated_input_blob, output_dim],
               axis=1)
    net.NanCheck(concatenated_input_blob, concatenated_input_blob)
    # Output record: one entry per example with key "Q" and the scalar
    # Q value; lengths are therefore a constant 1 per example.
    q_lengths = \
        "output/string_weighted_multi_categorical_features.values.lengths"
    workspace.FeedBlob(q_lengths, np.array([1], dtype=np.int32))
    q_keys = "output/string_weighted_multi_categorical_features.values.keys"
    workspace.FeedBlob(q_keys, np.array(['a']))
    q_values_matrix = net.NextBlob('q_values_matrix')
    # Let the trainer attach its scoring subgraph; its matrix output is
    # flattened into the record's values vector below.
    trainer.build_predictor(model, concatenated_input_blob, q_values_matrix)
    parameters.extend(model.GetAllParams())
    q_values = \
        'output/string_weighted_multi_categorical_features.values.values'
    workspace.FeedBlob(q_values, np.array([1.0]))
    net.FlattenToVec([q_values_matrix], [q_values])
    # Per-value key "Q" and length 1, shaped to match q_values.
    net.ConstantFill([q_values], [q_keys],
                     value="Q",
                     dtype=caffe2_pb2.TensorProto.STRING)
    net.ConstantFill([q_values], [q_lengths],
                     value=1,
                     dtype=caffe2_pb2.TensorProto.INT32)
    # Record-level lengths (1 per example) and keys (constant 0).
    q_feature_lengths = \
        "output/string_weighted_multi_categorical_features.lengths"
    workspace.FeedBlob(q_feature_lengths, np.array([1], dtype=np.int32))
    net.ConstantFill([q_values], [q_feature_lengths],
                     value=1,
                     dtype=caffe2_pb2.TensorProto.INT32)
    q_feature_keys = \
        "output/string_weighted_multi_categorical_features.keys"
    workspace.FeedBlob(q_feature_keys, np.array([0], dtype=np.int64))
    net.ConstantFill([q_values], [q_feature_keys],
                     value=0,
                     dtype=caffe2_pb2.TensorProto.INT64)
    output_blobs = [
        q_feature_lengths,
        q_feature_keys,
        q_lengths,
        q_keys,
        q_values,
    ]
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    predictor = cls(net, inputs, output_blobs, parameters,
                    workspace.CurrentWorkspace())
    return predictor