def test_preprocessing_network(self):
    """Compare the Caffe2 preprocessing net with NumpyFeatureProcessor.

    Normalization parameters are identified for every feature returned by
    ``read_data()``. Each feature is then run through a ``PreprocessorNet``
    blob and the fetched result is compared, within a tolerance, against
    the output of ``NumpyFeatureProcessor.preprocess``.
    """
    feature_value_map = read_data()

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, feature_type=self._feature_type_override(name)
        )
    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet()
    name_preprocessed_blob_map = {}
    for feature_name in feature_value_map:
        # Feed a placeholder under the feature's name before the
        # preprocessing ops are added; real data is fed further down.
        workspace.FeedBlob(str(feature_name), np.array([0], dtype=np.int32))
        preprocessed_blob, _ = preprocessor.preprocess_blob(
            str(feature_name), [normalization_parameters[feature_name]]
        )
        name_preprocessed_blob_map[feature_name] = preprocessed_blob

    workspace.CreateNet(net)

    for feature_name, feature_value in six.iteritems(feature_value_map):
        feature_value = np.expand_dims(feature_value, -1)
        workspace.FeedBlob(str(feature_name), feature_value)
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        normalized_features = workspace.FetchBlob(
            name_preprocessed_blob_map[feature_name]
        )
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)

        tolerance = 0.01
        if feature_name == BOXCOX_FEATURE_ID:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5

        # Evaluate the comparison once; the mask drives both the assertion
        # and the indices reported in the failure message.
        matches = np.isclose(
            normalized_features,
            test_features[feature_name],
            rtol=tolerance,
            atol=tolerance,
        )
        non_matching = np.where(np.logical_not(matches))
        self.assertTrue(
            np.all(matches),
            "{} does not match: {} {}".format(
                feature_name,
                normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist(),
            ),
        )
def test_preprocessing_network(self):
    """Compare ``Preprocessor.forward`` with NumpyFeatureProcessor.

    For every feature returned by ``read_data()`` the normalization
    parameters are identified, the first value is overwritten with
    ``MISSING_VALUE``, and the feature is pushed through a single-feature
    ``Preprocessor``. The outputs are then compared, within a tolerance,
    against ``NumpyFeatureProcessor.preprocess`` run on the same data.
    """
    feature_value_map = read_data()

    normalization_parameters = {}
    name_preprocessed_blob_map = {}

    for feature_name, feature_values in feature_value_map.items():
        # NOTE(review): the other identify_parameter call sites in this file
        # pass the feature name as the first argument; confirm this call
        # matches the signature in use.
        normalization_parameters[feature_name] = normalization.identify_parameter(
            feature_values,
            feature_type=self._feature_type_override(feature_name),
        )
        # Overwrite one entry with MISSING_VALUE (after the parameters have
        # been identified) so both processors are given a missing value.
        feature_values[0] = MISSING_VALUE
        preprocessor = Preprocessor(
            {feature_name: normalization_parameters[feature_name]}, False
        )
        feature_values_matrix = np.expand_dims(feature_values, -1)
        normalized_feature_values = preprocessor.forward(feature_values_matrix)
        name_preprocessed_blob_map[feature_name] = normalized_feature_values.numpy()

    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    for feature_name in feature_value_map:
        normalized_features = name_preprocessed_blob_map[feature_name]
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)

        tolerance = 0.01
        if feature_name == BOXCOX_FEATURE_ID:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5

        # Flatten and compare once; reuse the results for the assertion and
        # for the values shown in the failure message.
        actual = normalized_features.flatten()
        expected = test_features[feature_name].flatten()
        matches = np.isclose(actual, expected, rtol=tolerance, atol=tolerance)
        non_matching = np.where(np.logical_not(matches))
        self.assertTrue(
            np.all(matches),
            "{} does not match: {} \n!=\n {}".format(
                feature_name, actual[non_matching], expected[non_matching]
            ),
        )
def expected_action_features(self, normalize):
    """Return the expected dense action-feature matrix.

    Columns are in feature order 12, 11, 13. When ``normalize`` is truthy
    the matrix is passed through ``NumpyFeatureProcessor.preprocess_array``
    with the action normalization parameters; otherwise the raw float32
    matrix is returned.
    """
    raw = np.array(
        [
            [21, 20, MISSING_VALUE],
            [24, 23, 25],
            [27, MISSING_VALUE, 26],
        ],
        dtype=np.float32,
    )
    if not normalize:
        return raw
    # Feature order: 12, 11, 13
    return NumpyFeatureProcessor.preprocess_array(
        raw, [12, 11, 13], self.get_action_normalization_parameters()
    )
def expected_next_action_features(self, normalize):
    """Return the expected dense next-action feature matrix.

    Columns are in feature order 12, 11, 13. When ``normalize`` is truthy
    the matrix is passed through ``NumpyFeatureProcessor.preprocess_array``
    with the action normalization parameters; otherwise the raw float32
    matrix is returned.
    """
    raw = np.array(
        [
            [31, 30, 33],
            [34, MISSING_VALUE, 35],
            [MISSING_VALUE, 36, 37],
        ],
        dtype=np.float32,
    )
    if not normalize:
        return raw
    # Feature order: 12, 11, 13
    return NumpyFeatureProcessor.preprocess_array(
        raw, [12, 11, 13], self.get_action_normalization_parameters()
    )
def expected_action_features(self, normalize):
    """Build the dense action features the test expects.

    The three columns correspond to feature ids 12, 11 and 13, in that
    order. With a truthy ``normalize`` the result of
    ``NumpyFeatureProcessor.preprocess_array`` (using the action
    normalization parameters) is returned instead of the raw matrix.
    """
    feature_order = [12, 11, 13]
    rows = [
        [21, 20, MISSING_VALUE],
        [24, 23, 25],
        [27, MISSING_VALUE, 26],
    ]
    features = np.array(rows, dtype=np.float32)
    if normalize:
        features = NumpyFeatureProcessor.preprocess_array(
            features, feature_order, self.get_action_normalization_parameters()
        )
    return features
def expected_next_action_features(self, normalize):
    """Build the dense next-action features the test expects.

    The three columns correspond to feature ids 12, 11 and 13, in that
    order. With a truthy ``normalize`` the result of
    ``NumpyFeatureProcessor.preprocess_array`` (using the action
    normalization parameters) is returned instead of the raw matrix.
    """
    feature_order = [12, 11, 13]
    rows = [
        [31, 30, 33],
        [34, MISSING_VALUE, 35],
        [MISSING_VALUE, 36, 37],
    ]
    features = np.array(rows, dtype=np.float32)
    if normalize:
        features = NumpyFeatureProcessor.preprocess_array(
            features, feature_order, self.get_action_normalization_parameters()
        )
    return features
def expected_next_state_features(self, normalize):
    """Return the expected dense next-state feature matrix.

    Columns are in feature order 1, 3, 2, 4. When ``normalize`` is truthy
    the matrix is passed through ``NumpyFeatureProcessor.preprocess_array``
    with the state normalization parameters; otherwise the raw float32
    matrix is returned.
    """
    raw = np.array(
        [
            [11, MISSING_VALUE, 10, MISSING_VALUE],
            [13, MISSING_VALUE, MISSING_VALUE, MISSING_VALUE],
            [MISSING_VALUE, 15, 14, 16],
        ],
        dtype=np.float32,
    )
    if not normalize:
        return raw
    # Feature order: 1, 3, 2, 4
    return NumpyFeatureProcessor.preprocess_array(
        raw, [1, 3, 2, 4], self.get_state_normalization_parameters()
    )
def expected_next_state_features(self, normalize):
    """Build the dense next-state features the test expects.

    The four columns correspond to feature ids 1, 3, 2 and 4, in that
    order. With a truthy ``normalize`` the result of
    ``NumpyFeatureProcessor.preprocess_array`` (using the state
    normalization parameters) is returned instead of the raw matrix.
    """
    feature_order = [1, 3, 2, 4]
    rows = [
        [11, MISSING_VALUE, 10, MISSING_VALUE],
        [13, MISSING_VALUE, MISSING_VALUE, MISSING_VALUE],
        [MISSING_VALUE, 15, 14, 16],
    ]
    features = np.array(rows, dtype=np.float32)
    if normalize:
        features = NumpyFeatureProcessor.preprocess_array(
            features, feature_order, self.get_state_normalization_parameters()
        )
    return features
def expected_possible_next_actions_features(self, normalize):
    """Return the expected dense possible-next-actions feature matrix.

    Columns are in feature order 12, 11, 13. When ``normalize`` is truthy
    the matrix is passed through ``NumpyFeatureProcessor.preprocess_array``
    with the action normalization parameters; otherwise the raw float32
    matrix is returned.
    """
    raw = np.array(
        [
            [MISSING_VALUE, 40, MISSING_VALUE],
            [MISSING_VALUE, MISSING_VALUE, MISSING_VALUE],
            [41, MISSING_VALUE, MISSING_VALUE],
            [45, 43, 44],
            [MISSING_VALUE, MISSING_VALUE, MISSING_VALUE],
            [MISSING_VALUE, MISSING_VALUE, MISSING_VALUE],
        ],
        dtype=np.float32,
    )
    if not normalize:
        return raw
    # Feature order: 12, 11, 13
    return NumpyFeatureProcessor.preprocess_array(
        raw, [12, 11, 13], self.get_action_normalization_parameters()
    )
def expected_possible_next_actions_features(self, normalize):
    """Build the dense possible-next-actions features the test expects.

    The three columns correspond to feature ids 12, 11 and 13, in that
    order. With a truthy ``normalize`` the result of
    ``NumpyFeatureProcessor.preprocess_array`` (using the action
    normalization parameters) is returned instead of the raw matrix.
    """
    feature_order = [12, 11, 13]
    rows = [
        [MISSING_VALUE, 40, MISSING_VALUE],
        [MISSING_VALUE, MISSING_VALUE, MISSING_VALUE],
        [41, MISSING_VALUE, MISSING_VALUE],
        [45, 43, 44],
        [MISSING_VALUE, MISSING_VALUE, MISSING_VALUE],
        [MISSING_VALUE, MISSING_VALUE, MISSING_VALUE],
    ]
    features = np.array(rows, dtype=np.float32)
    if normalize:
        features = NumpyFeatureProcessor.preprocess_array(
            features, feature_order, self.get_action_normalization_parameters()
        )
    return features
def test_prepare_normalization_and_normalize(self):
    """Identify normalization parameters and sanity-check normalized output.

    Parameters are identified for every feature returned by ``read_data()``
    and their types are checked against ``id_to_type``. All features are
    then normalized in one dense matrix through a Caffe2 ``PreprocessorNet``
    and the resulting columns are validated with per-feature-type checks.
    """
    feature_value_map = read_data()

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, 10, feature_type=self._feature_type_override(name)
        )
    for k, v in normalization_parameters.items():
        expected_type = id_to_type(k)
        if expected_type == CONTINUOUS:
            self.assertEqual(v.feature_type, CONTINUOUS)
            self.assertIs(v.boxcox_lambda, None)
            self.assertIs(v.boxcox_shift, None)
        elif expected_type == BOXCOX:
            self.assertEqual(v.feature_type, BOXCOX)
            self.assertIsNot(v.boxcox_lambda, None)
            self.assertIsNot(v.boxcox_shift, None)
        else:
            # unittest assertion rather than a bare assert: it is not
            # stripped under -O and reports both values on failure.
            self.assertEqual(v.feature_type, expected_type)
    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()
    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
    for i, feature in enumerate(sorted_features):
        input_matrix[:, i] = feature_value_map[feature]
    input_matrix_blob = "input_matrix_blob"
    # Feed an empty placeholder before the normalization ops are added;
    # the real matrix is fed just before the net runs.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    workspace.RunNetOnce(norm_net)
    normalized_feature_matrix = workspace.FetchBlob(output_blob)

    # Slice the output back into per-feature column groups: ENUM features
    # take one column per possible value, every other type takes one.
    normalized_features = {}
    on_column = 0
    for feature in sorted_features:
        norm = normalization_parameters[feature]
        if norm.feature_type == ENUM:
            column_size = len(norm.possible_values)
        else:
            column_size = 1
        normalized_features[feature] = normalized_feature_matrix[
            :, on_column : (on_column + column_size)
        ]
        on_column += column_size

    self.assertTrue(
        all(
            np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
            for parameter in normalization_parameters.values()
        )
    )
    for k, v in six.iteritems(normalized_features):
        self.assertTrue(np.all(np.isfinite(v)))
        feature_type = normalization_parameters[k].feature_type
        if feature_type == identify_types.PROBABILITY:
            sigmoidv = special.expit(v)
            self.assertTrue(
                np.all(
                    np.logical_and(np.greater(sigmoidv, 0), np.less(sigmoidv, 1))
                )
            )
        elif feature_type == identify_types.ENUM:
            possible_values = normalization_parameters[k].possible_values
            self.assertEqual(v.shape[0], len(feature_value_map[k]))
            self.assertEqual(v.shape[1], len(possible_values))

            possible_value_map = {
                possible_value: i
                for i, possible_value in enumerate(possible_values)
            }

            # The column holding 1 must be the index of the original value.
            for i, row in enumerate(v):
                original_feature = feature_value_map[k][i]
                self.assertEqual(
                    possible_value_map[original_feature], np.where(row == 1)[0][0]
                )
        elif feature_type == identify_types.QUANTILE:
            # NOTE(review): v[0] is the first row of the column slice, so
            # this loop only covers that row's entries; v[:, 0] may have
            # been intended -- confirm before widening the check.
            for i, feature in enumerate(v[0]):
                original_feature = feature_value_map[k][i]
                expected = NumpyFeatureProcessor.value_to_quantile(
                    original_feature, normalization_parameters[k].quantiles
                )
                self.assertAlmostEqual(feature, expected, 2)
        elif feature_type == identify_types.BINARY:
            pass
        elif (
            feature_type == identify_types.CONTINUOUS
            or feature_type == identify_types.BOXCOX
        ):
            sample_stddev = np.std(v, ddof=1)
            one_stddev = np.isclose(sample_stddev, 1, atol=0.01)
            zero_stddev = np.isclose(sample_stddev, 0, atol=0.01)
            zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
            self.assertTrue(
                np.all(zero_mean),
                "mean of feature {} is {}, not 0".format(k, np.mean(v)),
            )
            self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
        elif feature_type == identify_types.CONTINUOUS_ACTION:
            less_than_max = v < 1
            more_than_min = v > -1
            self.assertTrue(
                np.all(less_than_max),
                "values are not less than 1: {}".format(v[~less_than_max]),
            )
            self.assertTrue(
                np.all(more_than_min),
                "values are not more than -1: {}".format(v[~more_than_min]),
            )
        else:
            raise NotImplementedError()
def test_prepare_normalization_and_normalize(self):
    """Identify normalization parameters and sanity-check normalized output.

    Parameters are identified for every feature returned by ``read_data()``
    and their types are checked against ``id_to_type``. All features are
    then normalized in one dense matrix through a Caffe2 ``PreprocessorNet``
    and the resulting columns are validated with per-feature-type checks.
    """
    feature_value_map = read_data()

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, 10, feature_type=self._feature_type_override(name)
        )
    for k, v in normalization_parameters.items():
        expected_type = id_to_type(k)
        if expected_type == CONTINUOUS:
            self.assertEqual(v.feature_type, CONTINUOUS)
            self.assertIs(v.boxcox_lambda, None)
            self.assertIs(v.boxcox_shift, None)
        elif expected_type == BOXCOX:
            self.assertEqual(v.feature_type, BOXCOX)
            self.assertIsNot(v.boxcox_lambda, None)
            self.assertIsNot(v.boxcox_shift, None)
        else:
            # unittest assertion rather than a bare assert: it is not
            # stripped under -O and reports both values on failure.
            self.assertEqual(v.feature_type, expected_type)
    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()
    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
    for i, feature in enumerate(sorted_features):
        input_matrix[:, i] = feature_value_map[feature]
    input_matrix_blob = "input_matrix_blob"
    # Feed an empty placeholder before the normalization ops are added;
    # the real matrix is fed just before the net runs.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    workspace.RunNetOnce(norm_net)
    normalized_feature_matrix = workspace.FetchBlob(output_blob)

    # Slice the output back into per-feature column groups: ENUM features
    # take one column per possible value, every other type takes one.
    normalized_features = {}
    on_column = 0
    for feature in sorted_features:
        norm = normalization_parameters[feature]
        if norm.feature_type == ENUM:
            column_size = len(norm.possible_values)
        else:
            column_size = 1
        normalized_features[feature] = normalized_feature_matrix[
            :, on_column : (on_column + column_size)
        ]
        on_column += column_size

    self.assertTrue(
        all(
            np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
            for parameter in normalization_parameters.values()
        )
    )
    for k, v in six.iteritems(normalized_features):
        self.assertTrue(np.all(np.isfinite(v)))
        feature_type = normalization_parameters[k].feature_type
        if feature_type == identify_types.PROBABILITY:
            sigmoidv = special.expit(v)
            self.assertTrue(
                np.all(
                    np.logical_and(np.greater(sigmoidv, 0), np.less(sigmoidv, 1))
                )
            )
        elif feature_type == identify_types.ENUM:
            possible_values = normalization_parameters[k].possible_values
            self.assertEqual(v.shape[0], len(feature_value_map[k]))
            self.assertEqual(v.shape[1], len(possible_values))

            possible_value_map = {
                possible_value: i
                for i, possible_value in enumerate(possible_values)
            }

            # The column holding 1 must be the index of the original value.
            for i, row in enumerate(v):
                original_feature = feature_value_map[k][i]
                self.assertEqual(
                    possible_value_map[original_feature], np.where(row == 1)[0][0]
                )
        elif feature_type == identify_types.QUANTILE:
            # NOTE(review): v[0] is the first row of the column slice, so
            # this loop only covers that row's entries; v[:, 0] may have
            # been intended -- confirm before widening the check.
            for i, feature in enumerate(v[0]):
                original_feature = feature_value_map[k][i]
                expected = NumpyFeatureProcessor.value_to_quantile(
                    original_feature, normalization_parameters[k].quantiles
                )
                self.assertAlmostEqual(feature, expected, 2)
        elif feature_type == identify_types.BINARY:
            pass
        elif (
            feature_type == identify_types.CONTINUOUS
            or feature_type == identify_types.BOXCOX
        ):
            sample_stddev = np.std(v, ddof=1)
            one_stddev = np.isclose(sample_stddev, 1, atol=0.01)
            zero_stddev = np.isclose(sample_stddev, 0, atol=0.01)
            zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
            self.assertTrue(
                np.all(zero_mean),
                "mean of feature {} is {}, not 0".format(k, np.mean(v)),
            )
            self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
        elif feature_type == identify_types.CONTINUOUS_ACTION:
            less_than_max = v < 1
            more_than_min = v > -1
            self.assertTrue(
                np.all(less_than_max),
                "values are not less than 1: {}".format(v[~less_than_max]),
            )
            self.assertTrue(
                np.all(more_than_min),
                "values are not more than -1: {}".format(v[~more_than_min]),
            )
        else:
            raise NotImplementedError()
def test_preprocessing_network(self):
    """Compare the Caffe2 preprocessing net with NumpyFeatureProcessor.

    Normalization parameters are identified for every feature returned by
    ``read_data()``. Each feature is then run through a ``PreprocessorNet``
    blob and the fetched result is compared, within a tolerance, against
    the output of ``NumpyFeatureProcessor.preprocess``.
    """
    feature_value_map = read_data()

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, feature_type=self._feature_type_override(name)
        )
    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet()
    name_preprocessed_blob_map = {}
    for feature_name in feature_value_map:
        # Feed a placeholder under the feature's name before the
        # preprocessing ops are added; real data is fed further down.
        workspace.FeedBlob(str(feature_name), np.array([0], dtype=np.int32))
        preprocessed_blob, _ = preprocessor.preprocess_blob(
            str(feature_name), [normalization_parameters[feature_name]]
        )
        name_preprocessed_blob_map[feature_name] = preprocessed_blob

    workspace.CreateNet(net)

    for feature_name, feature_value in six.iteritems(feature_value_map):
        feature_value = np.expand_dims(feature_value, -1)
        workspace.FeedBlob(str(feature_name), feature_value)
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        normalized_features = workspace.FetchBlob(
            name_preprocessed_blob_map[feature_name]
        )
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)

        tolerance = 0.01
        if feature_name == BOXCOX_FEATURE_ID:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5

        # Evaluate the comparison once; the mask drives both the assertion
        # and the indices reported in the failure message.
        matches = np.isclose(
            normalized_features,
            test_features[feature_name],
            rtol=tolerance,
            atol=tolerance,
        )
        non_matching = np.where(np.logical_not(matches))
        self.assertTrue(
            np.all(matches),
            "{} does not match: {} {}".format(
                feature_name,
                normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist(),
            ),
        )
def test_preprocessing_network(self):
    """Compare ``Preprocessor.forward`` with NumpyFeatureProcessor.

    For every feature returned by ``read_data()`` the normalization
    parameters are identified, the first value is overwritten with
    ``MISSING_VALUE``, and the feature is pushed through a single-feature
    ``Preprocessor``. The outputs are then compared, within a tolerance,
    against ``NumpyFeatureProcessor.preprocess`` run on the same data.
    """
    feature_value_map = read_data()

    normalization_parameters = {}
    name_preprocessed_blob_map = {}

    for feature_name, feature_values in feature_value_map.items():
        normalization_parameters[feature_name] = normalization.identify_parameter(
            feature_name,
            feature_values,
            feature_type=self._feature_type_override(feature_name),
        )
        # Overwrite one entry with MISSING_VALUE (after the parameters have
        # been identified) so both processors are given a missing value.
        feature_values[0] = MISSING_VALUE
        preprocessor = Preprocessor(
            {feature_name: normalization_parameters[feature_name]}, False
        )
        feature_values_matrix = np.expand_dims(feature_values, -1)
        normalized_feature_values = preprocessor.forward(feature_values_matrix)
        name_preprocessed_blob_map[feature_name] = normalized_feature_values.numpy()

    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    for feature_name in feature_value_map:
        normalized_features = name_preprocessed_blob_map[feature_name]
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)

        tolerance = 0.01
        if feature_name == BOXCOX_FEATURE_ID:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5

        # Flatten and compare once; reuse the results for the assertion and
        # for the values shown in the failure message.
        actual = normalized_features.flatten()
        expected = test_features[feature_name].flatten()
        matches = np.isclose(actual, expected, rtol=tolerance, atol=tolerance)
        non_matching = np.where(np.logical_not(matches))
        self.assertTrue(
            np.all(matches),
            "{} does not match: {} \n!=\n {}".format(
                feature_name, actual[non_matching], expected[non_matching]
            ),
        )