def testRealValuedVarLenColumnDtypes(self): rvc = fc._real_valued_var_len_column("rvc", is_sparse=True) self.assertDictEqual( { "rvc": parsing_ops.VarLenFeature(dtype=dtypes.float32) }, rvc.config) rvc = fc._real_valued_var_len_column("rvc", default_value=0, is_sparse=False) self.assertDictEqual( { "rvc": parsing_ops.FixedLenSequenceFeature(shape=[], dtype=dtypes.float32, allow_missing=True, default_value=0.0) }, rvc.config) rvc = fc._real_valued_var_len_column("rvc", dtype=dtypes.int32, default_value=0, is_sparse=True) self.assertDictEqual( { "rvc": parsing_ops.VarLenFeature(dtype=dtypes.int32) }, rvc.config) with self.assertRaisesRegexp(TypeError, "dtype must be convertible to float"): fc._real_valued_var_len_column("rvc", dtype=dtypes.string, default_value="", is_sparse=True)
def testCreateSequenceFeatureSpec(self): sparse_col = fc.sparse_column_with_hash_bucket( "sparse_column", hash_bucket_size=100) embedding_col = fc.embedding_column( fc.sparse_column_with_hash_bucket( "sparse_column_for_embedding", hash_bucket_size=10), dimension=4) sparse_id_col = fc.sparse_column_with_keys("id_column", ["marlo", "omar", "stringer"]) weighted_id_col = fc.weighted_sparse_column(sparse_id_col, "id_weights_column") real_valued_col1 = fc.real_valued_column("real_valued_column", dimension=2) real_valued_col2 = fc.real_valued_column( "real_valued_default_column", dimension=5, default_value=3.0) real_valued_col3 = fc._real_valued_var_len_column( "real_valued_var_len_column", default_value=3.0, is_sparse=True) real_valued_col4 = fc._real_valued_var_len_column( "real_valued_var_len_dense_column", default_value=4.0, is_sparse=False) feature_columns = set([ sparse_col, embedding_col, weighted_id_col, real_valued_col1, real_valued_col2, real_valued_col3, real_valued_col4 ]) feature_spec = fc._create_sequence_feature_spec_for_parsing(feature_columns) expected_feature_spec = { "sparse_column": parsing_ops.VarLenFeature(dtypes.string), "sparse_column_for_embedding": parsing_ops.VarLenFeature(dtypes.string), "id_column": parsing_ops.VarLenFeature(dtypes.string), "id_weights_column": parsing_ops.VarLenFeature(dtypes.float32), "real_valued_column": parsing_ops.FixedLenSequenceFeature( shape=[2], dtype=dtypes.float32, allow_missing=False), "real_valued_default_column": parsing_ops.FixedLenSequenceFeature( shape=[5], dtype=dtypes.float32, allow_missing=True), "real_valued_var_len_column": parsing_ops.VarLenFeature(dtype=dtypes.float32), "real_valued_var_len_dense_column": parsing_ops.FixedLenSequenceFeature( shape=[], dtype=dtypes.float32, allow_missing=True, default_value=4.0), } self.assertDictEqual(expected_feature_spec, feature_spec)
def testRealValuedVarLenColumn(self): c = fc._real_valued_var_len_column("ccc", is_sparse=True) self.assertTrue(c.is_sparse) self.assertTrue(c.default_value is None) # default_value is an integer. c5 = fc._real_valued_var_len_column("c5", default_value=2) self.assertEqual(c5.default_value, 2) # default_value is a float. d4 = fc._real_valued_var_len_column("d4", is_sparse=True) self.assertEqual(d4.default_value, None) self.assertEqual(d4.is_sparse, True) # Default value is a list but dimension is None. with self.assertRaisesRegexp(ValueError, "Only scalar default value.*"): fc._real_valued_var_len_column("g5", default_value=[2., 3.])
def testMakePlaceHolderTensorsForBaseFeatures(self): sparse_col = fc.sparse_column_with_hash_bucket( "sparse_column", hash_bucket_size=100) real_valued_col = fc.real_valued_column("real_valued_column", 5) vlen_real_valued_col = fc._real_valued_var_len_column( "vlen_real_valued_column", is_sparse=True) bucketized_col = fc.bucketized_column( fc.real_valued_column("real_valued_column_for_bucketization"), [0, 4]) feature_columns = set( [sparse_col, real_valued_col, vlen_real_valued_col, bucketized_col]) placeholders = ( fc.make_place_holder_tensors_for_base_features(feature_columns)) self.assertEqual(4, len(placeholders)) self.assertTrue( isinstance(placeholders["sparse_column"], sparse_tensor_lib.SparseTensor)) self.assertTrue( isinstance(placeholders["vlen_real_valued_column"], sparse_tensor_lib.SparseTensor)) placeholder = placeholders["real_valued_column"] self.assertGreaterEqual( placeholder.name.find(u"Placeholder_real_valued_column"), 0) self.assertEqual(dtypes.float32, placeholder.dtype) self.assertEqual([None, 5], placeholder.get_shape().as_list()) placeholder = placeholders["real_valued_column_for_bucketization"] self.assertGreaterEqual( placeholder.name.find( u"Placeholder_real_valued_column_for_bucketization"), 0) self.assertEqual(dtypes.float32, placeholder.dtype) self.assertEqual([None, 1], placeholder.get_shape().as_list())
def testCreateFeatureSpec_ExperimentalColumns(self): real_valued_col0 = fc._real_valued_var_len_column( "real_valued_column0", is_sparse=True) real_valued_col1 = fc._real_valued_var_len_column( "real_valued_column1", dtype=dtypes.int64, default_value=0, is_sparse=False) feature_columns = set([real_valued_col0, real_valued_col1]) expected_config = { "real_valued_column0": parsing_ops.VarLenFeature(dtype=dtypes.float32), "real_valued_column1": parsing_ops.FixedLenSequenceFeature( [], dtype=dtypes.int64, allow_missing=True, default_value=0), } config = fc.create_feature_spec_for_parsing(feature_columns) self.assertDictEqual(expected_config, config)
def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self): real_valued_col1 = fc.real_valued_column( "real_valued_column1", default_value=2) real_valued_col2 = fc.real_valued_column( "real_valued_column2", 5, default_value=4) real_valued_col3 = fc.real_valued_column( "real_valued_column3", default_value=[8]) real_valued_col4 = fc.real_valued_column( "real_valued_column4", 3, default_value=[1, 0, 6]) real_valued_col5 = fc._real_valued_var_len_column( "real_valued_column5", default_value=2, is_sparse=True) real_valued_col6 = fc._real_valued_var_len_column( "real_valued_column6", dtype=dtypes.int64, default_value=1, is_sparse=False) feature_columns = [ real_valued_col1, real_valued_col2, real_valued_col3, real_valued_col4, real_valued_col5, real_valued_col6 ] config = fc.create_feature_spec_for_parsing(feature_columns) self.assertEqual(6, len(config)) self.assertDictEqual( { "real_valued_column1": parsing_ops.FixedLenFeature( [1], dtype=dtypes.float32, default_value=[2.]), "real_valued_column2": parsing_ops.FixedLenFeature( [5], dtype=dtypes.float32, default_value=[4., 4., 4., 4., 4.]), "real_valued_column3": parsing_ops.FixedLenFeature( [1], dtype=dtypes.float32, default_value=[8.]), "real_valued_column4": parsing_ops.FixedLenFeature( [3], dtype=dtypes.float32, default_value=[1., 0., 6.]), "real_valued_column5": parsing_ops.VarLenFeature(dtype=dtypes.float32), "real_valued_column6": parsing_ops.FixedLenSequenceFeature( [], dtype=dtypes.int64, allow_missing=True, default_value=1) }, config)
def testRealValuedColumnDensification(self): """Tests densification behavior of `RealValuedColumn`.""" # No default value, dimension 1 float. real_valued_column = fc._real_valued_var_len_column( "sparse_real_valued1", is_sparse=True) sparse_tensor = sparse_tensor_lib.SparseTensor( values=[2.0, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1]) with self.assertRaisesRegexp(ValueError, "Set is_sparse to False"): real_valued_column._to_dnn_input_layer(sparse_tensor)
def testRealValuedColumnDensification(self): """Tests densification behavior of `RealValuedColumn`.""" # No default value, dimension 1 float. real_valued_column = fc._real_valued_var_len_column( "sparse_real_valued1", is_sparse=True) sparse_tensor = sparse_tensor_lib.SparseTensor( values=[2.0, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1]) with self.assertRaisesRegexp( ValueError, "Calling an abstract method."): real_valued_column._to_dnn_input_layer(sparse_tensor)
def testCreateFeatureSpec(self): sparse_col = fc.sparse_column_with_hash_bucket( "sparse_column", hash_bucket_size=100) embedding_col = fc.embedding_column( fc.sparse_column_with_hash_bucket( "sparse_column_for_embedding", hash_bucket_size=10), dimension=4) str_sparse_id_col = fc.sparse_column_with_keys( "str_id_column", ["marlo", "omar", "stringer"]) int32_sparse_id_col = fc.sparse_column_with_keys( "int32_id_column", [42, 1, -1000], dtype=dtypes.int32) int64_sparse_id_col = fc.sparse_column_with_keys( "int64_id_column", [42, 1, -1000], dtype=dtypes.int64) weighted_id_col = fc.weighted_sparse_column(str_sparse_id_col, "str_id_weights_column") real_valued_col1 = fc.real_valued_column("real_valued_column1") real_valued_col2 = fc.real_valued_column("real_valued_column2", 5) real_valued_col3 = fc._real_valued_var_len_column( "real_valued_column3", is_sparse=True) real_valued_col4 = fc._real_valued_var_len_column( "real_valued_column4", dtype=dtypes.int64, default_value=0, is_sparse=False) bucketized_col1 = fc.bucketized_column( fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4]) bucketized_col2 = fc.bucketized_column( fc.real_valued_column("real_valued_column_for_bucketization2", 4), [0, 4]) a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100) b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100) cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000) one_hot_col = fc.one_hot_column(fc.sparse_column_with_hash_bucket( "sparse_column_for_one_hot", hash_bucket_size=100)) scattered_embedding_col = fc.scattered_embedding_column( "scattered_embedding_column", size=100, dimension=10, hash_key=1) feature_columns = set([ sparse_col, embedding_col, weighted_id_col, int32_sparse_id_col, int64_sparse_id_col, real_valued_col1, real_valued_col2, real_valued_col3, real_valued_col4, bucketized_col1, bucketized_col2, cross_col, one_hot_col, scattered_embedding_col ]) expected_config = { "sparse_column": parsing_ops.VarLenFeature(dtypes.string), "sparse_column_for_embedding": parsing_ops.VarLenFeature(dtypes.string), "str_id_column": parsing_ops.VarLenFeature(dtypes.string), "int32_id_column": parsing_ops.VarLenFeature(dtypes.int32), "int64_id_column": parsing_ops.VarLenFeature(dtypes.int64), "str_id_weights_column": parsing_ops.VarLenFeature(dtypes.float32), "real_valued_column1": parsing_ops.FixedLenFeature( [1], dtype=dtypes.float32), "real_valued_column2": parsing_ops.FixedLenFeature( [5], dtype=dtypes.float32), "real_valued_column3": parsing_ops.VarLenFeature(dtype=dtypes.float32), "real_valued_column4": parsing_ops.FixedLenSequenceFeature( [], dtype=dtypes.int64, allow_missing=True, default_value=0), "real_valued_column_for_bucketization1": parsing_ops.FixedLenFeature( [1], dtype=dtypes.float32), "real_valued_column_for_bucketization2": parsing_ops.FixedLenFeature( [4], dtype=dtypes.float32), "cross_aaa": parsing_ops.VarLenFeature(dtypes.string), "cross_bbb": parsing_ops.VarLenFeature(dtypes.string), "sparse_column_for_one_hot": parsing_ops.VarLenFeature(dtypes.string), "scattered_embedding_column": parsing_ops.VarLenFeature(dtypes.string), } config = fc.create_feature_spec_for_parsing(feature_columns) self.assertDictEqual(expected_config, config) # Test that the same config is parsed out if we pass a dictionary. feature_columns_dict = { str(i): val for i, val in enumerate(feature_columns) } config = fc.create_feature_spec_for_parsing(feature_columns_dict) self.assertDictEqual(expected_config, config)
def testBucketizedColumnRequiresRealValuedColumnDimension(self): with self.assertRaisesRegexp( TypeError, "source_column must be an instance of _RealValuedColumn.*"): fc.bucketized_column(fc._real_valued_var_len_column("bbb", is_sparse=True), [0])