def extract_features(features, feature_columns): """Extracts columns from a dictionary of features. Args: features: `dict` of `Tensor` objects. feature_columns: A list of feature_columns. Returns: Seven values: - A list of all feature column names. - A list of dense floats. - A list of sparse float feature indices. - A list of sparse float feature values. - A list of sparse float feature shapes. - A list of sparse int feature indices. - A list of sparse int feature values. - A list of sparse int feature shapes. Raises: ValueError: if features is not valid. """ if not features: raise ValueError("Features dictionary must be specified.") # Make a shallow copy of features to ensure downstream usage # is unaffected by modifications in the model function. features = copy.copy(features) if feature_columns: scope = "gbdt" with variable_scope.variable_scope(scope): feature_columns = list(feature_columns) transformed_features = {} for fc in feature_columns: # pylint: disable=protected-access if isinstance(fc, feature_column_lib._EmbeddingColumn): # pylint: enable=protected-access transformed_features[fc.name] = fc_core.input_layer( features, [fc], weight_collections=[scope]) else: result = feature_column_ops.transform_features( features, [fc]) if len(result) > 1: raise ValueError( "Unexpected number of output features") transformed_features[fc.name] = result[list( result.keys())[0]] features = transformed_features dense_float_names = [] dense_floats = [] sparse_float_names = [] sparse_float_indices = [] sparse_float_values = [] sparse_float_shapes = [] sparse_int_names = [] sparse_int_indices = [] sparse_int_values = [] sparse_int_shapes = [] for key in sorted(features.keys()): tensor = features[key] if isinstance(tensor, sparse_tensor.SparseTensor): if tensor.values.dtype == dtypes.float32: sparse_float_names.append(key) sparse_float_indices.append(tensor.indices) sparse_float_values.append(tensor.values) sparse_float_shapes.append(tensor.dense_shape) elif tensor.values.dtype == dtypes.int64: sparse_int_names.append(key) sparse_int_indices.append(tensor.indices) sparse_int_values.append(tensor.values) sparse_int_shapes.append(tensor.dense_shape) else: raise ValueError( "Unsupported sparse feature %s with dtype %s." % (tensor.indices.name, tensor.dtype)) else: if tensor.dtype == dtypes.float32: if len(tensor.shape) > 1 and tensor.shape[1] > 1: unstacked = array_ops.unstack(tensor, axis=1) for i in range(len(unstacked)): dense_float_names.append(_FEATURE_NAME_TEMPLATE % (key, i)) dense_floats.append( array_ops.reshape(unstacked[i], [-1, 1])) else: dense_float_names.append(key) dense_floats.append(tensor) else: raise ValueError( "Unsupported dense feature %s with dtype %s." % (tensor.name, tensor.dtype)) # Feature columns are logically organized into incrementing slots starting # from dense floats, then sparse floats then sparse ints. fc_names = (dense_float_names + sparse_float_names + sparse_int_names) return (fc_names, dense_floats, sparse_float_indices, sparse_float_values, sparse_float_shapes, sparse_int_indices, sparse_int_values, sparse_int_shapes)
def extract_features(features, feature_columns): """Extracts columns from a dictionary of features. Args: features: `dict` of `Tensor` objects. feature_columns: A list of feature_columns. Returns: Seven values: - A list of all feature column names. - A list of dense floats. - A list of sparse float feature indices. - A list of sparse float feature values. - A list of sparse float feature shapes. - A list of sparse int feature indices. - A list of sparse int feature values. - A list of sparse int feature shapes. Raises: ValueError: if features is not valid. """ if not features: raise ValueError("Features dictionary must be specified.") # Make a shallow copy of features to ensure downstream usage # is unaffected by modifications in the model function. features = copy.copy(features) if feature_columns: scope = "gbdt" with variable_scope.variable_scope(scope): feature_columns = list(feature_columns) transformed_features = {} for fc in feature_columns: # pylint: disable=protected-access if isinstance(fc, feature_column_lib._EmbeddingColumn): # pylint: enable=protected-access transformed_features[fc.name] = fc_core.input_layer( features, [fc], weight_collections=[scope]) else: result = feature_column_ops.transform_features(features, [fc]) if len(result) > 1: raise ValueError("Unexpected number of output features") transformed_features[fc.name] = result[list(result.keys())[0]] features = transformed_features dense_float_names = [] dense_floats = [] sparse_float_names = [] sparse_float_indices = [] sparse_float_values = [] sparse_float_shapes = [] sparse_int_names = [] sparse_int_indices = [] sparse_int_values = [] sparse_int_shapes = [] for key in sorted(features.keys()): tensor = features[key] if isinstance(tensor, sparse_tensor.SparseTensor): if tensor.values.dtype == dtypes.float32: sparse_float_names.append(key) sparse_float_indices.append(tensor.indices) sparse_float_values.append(tensor.values) sparse_float_shapes.append(tensor.dense_shape) elif tensor.values.dtype == dtypes.int64: sparse_int_names.append(key) sparse_int_indices.append(tensor.indices) sparse_int_values.append(tensor.values) sparse_int_shapes.append(tensor.dense_shape) else: raise ValueError("Unsupported sparse feature %s with dtype %s." % (tensor.indices.name, tensor.dtype)) else: if tensor.dtype == dtypes.float32: if len(tensor.shape) > 1 and tensor.shape[1] > 1: unstacked = array_ops.unstack(tensor, axis=1) for i in xrange(len(unstacked)): dense_float_names.append(_FEATURE_NAME_TEMPLATE % (key, i)) dense_floats.append(array_ops.reshape(unstacked[i], [-1, 1])) else: dense_float_names.append(key) dense_floats.append(tensor) else: raise ValueError("Unsupported dense feature %s with dtype %s." % (tensor.name, tensor.dtype)) # Feature columns are logically organized into incrementing slots starting # from dense floats, then sparse floats then sparse ints. fc_names = (dense_float_names + sparse_float_names + sparse_int_names) return (fc_names, dense_floats, sparse_float_indices, sparse_float_values, sparse_float_shapes, sparse_int_indices, sparse_int_values, sparse_int_shapes)