# Example 1
def extract_features(features, feature_columns):
    """Extracts columns from a dictionary of features.

    Args:
      features: `dict` of `Tensor` objects.
      feature_columns: A list of feature_columns.

    Returns:
      Eight values:
        - A list of all feature column names.
        - A list of dense floats.
        - A list of sparse float feature indices.
        - A list of sparse float feature values.
        - A list of sparse float feature shapes.
        - A list of sparse int feature indices.
        - A list of sparse int feature values.
        - A list of sparse int feature shapes.

    Raises:
      ValueError: if features is not valid.
    """
    if not features:
        raise ValueError("Features dictionary must be specified.")

    # Make a shallow copy of features to ensure downstream usage
    # is unaffected by modifications in the model function.
    features = copy.copy(features)
    if feature_columns:
        scope = "gbdt"
        with variable_scope.variable_scope(scope):
            feature_columns = list(feature_columns)
            transformed_features = {}
            for fc in feature_columns:
                # pylint: disable=protected-access
                if isinstance(fc, feature_column_lib._EmbeddingColumn):
                    # pylint: enable=protected-access
                    # Embedding columns need the full input_layer machinery;
                    # scope variables go into the "gbdt" collection.
                    transformed_features[fc.name] = fc_core.input_layer(
                        features, [fc], weight_collections=[scope])
                else:
                    result = feature_column_ops.transform_features(
                        features, [fc])
                    # Exactly one output is expected per column; the original
                    # `> 1` check let an empty result fall through to an
                    # opaque IndexError.
                    if len(result) != 1:
                        raise ValueError(
                            "Unexpected number of output features")
                    transformed_features[fc.name] = list(result.values())[0]
        features = transformed_features

    dense_float_names = []
    dense_floats = []
    sparse_float_names = []
    sparse_float_indices = []
    sparse_float_values = []
    sparse_float_shapes = []
    sparse_int_names = []
    sparse_int_indices = []
    sparse_int_values = []
    sparse_int_shapes = []
    # Sort keys so the slot order is deterministic across runs.
    for key in sorted(features):
        tensor = features[key]
        if isinstance(tensor, sparse_tensor.SparseTensor):
            if tensor.values.dtype == dtypes.float32:
                sparse_float_names.append(key)
                sparse_float_indices.append(tensor.indices)
                sparse_float_values.append(tensor.values)
                sparse_float_shapes.append(tensor.dense_shape)
            elif tensor.values.dtype == dtypes.int64:
                sparse_int_names.append(key)
                sparse_int_indices.append(tensor.indices)
                sparse_int_values.append(tensor.values)
                sparse_int_shapes.append(tensor.dense_shape)
            else:
                raise ValueError(
                    "Unsupported sparse feature %s with dtype %s." %
                    (tensor.indices.name, tensor.dtype))
        else:
            if tensor.dtype == dtypes.float32:
                if len(tensor.shape) > 1 and tensor.shape[1] > 1:
                    # Multi-dimensional dense floats are split into one
                    # [-1, 1] column per dimension, each with its own name.
                    unstacked = array_ops.unstack(tensor, axis=1)
                    for i, column in enumerate(unstacked):
                        dense_float_names.append(_FEATURE_NAME_TEMPLATE %
                                                 (key, i))
                        dense_floats.append(
                            array_ops.reshape(column, [-1, 1]))
                else:
                    dense_float_names.append(key)
                    dense_floats.append(tensor)
            else:
                raise ValueError(
                    "Unsupported dense feature %s with dtype %s." %
                    (tensor.name, tensor.dtype))
    # Feature columns are logically organized into incrementing slots starting
    # from dense floats, then sparse floats then sparse ints.
    fc_names = (dense_float_names + sparse_float_names + sparse_int_names)
    return (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
            sparse_float_shapes, sparse_int_indices, sparse_int_values,
            sparse_int_shapes)
# Example 2
def extract_features(features, feature_columns):
  """Extracts columns from a dictionary of features.

  Args:
    features: `dict` of `Tensor` objects.
    feature_columns: A list of feature_columns.

  Returns:
    Eight values:
      - A list of all feature column names.
      - A list of dense floats.
      - A list of sparse float feature indices.
      - A list of sparse float feature values.
      - A list of sparse float feature shapes.
      - A list of sparse int feature indices.
      - A list of sparse int feature values.
      - A list of sparse int feature shapes.

  Raises:
    ValueError: if features is not valid.
  """
  if not features:
    raise ValueError("Features dictionary must be specified.")

  # Make a shallow copy of features to ensure downstream usage
  # is unaffected by modifications in the model function.
  features = copy.copy(features)
  if feature_columns:
    scope = "gbdt"
    with variable_scope.variable_scope(scope):
      feature_columns = list(feature_columns)
      transformed_features = {}
      for fc in feature_columns:
        # pylint: disable=protected-access
        if isinstance(fc, feature_column_lib._EmbeddingColumn):
          # pylint: enable=protected-access
          # Embedding columns need the full input_layer machinery; scope
          # variables go into the "gbdt" collection.
          transformed_features[fc.name] = fc_core.input_layer(
              features, [fc],
              weight_collections=[scope])
        else:
          result = feature_column_ops.transform_features(features, [fc])
          # Exactly one output is expected per column; the original `> 1`
          # check let an empty result fall through to an opaque IndexError.
          if len(result) != 1:
            raise ValueError("Unexpected number of output features")
          transformed_features[fc.name] = list(result.values())[0]
    features = transformed_features

  dense_float_names = []
  dense_floats = []
  sparse_float_names = []
  sparse_float_indices = []
  sparse_float_values = []
  sparse_float_shapes = []
  sparse_int_names = []
  sparse_int_indices = []
  sparse_int_values = []
  sparse_int_shapes = []
  # Sort keys so the slot order is deterministic across runs.
  for key in sorted(features):
    tensor = features[key]
    if isinstance(tensor, sparse_tensor.SparseTensor):
      if tensor.values.dtype == dtypes.float32:
        sparse_float_names.append(key)
        sparse_float_indices.append(tensor.indices)
        sparse_float_values.append(tensor.values)
        sparse_float_shapes.append(tensor.dense_shape)
      elif tensor.values.dtype == dtypes.int64:
        sparse_int_names.append(key)
        sparse_int_indices.append(tensor.indices)
        sparse_int_values.append(tensor.values)
        sparse_int_shapes.append(tensor.dense_shape)
      else:
        raise ValueError("Unsupported sparse feature %s with dtype %s." %
                         (tensor.indices.name, tensor.dtype))
    else:
      if tensor.dtype == dtypes.float32:
        if len(tensor.shape) > 1 and tensor.shape[1] > 1:
          # Multi-dimensional dense floats are split into one [-1, 1]
          # column per dimension, each with its own generated name.
          unstacked = array_ops.unstack(tensor, axis=1)
          # `range` instead of Python-2-only `xrange` (NameError on Py3).
          for i, column in enumerate(unstacked):
            dense_float_names.append(_FEATURE_NAME_TEMPLATE % (key, i))
            dense_floats.append(array_ops.reshape(column, [-1, 1]))
        else:
          dense_float_names.append(key)
          dense_floats.append(tensor)
      else:
        raise ValueError("Unsupported dense feature %s with dtype %s." %
                         (tensor.name, tensor.dtype))
  # Feature columns are logically organized into incrementing slots starting
  # from dense floats, then sparse floats then sparse ints.
  fc_names = (dense_float_names + sparse_float_names + sparse_int_names)
  return (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
          sparse_float_shapes, sparse_int_indices, sparse_int_values,
          sparse_int_shapes)