예제 #1
0
파일: svm.py 프로젝트: zxie/tensorflow
  def __init__(self,
               example_id_column,
               feature_columns,
               weight_column_name=None,
               model_dir=None,
               l1_regularization=0.0,
               l2_regularization=0.0,
               kernels=None,
               config=None):
    """Builds a linear SVM estimator trained with the SDCA optimizer.

    Args:
      example_id_column: Forwarded to `SDCAOptimizer` as `example_id_column`.
      feature_columns: Feature columns stored on the instance and handed to
        the model function through the estimator params.
      weight_column_name: Optional weight column name, handed to the model
        function through the estimator params.
      model_dir: Directory used by the underlying estimator. When falsy, a
        fresh temporary directory is created.
      l1_regularization: Forwarded as `symmetric_l1_regularization`.
      l2_regularization: Forwarded as `symmetric_l2_regularization`.
      kernels: Must be None; any other value is rejected.
      config: Forwarded to the underlying `Estimator`.

    Raises:
      ValueError: If `kernels` is not None.
    """
    # Guard first: nothing is constructed when kernels are requested.
    if kernels is not None:
      raise ValueError("Kernel SVMs are not currently supported.")

    sdca = sdca_optimizer.SDCAOptimizer(
        example_id_column=example_id_column,
        symmetric_l1_regularization=l1_regularization,
        symmetric_l2_regularization=l2_regularization)
    self._optimizer = sdca

    self._feature_columns = feature_columns
    self._model_dir = model_dir if model_dir else tempfile.mkdtemp()

    # "hinge_loss" is what makes this SDCA linear classifier an SVM.
    model_params = {
        "feature_columns": feature_columns,
        "optimizer": sdca,
        "weight_column_name": weight_column_name,
        "loss_type": "hinge_loss",
    }
    self._estimator = estimator.Estimator(
        model_fn=linear.sdca_classifier_model_fn,
        model_dir=self._model_dir,
        config=config,
        params=model_params)
예제 #2
0
    def __init__(self,
                 example_id_column,
                 feature_columns,
                 weight_column_name=None,
                 model_dir=None,
                 l1_regularization=0.0,
                 l2_regularization=0.0,
                 num_loss_partitions=1,
                 kernels=None,
                 config=None):
        """Constructs an `SVM` estimator object.

        Args:
          example_id_column: A string defining the feature column name
            representing example ids. Used to initialize the underlying
            optimizer.
          feature_columns: An iterable containing all the feature columns used
            by the model. All items in the set should be instances of classes
            derived from `FeatureColumn`.
          weight_column_name: A string defining the feature column name
            representing weights. It is used to down-weight or boost examples
            during training. It will be multiplied by the loss of the example.
          model_dir: Directory to save model parameters, graph, etc. This can
            also be used to load checkpoints from the directory into an
            estimator to continue training a previously saved model. When
            falsy, a fresh temporary directory is created.
          l1_regularization: L1-regularization parameter. Refers to global L1
            regularization (across all examples).
          l2_regularization: L2-regularization parameter. Refers to global L2
            regularization (across all examples).
          num_loss_partitions: Number of partitions of the (global) loss
            function optimized by the underlying optimizer (SDCAOptimizer).
          kernels: A list of kernels for the SVM. Currently, no kernels are
            supported. Reserved for future use for non-linear SVMs.
          config: RunConfig object to configure the runtime settings.

        Raises:
          ValueError: If `kernels` is not None.
        """
        # Guard first: nothing is constructed when kernels are requested.
        if kernels is not None:
            raise ValueError("Kernel SVMs are not currently supported.")

        sdca = sdca_optimizer.SDCAOptimizer(
            example_id_column=example_id_column,
            num_loss_partitions=num_loss_partitions,
            symmetric_l1_regularization=l1_regularization,
            symmetric_l2_regularization=l2_regularization)
        self._optimizer = sdca

        self._feature_columns = feature_columns
        self._model_dir = model_dir if model_dir else tempfile.mkdtemp()

        # "hinge_loss" is what makes this SDCA linear classifier an SVM.
        model_params = {
            "feature_columns": feature_columns,
            "optimizer": sdca,
            "weight_column_name": weight_column_name,
            "loss_type": "hinge_loss",
        }
        self._estimator = estimator.Estimator(
            model_fn=linear.sdca_classifier_model_fn,
            model_dir=self._model_dir,
            config=config,
            params=model_params)
예제 #3
0
  def __init__(self,
               example_id_column,
               feature_columns=None,
               weight_column_name=None,
               model_dir=None,
               l1_regularization=0.0,
               l2_regularization=0.0,
               kernels=None,
               config=None):
    """Builds a two-class linear SVM on top of the parent classifier.

    Args:
      example_id_column: Forwarded to `SDCAOptimizer` as `example_id_column`.
      feature_columns: Forwarded to the parent constructor.
      weight_column_name: Optional weight column name; forwarded to the parent
        constructor and to the binary SVM target column.
      model_dir: Forwarded to the parent constructor.
      l1_regularization: Forwarded as `symmetric_l1_regularization`.
      l2_regularization: Forwarded as `symmetric_l2_regularization`.
      kernels: Must be None; any other value is rejected.
      config: Forwarded to the parent constructor.

    Raises:
      ValueError: If `kernels` is not None.
    """
    # Guard first: nothing is constructed when kernels are requested.
    if kernels is not None:
      raise ValueError('Kernel SVMs are not currently supported.')

    sdca = sdca_optimizer.SDCAOptimizer(
        example_id_column=example_id_column,
        symmetric_l1_regularization=l1_regularization,
        symmetric_l2_regularization=l2_regularization)

    parent_kwargs = dict(
        model_dir=model_dir,
        n_classes=2,
        weight_column_name=weight_column_name,
        feature_columns=feature_columns,
        optimizer=sdca,
        config=config)
    super(SVM, self).__init__(**parent_kwargs)

    # Set after the parent constructor has run, so this SVM target column is
    # the value left on the instance.
    self._target_column = layers.binary_svm_target(
        weight_column_name=weight_column_name)
예제 #4
0
    def __init__(self,
                 example_id_column,
                 feature_columns,
                 weight_column_name=None,
                 model_dir=None,
                 l1_regularization=0.0,
                 l2_regularization=0.0,
                 num_loss_partitions=1,
                 kernels=None,
                 config=None,
                 feature_engineering_fn=None):
        """Constructs an `SVM` estimator object.

        Args:
          example_id_column: A string defining the feature column name
            representing example ids. Used to initialize the underlying
            optimizer.
          feature_columns: An iterable containing all the feature columns used
            by the model. All items in the set should be instances of classes
            derived from `FeatureColumn`.
          weight_column_name: A string defining the feature column name
            representing weights. It is used to down-weight or boost examples
            during training. It will be multiplied by the loss of the example.
          model_dir: Directory to save model parameters, graph, etc. This can
            also be used to load checkpoints from the directory into an
            estimator to continue training a previously saved model. When
            falsy, a fresh temporary directory is created.
          l1_regularization: L1-regularization parameter. Refers to global L1
            regularization (across all examples).
          l2_regularization: L2-regularization parameter. Refers to global L2
            regularization (across all examples).
          num_loss_partitions: Number of partitions of the (global) loss
            function optimized by the underlying optimizer (SDCAOptimizer).
          kernels: A list of kernels for the SVM. Currently, no kernels are
            supported. Reserved for future use for non-linear SVMs.
          config: RunConfig object to configure the runtime settings.
          feature_engineering_fn: Feature engineering function. Takes features
            and labels which are the output of `input_fn` and returns features
            and labels which will be fed into the model.

        Raises:
          ValueError: If `kernels` is not None.
        """
        # Guard first: nothing is constructed when kernels are requested.
        if kernels is not None:
            raise ValueError("Kernel SVMs are not currently supported.")

        sdca = sdca_optimizer.SDCAOptimizer(
            example_id_column=example_id_column,
            num_loss_partitions=num_loss_partitions,
            symmetric_l1_regularization=l1_regularization,
            symmetric_l2_regularization=l2_regularization)
        self._optimizer = sdca

        self._feature_columns = feature_columns
        self._model_dir = model_dir if model_dir else tempfile.mkdtemp()

        # The same hook object is kept on the instance and passed to the
        # model function through the params.
        weights_hook = linear._SdcaUpdateWeightsHook()  # pylint: disable=protected-access
        self._chief_hook = weights_hook

        svm_head = head_lib._binary_svm_head(  # pylint: disable=protected-access
            weight_column_name=weight_column_name,
            enable_centered_bias=False)
        model_params = {
            "head": svm_head,
            "feature_columns": feature_columns,
            "optimizer": sdca,
            "weight_column_name": weight_column_name,
            "update_weights_hook": weights_hook,
        }
        self._estimator = estimator.Estimator(
            model_fn=linear.sdca_model_fn,
            model_dir=self._model_dir,
            config=config,
            params=model_params,
            feature_engineering_fn=feature_engineering_fn)

        # Non-chief replicas do not keep a reference to the hook.
        if not self._estimator.config.is_chief:
            self._chief_hook = None
예제 #5
0
def sdca_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` with values in the set {0, 1}.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
          `_RegressionHead` or `_BinaryLogisticHead`.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * example_id_column: Forwarded to `SDCAOptimizer` as
          `example_id_column`.
      * l1_regularization: Global (across all examples) L1-regularization
          parameter.
      * l2_regularization: Global (across all examples) L2-regularization
          parameter.
      * num_loss_partitions: Number of partitions of the global loss function
          optimized by `SDCAOptimizer`. When falsy, it is derived from
          `config.num_worker_replicas` (at least 1), or is 1 when `config` is
          None.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * partitioner: Forwarded both to `SDCAOptimizer` and to the variable
          scope in which the linear model is built.
      * update_weights_hook: (optional) A `SessionRunHook` object or None.
          Used to update model weights.
    config: `RunConfig` object to configure the runtime settings. May be None.

  Returns:
    A `ModelFnOps` instance. When `update_weights_hook` is given, it is
    appended to the instance's `training_chief_hooks`.

  Raises:
    KeyError: If a required key is missing from `params`.
    ValueError: If the type of head is not one of `_BinarySvmHead`,
      `_RegressionHead` or `_BinaryLogisticHead`.
    AssertionError: If `head.logits_dimension` is not 1.
    ValueError: If mode is not any of the `ModeKeys` (presumably raised by
      `head.create_model_fn_ops`; not checked in this function).
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  example_id_column = params["example_id_column"]
  l1_regularization = params["l1_regularization"]
  l2_regularization = params["l2_regularization"]
  num_loss_partitions = params["num_loss_partitions"]
  weight_column_name = params["weight_column_name"]
  update_weights_hook = params.get("update_weights_hook", None)
  partitioner = params["partitioner"]

  # The SDCA loss is selected purely from the head type.
  if isinstance(head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
    loss_type = "hinge_loss"
  elif isinstance(head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
    loss_type = "logistic_loss"
  elif isinstance(head, head_lib._RegressionHead):  # pylint: disable=protected-access
    loss_type = "squared_loss"
  else:
    raise ValueError("Unsupported head type: {}".format(type(head)))

  assert head.logits_dimension == 1, (
      "SDCA only applies to logits_dimension=1.")

  # Update num_loss_partitions based on number of workers. `config` defaults
  # to None, so it must not be dereferenced unconditionally.
  if num_loss_partitions:
    n_loss_partitions = num_loss_partitions
  elif config is not None:
    n_loss_partitions = max(1, config.num_worker_replicas)
  else:
    n_loss_partitions = 1

  optimizer = sdca_optimizer.SDCAOptimizer(
      example_id_column=example_id_column,
      num_loss_partitions=n_loss_partitions,
      symmetric_l1_regularization=l1_regularization,
      symmetric_l2_regularization=l2_regularization,
      partitioner=partitioner)

  parent_scope = "linear"

  with variable_scope.variable_scope(
      values=features.values(), name_or_scope=parent_scope,
      partitioner=partitioner) as scope:
    # Work on a copy so the caller's dict is not mutated.
    features = features.copy()
    features.update(layers.transform_features(features, feature_columns))
    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1,
            scope=scope))

    _add_bias_column(feature_columns, features, bias, columns_to_variables)

  def _train_op_fn(unused_loss):
    """Builds the SDCA train op; the loss passed in by the head is ignored."""
    global_step = training_util.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(
        columns_to_variables, weight_column_name, loss_type, features, labels,
        global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      labels=labels,
      mode=mode,
      train_op_fn=_train_op_fn,
      logits=logits)
  if update_weights_hook is not None:
    return model_fn_ops._replace(training_chief_hooks=(
        model_fn_ops.training_chief_hooks + [update_weights_hook]))
  return model_fn_ops
	def __init__(self,
		example_id_column,
		feature_columns,
		weight_column_name=None,
		model_dir=None,
		l1_regularization=0.0,
		l2_regularization=0.0,
		num_loss_partitions=1,
		kernels=None,
		config=None,
		feature_engineering_fn=None):
		"""Builds a linear SVM estimator trained with the SDCA optimizer.

		Args:
			example_id_column: Forwarded to `SDCAOptimizer` as `example_id_column`.
			feature_columns: Feature columns stored on the instance and handed to
				the model function through the estimator params.
			weight_column_name: Optional weight column name; forwarded to the
				binary SVM head and to the model function params.
			model_dir: Forwarded to the parent constructor.
			l1_regularization: Forwarded as `symmetric_l1_regularization`.
			l2_regularization: Forwarded as `symmetric_l2_regularization`.
			num_loss_partitions: Forwarded to `SDCAOptimizer`.
			kernels: Must be None; any other value is rejected.
			config: Forwarded to the parent constructor.
			feature_engineering_fn: Forwarded to the parent constructor.

		Raises:
			ValueError: If `kernels` is not None.
		"""
		if kernels is not None:
			raise ValueError("Kernel SVMs are not currently supported.")

		optimizer = sdca_optimizer.SDCAOptimizer(
			example_id_column=example_id_column,
			num_loss_partitions=num_loss_partitions,
			symmetric_l1_regularization=l1_regularization,
			symmetric_l2_regularization=l2_regularization)

		# Stored on the instance; the default export input function reads it.
		self._feature_columns = feature_columns
		chief_hook = linear._SdcaUpdateWeightsHook()  # pylint: disable=protected-access
		super(SVM, self).__init__(
			model_fn=linear.sdca_model_fn,
			model_dir=model_dir,
			config=config,
			params={
				"head": head_lib.binary_svm_head(
					weight_column_name=weight_column_name,
					enable_centered_bias=False),
				"feature_columns": feature_columns,
				"optimizer": optimizer,
				"weight_column_name": weight_column_name,
				"update_weights_hook": chief_hook,
			},
			feature_engineering_fn=feature_engineering_fn)

	@deprecated_arg_values(
		estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False)
	def predict_proba(self, x=None, input_fn=None, batch_size=None, outputs=None, as_iterable=False):
		"""Runs inference to determine the class probability predictions.

		Args:
			x: Forwarded to the parent `predict`.
			input_fn: Forwarded to the parent `predict`.
			batch_size: Forwarded to the parent `predict`.
			outputs: Accepted but not used; only the probabilities key is ever
				requested from the parent `predict`.
			as_iterable: Forwarded to the parent `predict`; also selects the
				shape of the return value.

		Returns:
			`_as_iterable(...)` over the predictions when `as_iterable` is truthy,
			otherwise the probabilities entry of the prediction result.
		"""
		proba_key = prediction_key.PredictionKey.PROBABILITIES
		predictions = super(SVM, self).predict(
			x=x,
			input_fn=input_fn,
			batch_size=batch_size,
			outputs=[proba_key],
			as_iterable=as_iterable)
		if not as_iterable:
			return predictions[proba_key]
		return _as_iterable(predictions, output=proba_key)

	@deprecated("2017-09-25", "Please use Estimator.export_savedmodel() instead.")
	def export(self, export_dir, signature_fn=None, input_fn=None, default_batch_size=1, exports_to_keep=None):
		"""Deprecated export entry point.

		Forwards every argument by keyword to `export_with_defaults` and returns
		its result unchanged.
		"""
		forwarded = dict(
			export_dir=export_dir,
			signature_fn=signature_fn,
			input_fn=input_fn,
			default_batch_size=default_batch_size,
			exports_to_keep=exports_to_keep)
		return self.export_with_defaults(**forwarded)

	@deprecated("2017-09-25", "Please use Estimator.export_savedmodel() instead.")
	def export_with_defaults(self, export_dir, signature_fn=None, default_batch_size=1, exports_to_keep=None, input_fn=None):
		"""Exports the model, falling back to a feature-column based input function.

		Args:
			export_dir: Forwarded to the parent `export`.
			signature_fn: Forwarded to the parent `export`.
			default_batch_size: Forwarded to the parent `export`.
			exports_to_keep: Forwarded to the parent `export`.
			input_fn: Optional input function. When falsy, a default is used that
				parses examples with `self._feature_columns`. Declared last so that
				existing positional callers keep working.

		Returns:
			Whatever the parent `export` returns.
		"""
		def default_input_fn(unused_estimator, examples):
			"""Parses `examples` using the feature columns stored on the instance."""
			return layers.parse_feature_columns_from_examples(
				examples, self._feature_columns)
		return super(SVM, self).export(export_dir=export_dir,
			signature_fn=signature_fn,
			input_fn=input_fn or default_input_fn,
			default_batch_size=default_batch_size,
			exports_to_keep=exports_to_keep)
예제 #7
0
def svm_model_fn(features, labels, mode, params):
    """A model_fn for a linear SVM trained with the SDCA optimizer.

    Every key of `features` becomes a real-valued feature column. The binary
    SVM head, the `SDCAOptimizer` and the weight-update hook are all created
    inside this function rather than taken from `params`.

    Args:
      features: A dict of `Tensor` keyed by column name.
      labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
        dtype `int32` or `int64` in the range `[0, n_classes)`.
      mode: Defines whether this is training, evaluation or prediction.
        See `ModeKeys`.
      params: A dict of hyperparameters. The keys read here are:
        * weight_column_name: (optional) A string defining the weight feature
            column, or None if there are no weights.
        * num_loss_partitions: Forwarded to `SDCAOptimizer`.
        * l1_regularization: Forwarded as `symmetric_l1_regularization`.
        * l2_regularization: Forwarded as `symmetric_l2_regularization`.

    Returns:
      A `ModelFnOps` instance whose `training_chief_hooks` has the SDCA
      weight-update hook appended.

    Raises:
      KeyError: If a required key is missing from `params`.
      ValueError: If the optimizer is not an `SDCAOptimizer` instance.
      ValueError: If the type of head is neither `_BinarySvmHead`, nor
        `_BinaryLogisticHead` nor `_RegressionHead`.
    """

    # The column list is built before `example_id_column` runs.
    columns = [layers.real_valued_column(i) for i in features.keys()]
    # NOTE(review): `example_id_column` is defined outside this function;
    # presumably it adds the "index" example-id entry to `features` - confirm.
    example_id_column(features)

    weight_column = params.get("weight_column_name")

    svm_head = head_lib.binary_svm_head(weight_column_name=weight_column,
                                        enable_centered_bias=False)

    sdca = sdca_optimizer.SDCAOptimizer(
        example_id_column="index",
        num_loss_partitions=params["num_loss_partitions"],
        symmetric_l1_regularization=params["l1_regularization"],
        symmetric_l2_regularization=params["l2_regularization"])

    weights_hook = linear._SdcaUpdateWeightsHook()  # pylint: disable=protected-access

    if not isinstance(sdca, sdca_optimizer.SDCAOptimizer):
        raise ValueError("Optimizer must be of type SDCAOptimizer")

    # The SDCA loss is selected from the head type.
    if isinstance(svm_head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
        loss_name = "hinge_loss"
    elif isinstance(svm_head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
        loss_name = "logistic_loss"
    elif isinstance(svm_head, head_lib._RegressionHead):  # pylint: disable=protected-access
        assert svm_head.logits_dimension == 1, ("SDCA only applies for "
                                                "logits_dimension=1.")
        loss_name = "squared_loss"
    else:
        raise ValueError("Unsupported head type: {}".format(svm_head))

    scope_name = "linear"

    with variable_scope.variable_op_scope(features.values(),
                                          scope_name) as linear_scope:
        # Work on a copy so the caller's dict is not mutated further.
        feature_tensors = features.copy()
        feature_tensors.update(
            layers.transform_features(feature_tensors, columns))
        weighted_sum = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=feature_tensors,
            feature_columns=columns,
            num_outputs=1,
            scope=linear_scope)
        logits, columns_to_variables, bias = weighted_sum

        linear._add_bias_column(columns, feature_tensors, bias,
                                columns_to_variables)

    def _train_op_fn(unused_loss):
        """Builds the SDCA train op; the loss passed in by the head is ignored."""
        step = contrib_variables.get_global_step()
        sdca_model, train_op = sdca.get_train_step(
            columns_to_variables, weight_column, loss_name, feature_tensors,
            labels, step)
        if weights_hook is not None:
            weights_hook.set_parameters(sdca_model, train_op)
        return train_op

    ops = svm_head.create_model_fn_ops(features=feature_tensors,
                                       labels=labels,
                                       mode=mode,
                                       train_op_fn=_train_op_fn,
                                       logits=logits)
    if weights_hook is None:
        return ops
    chief_hooks = ops.training_chief_hooks + [weights_hook]
    return ops._replace(training_chief_hooks=chief_hooks)