def test_bounded_expression(self):
  """Checks that a `BoundedExpression` exposes the appropriate bound.

  The upper bound is expected when the expression is used as-is, and the lower
  bound when it is negated. Nested (and negated nested) `BoundedExpression`s
  are covered as well.
  """
  memoizer = {
      defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
      defaults.GLOBAL_STEP_KEY: tf.Variable(0, dtype=tf.int32),
      defaults.VARIABLE_FN_KEY: tf.Variable
  }

  def constant(value):
    # The same single-term BasicExpression serves as both the penalty and the
    # constraint portion of the resulting Expression.
    wrapped = basic_expression.BasicExpression([term.TensorTerm(value)])
    return expression.ExplicitExpression(wrapped, wrapped)

  one, two, four, eight = [constant(value) for value in (1.0, 2.0, 4.0, 8.0)]

  def magnitude(candidate):
    # Every BasicExpression here holds exactly one term. The term may have
    # been negated, but its absolute value still identifies which of the
    # original BasicExpressions was selected.
    penalty_terms = candidate.penalty_expression._terms
    self.assertEqual(1, len(penalty_terms))
    return abs(penalty_terms[0].tensor(memoizer))

  between_one_and_two = expression.BoundedExpression(
      lower_bound=one, upper_bound=two)
  self.assertEqual(magnitude(between_one_and_two), 2.0)
  self.assertEqual(magnitude(-between_one_and_two), 1.0)

  between_four_and_eight = expression.BoundedExpression(
      lower_bound=four, upper_bound=eight)
  self.assertEqual(magnitude(between_four_and_eight), 8.0)
  self.assertEqual(magnitude(-between_four_and_eight), 4.0)

  negated_difference = -(between_one_and_two - between_four_and_eight)
  self.assertEqual(magnitude(negated_difference), 8.0 - 1.0)
  self.assertEqual(magnitude(-negated_difference), 4.0 - 2.0)

  # A BoundedExpression may itself be one of the bounds.
  nested = expression.BoundedExpression(
      lower_bound=between_one_and_two, upper_bound=four)
  self.assertEqual(magnitude(nested), 4.0)
  self.assertEqual(magnitude(-nested), 1.0)

  # Both bounds may also be negated BoundedExpressions.
  nested_negated = expression.BoundedExpression(
      lower_bound=-between_one_and_two, upper_bound=-between_four_and_eight)
  self.assertEqual(magnitude(nested_negated), 4.0)
  self.assertEqual(magnitude(-nested_negated), 2.0)
def test_not_merging(self):
  """Verifies that incompatible `Term`s stay separate in a `BasicExpression`."""
  predictions = tf.constant([1.0, -1.0, 0.5], dtype=tf.float32)
  scalar_weights = 1.0
  tensor_weights = tf.constant([0.7, 0.3, 1.0], dtype=tf.float32)
  always_numerator = helpers.Predicate(True)
  partial_numerator = helpers.Predicate(tf.constant([True, False, False]))
  always_denominator = helpers.Predicate(True)
  partial_denominator = helpers.Predicate(tf.constant([True, False, True]))

  # One term uses the zero-one loss and the other the hinge loss. The
  # differing losses are what make the two terms incompatible.
  zero_one_term = term.BinaryClassificationTerm.ratio(
      1.0, 0.0, predictions, scalar_weights, always_numerator,
      always_denominator, loss.ZeroOneLoss())
  hinge_term = term.BinaryClassificationTerm.ratio(
      1.0, 0.0, predictions, tensor_weights, partial_numerator,
      partial_denominator, loss.HingeLoss())
  self.assertNotEqual(zero_one_term.key, hinge_term.key)

  zero_one_expression = basic_expression.BasicExpression([zero_one_term])
  self.assertEqual(1, len(zero_one_expression.terms))
  hinge_expression = basic_expression.BasicExpression([hinge_term])
  self.assertEqual(1, len(hinge_expression.terms))

  # __init__ must keep both terms.
  from_constructor = basic_expression.BasicExpression(
      [zero_one_term, hinge_term])
  self.assertEqual(2, len(from_constructor.terms))

  # __add__ must keep both terms.
  from_sum = zero_one_expression + hinge_expression
  self.assertEqual(2, len(from_sum.terms))

  # __sub__ must keep both terms.
  from_difference = zero_one_expression - hinge_expression
  self.assertEqual(2, len(from_difference.terms))
def create_dummy_expression(penalty_variable, constraint_variable):
  """Creates an `Expression` wrapping the two given variables.

  Args:
    penalty_variable: value wrapped in a `TensorTerm` as the only term of the
      penalty portion.
    constraint_variable: value wrapped in a `TensorTerm` as the only term of
      the constraint portion.

  Returns:
    An `ExplicitExpression` built from the two single-term
    `BasicExpression`s.
  """
  penalty_part = basic_expression.BasicExpression(
      [term.TensorTerm(penalty_variable)])
  constraint_part = basic_expression.BasicExpression(
      [term.TensorTerm(constraint_variable)])
  return expression.ExplicitExpression(penalty_part, constraint_part)
def test_arithmetic(self):
  """Exercises the arithmetic operators defined on `Expression`."""
  memoizer = {
      defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
      defaults.GLOBAL_STEP_KEY: tf.compat.v2.Variable(0, dtype=tf.int32)
  }

  penalty_values = [-3.6, 1.5, 0.4]
  constraint_values = [-0.2, -0.5, 2.3]

  def combine(first, second, third):
    # A single formula touching every operator. It is applied both to the
    # Expressions under test and to the plain python floats.
    return (0.3 - (first / 2.3 + 0.7 * second) -
            (1.2 + third - 0.1) * 0.6 + 0.8)

  # One Expression per (penalty, constraint) pair, each holding its constants
  # in the tensor portion of term-less BasicExpressions.
  operands = [
      expression.Expression(
          basic_expression.BasicExpression(
              [],
              deferred_tensor.DeferredTensor(
                  tf.constant(penalty_value, dtype=tf.float32))),
          basic_expression.BasicExpression(
              [],
              deferred_tensor.DeferredTensor(tf.constant(constraint_value))))
      for penalty_value, constraint_value in zip(penalty_values,
                                                 constraint_values)
  ]

  combined = combine(operands[0], operands[1], operands[2])

  actual_penalty, penalty_variables = (
      combined.penalty_expression.evaluate(memoizer))
  actual_constraint, constraint_variables = (
      combined.constraint_expression.evaluate(memoizer))

  # The variables must exist before the wrapped session is created, so they
  # are created explicitly here.
  all_variables = deferred_tensor.DeferredVariableList(
      penalty_variables + constraint_variables)
  for deferred_variable in all_variables:
    deferred_variable.create(memoizer)

  expected_penalty = combine(*penalty_values)
  expected_constraint = combine(*constraint_values)

  with self.wrapped_session() as session:
    self.assertNear(
        expected_penalty, session.run(actual_penalty(memoizer)), err=1e-6)
    self.assertNear(
        expected_constraint,
        session.run(actual_constraint(memoizer)),
        err=1e-6)
def create_dummy_expression(extra_constraints=None):
  """Creates a term-less `Expression` carrying the given extra constraints.

  Args:
    extra_constraints: forwarded unchanged to `ConstrainedExpression`.

  Returns:
    A `ConstrainedExpression` wrapping an `ExplicitExpression` whose penalty
    and constraint `BasicExpression`s both have no terms.
  """
  empty_expression = expression.ExplicitExpression(
      basic_expression.BasicExpression([]),
      basic_expression.BasicExpression([]))
  return expression.ConstrainedExpression(
      empty_expression, extra_constraints=extra_constraints)
def test_arithmetic(self):
  """Exercises the arithmetic operators defined on `Expression`."""
  denominator_lower_bound = 0.0
  global_step = tf.Variable(0, dtype=tf.int32)
  evaluation_context = basic_expression.BasicExpression.EvaluationContext(
      denominator_lower_bound, global_step)

  penalty_values = [-3.6, 1.5, 0.4]
  constraint_values = [-0.2, -0.5, 2.3]

  def combine(first, second, third):
    # A single formula touching every operator. It is applied both to the
    # Expressions under test and to the plain python floats.
    return (0.3 - (first / 2.3 + 0.7 * second) -
            (1.2 + third - 0.1) * 0.6 + 0.8)

  # One Expression per (penalty, constraint) pair, each holding its constants
  # in the tensor portion of term-less BasicExpressions.
  operands = [
      expression.Expression(
          basic_expression.BasicExpression(
              [], tf.constant(penalty_value, dtype=tf.float32)),
          basic_expression.BasicExpression([],
                                           tf.constant(constraint_value)))
      for penalty_value, constraint_value in zip(penalty_values,
                                                 constraint_values)
  ]

  combined = combine(operands[0], operands[1], operands[2])

  # Only the first element of each evaluate() result is checked.
  actual_penalty, _, _ = combined.penalty_expression.evaluate(
      evaluation_context)
  actual_constraint, _, _ = combined.constraint_expression.evaluate(
      evaluation_context)

  expected_penalty = combine(*penalty_values)
  expected_constraint = combine(*constraint_values)

  with self.session() as session:
    session.run(
        [tf.global_variables_initializer(),
         tf.local_variables_initializer()])

    self.assertNear(expected_penalty, session.run(actual_penalty), err=1e-6)
    self.assertNear(
        expected_constraint, session.run(actual_constraint), err=1e-6)
def constant_expression(penalty_constant, constraint_constant=None):
  """Builds an `ExplicitExpression` from float32 constants.

  Args:
    penalty_constant: value stored (as a float32 constant) in the penalty
      portion.
    constraint_constant: value stored in the constraint portion. When `None`,
      the penalty `BasicExpression` is reused for the constraint portion.

  Returns:
    An `ExplicitExpression` wrapping the constant(s).
  """

  def as_basic_expression(value):
    return basic_expression.BasicExpression(
        [term.TensorTerm(tf.constant(value, dtype=tf.float32))])

  penalty_part = as_basic_expression(penalty_constant)
  constraint_part = (
      penalty_part if constraint_constant is None else
      as_basic_expression(constraint_constant))
  return expression.ExplicitExpression(penalty_part, constraint_part)
def constraint_expression(self):
  """Returns the constraint `BasicExpression` of the scaled active bound.

  A negative scalar selects the lower bound and a positive scalar the upper
  bound. Otherwise a term-less `BasicExpression` is returned.
  """
  # The bound is scaled as a whole Expression *before* its constraint portion
  # is extracted, for the reason given in penalty_expression.
  if self._scalar < 0:
    active_bound = self._lower_bound
  elif self._scalar > 0:
    active_bound = self._upper_bound
  else:
    return basic_expression.BasicExpression([])
  return (active_bound * self._scalar).constraint_expression
def constraint_expression(self):
  """Returns the sum of the subexpressions' constraint `BasicExpression`s."""
  # Accumulating with BasicExpression addition also simplifies the result,
  # because compatible Terms are combined whenever BasicExpressions are added.
  total = basic_expression.BasicExpression([])
  for member in self._expressions:
    total += member.constraint_expression
  return total
def lower_bound(expressions):
  """Creates an `Expression` lower bounding the given expressions.

  A slack variable is introduced, together with constraints forcing it to
  lower bound every element of the given list. The slack variable is what gets
  returned.

  When the result is lower-bounded or maximized, it behaves like the `min` of
  its arguments. It does *not* behave like `min` when the result is
  upper-bounded or minimized: that would merely decrease the slack variable,
  leaving the contents of the expressions list unaffected.

  Args:
    expressions: list of `Expression`s, the quantities to lower-bound.

  Returns:
    An `Expression` representing a lower bound on the given expressions.

  Raises:
    ValueError: if the expressions list is empty.
    TypeError: if the expressions list contains a non-`Expression`, or if any
      `Expression` has a different dtype.
  """
  slack = _create_slack_variable(expressions, "lower_bound")
  slack_expression = basic_expression.BasicExpression(terms=[], tensor=slack)

  # Duplicate expressions are collapsed first, so that each distinct
  # expression contributes a single constraint.
  slack_constraints = {
      candidate >= slack for candidate in set(expressions)
  }

  return expression.Expression(
      penalty_expression=slack_expression,
      constraint_expression=slack_expression,
      extra_constraints=slack_constraints)
def wrap_rate(penalty_tensor, constraint_tensor=None):
  """Creates an `Expression` representing the given `Tensor`(s).

  An `Expression` holds two `BasicExpression`s because the "penalty" one will
  be differentiable, whereas the "constraint" one need not be. During
  optimization, the former is used whenever gradients are needed, and the
  latter otherwise.

  Args:
    penalty_tensor: scalar `Tensor`, the quantity to store in the "penalty"
      portion of the result (and also the "constraint" portion, if
      constraint_tensor is not provided).
    constraint_tensor: scalar `Tensor`, the quantity to store in the
      "constraint" portion of the result.

  Returns:
    An `Expression` wrapping the given `Tensor`(s).

  Raises:
    TypeError: if wrap_rate() is called on an `Expression`.
  """
  # Ideally both arguments would be checked for being scalar Tensors (or
  # convertible to one). Too many types qualify for that to be practical: the
  # simplest approach would be to perform the conversion and then check that
  # the result has a single element, but that would add a dummy element to the
  # Tensorflow graph, and would not work for a Tensor of unknown size. So only
  # the types known for certain to be disallowed are rejected: objects
  # internal to this library.
  if any(
      isinstance(candidate, helpers.RateObject)
      for candidate in (penalty_tensor, constraint_tensor)):
    raise TypeError("you cannot wrap an object that has already been wrapped")

  def as_basic_expression(tensor):
    return basic_expression.BasicExpression(
        [term.TensorTerm(deferred_tensor.ExplicitDeferredTensor(tensor))])

  penalty_part = as_basic_expression(penalty_tensor)
  constraint_part = (
      penalty_part
      if constraint_tensor is None else as_basic_expression(constraint_tensor))
  return expression.ExplicitExpression(penalty_part, constraint_part)
def upper_bound(expressions):
  """Creates an `Expression` upper bounding the given expressions.

  A slack variable is introduced, together with constraints forcing it to
  upper bound every element of the given list. The slack variable is what gets
  returned.

  When the result is upper-bounded or minimized, it behaves like the `max` of
  its arguments. The result should *never* be lower-bounded or maximized,
  since that would merely increase the slack variable, leaving the contents of
  the expressions list unaffected.

  Args:
    expressions: list of `Expression`s, the quantities to upper-bound.

  Returns:
    An `Expression` representing an upper bound on the given expressions.

  Raises:
    ValueError: if the expressions list is empty.
    TypeError: if the expressions list contains a non-`Expression`.
  """
  if not expressions:
    raise ValueError("upper_bound cannot be given an empty expression list")
  if any(
      not isinstance(candidate, expression.Expression)
      for candidate in expressions):
    raise TypeError(
        "upper_bound expects a list of rate Expressions (perhaps you need to "
        "call wrap_rate() to create an Expression from a Tensor?)")

  # The slack variable would ideally share the dtype of the predictions, but
  # that dtype may be unknown (e.g. in eager mode), so float32 with
  # auto_cast=True is always used instead.
  slack = deferred_tensor.DeferredVariable(
      0.0,
      trainable=True,
      name="tfco_upper_bound",
      dtype=tf.float32,
      auto_cast=True)

  slack_basic_expression = basic_expression.BasicExpression(
      [term.TensorTerm(slack)])
  slack_expression = expression.ExplicitExpression(
      penalty_expression=slack_basic_expression,
      constraint_expression=slack_basic_expression)
  slack_constraints = [
      candidate <= slack_expression for candidate in expressions
  ]

  # The result is wrapped in a BoundedExpression whose lower bound is an
  # InvalidExpression carrying an error message, so that an attempt to
  # maximize or lower-bound the result is detected and reported.
  forbidden_lower = expression.InvalidExpression(
      "the result of a call to upper_bound() can only be minimized or "
      "upper-bounded; it *cannot* be maximized or lower-bounded")
  constrained_upper = expression.ConstrainedExpression(
      expression.ExplicitExpression(
          penalty_expression=slack_basic_expression,
          constraint_expression=slack_basic_expression),
      extra_constraints=slack_constraints)
  return expression.BoundedExpression(
      lower_bound=forbidden_lower, upper_bound=constrained_upper)
def lower_bound(expressions):
  """Creates an `Expression` lower bounding the given expressions.

  A slack variable is introduced, together with constraints forcing it to
  lower bound every element of the given list. The slack variable is what gets
  returned.

  When the result is lower-bounded or maximized, it behaves like the `min` of
  its arguments. The result should *never* be upper-bounded or minimized,
  since that would merely decrease the slack variable, leaving the contents of
  the expressions list unaffected.

  Args:
    expressions: list of `Expression`s, the quantities to lower-bound.

  Returns:
    An `Expression` representing a lower bound on the given expressions.

  Raises:
    ValueError: if the expressions list is empty.
    TypeError: if the expressions list contains a non-`Expression`.
  """
  if not expressions:
    raise ValueError("lower_bound cannot be given an empty expression list")
  if any(
      not isinstance(candidate, expression.Expression)
      for candidate in expressions):
    raise TypeError(
        "lower_bound expects a list of rate Expressions (perhaps you need to "
        "call wrap_rate() to create an Expression from a Tensor?)")

  # The slack variable would ideally share the dtype of the predictions, but
  # that dtype may be unknown (e.g. in eager mode), so float32 with
  # auto_cast=True is always used instead.
  slack = deferred_tensor.DeferredVariable(
      0.0,
      trainable=True,
      name="tfco_lower_bound",
      dtype=tf.float32,
      auto_cast=True)

  slack_basic_expression = basic_expression.BasicExpression(
      terms=[], tensor=slack)
  slack_expression = expression.Expression(
      penalty_expression=slack_basic_expression,
      constraint_expression=slack_basic_expression,
      extra_variables=[slack])
  slack_constraints = [
      candidate >= slack_expression for candidate in expressions
  ]

  return expression.Expression(
      penalty_expression=slack_basic_expression,
      constraint_expression=slack_basic_expression,
      extra_variables=[slack],
      extra_constraints=slack_constraints)
def __add__(self, other):
  """Returns the sum of this `Expression` and another one, or a scalar.

  Args:
    other: an `Expression`, or any object that is not a `RateObject` (treated
      as a scalar).

  Returns:
    A `SumExpression` over `self` and the (possibly converted) operand.

  Raises:
    TypeError: if `other` is a `RateObject` that is not an `Expression`.
  """
  if isinstance(other, helpers.RateObject):
    if not isinstance(other, Expression):
      raise TypeError("Expression objects can only be added to each other, or "
                      "scalars")
    addend = other
  else:
    # BasicExpressions do not support scalar addition, so the scalar is first
    # wrapped in an Expression of its own.
    scalar_part = basic_expression.BasicExpression([term.TensorTerm(other)])
    addend = ExplicitExpression(scalar_part, scalar_part)

  return SumExpression([self, addend])
def test_merging(self):
  """Verifies that compatible `Term`s are merged by `BasicExpression`."""
  predictions = deferred_tensor.ExplicitDeferredTensor(
      tf.constant([1.0, -1.0, 0.5], dtype=tf.float32))
  scalar_weights = deferred_tensor.ExplicitDeferredTensor(1.0)
  tensor_weights = deferred_tensor.ExplicitDeferredTensor(
      tf.constant([0.7, 0.3, 1.0], dtype=tf.float32))
  always_numerator = predicate.Predicate(True)
  partial_numerator = predicate.Predicate(tf.constant([True, False, False]))
  always_denominator = predicate.Predicate(True)
  partial_denominator = predicate.Predicate(tf.constant([True, False, True]))

  # Both terms share their predictions and their loss, which is what makes
  # them compatible (their keys are asserted to be equal below).
  first_term = term.BinaryClassificationTerm.ratio(
      1.0, 0.0, predictions, scalar_weights, always_numerator,
      always_denominator, loss.ZeroOneLoss())
  second_term = term.BinaryClassificationTerm.ratio(
      1.0, 0.0, predictions, tensor_weights, partial_numerator,
      partial_denominator, loss.ZeroOneLoss())
  self.assertEqual(first_term.key, second_term.key)

  first_expression = basic_expression.BasicExpression([first_term])
  self.assertEqual(1, len(first_expression._terms))
  second_expression = basic_expression.BasicExpression([second_term])
  self.assertEqual(1, len(second_expression._terms))

  # __init__ must merge the two terms into one.
  from_constructor = basic_expression.BasicExpression(
      [first_term, second_term])
  self.assertEqual(1, len(from_constructor._terms))

  # __add__ must merge the two terms into one.
  from_sum = first_expression + second_expression
  self.assertEqual(1, len(from_sum._terms))

  # __sub__ must merge the two terms into one.
  from_difference = first_expression - second_expression
  self.assertEqual(1, len(from_difference._terms))
def penalty_expression(self):
  """Returns the penalty `BasicExpression` of the scaled active bound.

  A negative scalar selects the lower bound and a positive scalar the upper
  bound. Otherwise a term-less `BasicExpression` is returned.
  """
  # The penalty expression is always extracted for minimization or
  # upper-bounding, which is why the lower bound is used for a negative scalar
  # and the upper bound for a positive one.
  #
  # The chosen bound is scaled as a whole Expression *before* its
  # BasicExpression is extracted, rather than extracting and then scaling.
  # Further BoundedExpressions could be nested inside this one, and their
  # scalars must be kept up-to-date.
  if self._scalar < 0:
    active_bound = self._lower_bound
  elif self._scalar > 0:
    active_bound = self._upper_bound
  else:
    return basic_expression.BasicExpression([])
  return (active_bound * self._scalar).penalty_expression
def create_dummy_expression(extra_variables=None):
  """Creates a term-less `Expression` carrying the given extra variables.

  Args:
    extra_variables: forwarded unchanged to the `Expression` constructor.

  Returns:
    An `Expression` whose penalty and constraint `BasicExpression`s both have
    no terms.
  """
  empty_penalty = basic_expression.BasicExpression([])
  empty_constraint = basic_expression.BasicExpression([])
  return expression.Expression(
      empty_penalty, empty_constraint, extra_variables=extra_variables)
def test_arithmetic(self):
  """Exercises the arithmetic operators defined on `BasicExpression`."""
  # _RatioWeights objects are evaluated directly below, so the evaluation
  # context must be a _RatioWeights one rather than a BasicExpression one.
  denominator_lower_bound = 0.0
  global_step = tf.Variable(0, dtype=tf.int32)
  evaluation_context = term._RatioWeights.EvaluationContext(
      denominator_lower_bound, global_step)

  positive_coefficients = np.array([1.0, 0.5, 0.0], dtype=np.float32)
  negative_coefficients = np.array([0.0, 0.5, 1.0], dtype=np.float32)
  losses = [loss.ZeroOneLoss(), loss.HingeLoss(), loss.ZeroOneLoss()]

  # The first and third terms share a loss (and everything else except the
  # coefficients), so they are compatible. The second term has a different
  # loss, making it incompatible with the other two.
  dummy_predictions = tf.constant(0, dtype=tf.float32, shape=(1,))
  dummy_weights = 1.0
  true_predicate = helpers.Predicate(True)
  operands = [
      basic_expression.BasicExpression([
          term.BinaryClassificationTerm.ratio(
              positive_coefficient, negative_coefficient, dummy_predictions,
              dummy_weights, true_predicate, true_predicate, loss_function)
      ])
      for positive_coefficient, negative_coefficient, loss_function in zip(
          positive_coefficients, negative_coefficients, losses)
  ]

  # A single formula touching every operator.
  combined = (
      0.3 - (operands[0] / 2.3 + 0.7 * operands[1]) +
      (1.2 + operands[2] - 0.1) * 0.6 + 0.8)

  expected_constant = 0.3 + (1.2 - 0.1) * 0.6 + 0.8
  scales = np.array([-1.0 / 2.3, -0.7, 0.6], dtype=np.float32)
  positive_coefficients *= scales
  negative_coefficients *= scales

  # The two zero-one terms are compatible, so their weights are expected to
  # have been summed. The hinge term stands alone.
  expected_zero_one_positive = (
      positive_coefficients[0] + positive_coefficients[2])
  expected_zero_one_negative = (
      negative_coefficients[0] + negative_coefficients[2])
  expected_hinge_positive = positive_coefficients[1]
  expected_hinge_negative = negative_coefficients[1]

  # Merging the compatible pair should leave exactly two terms. Their order
  # is not assumed, so swap them if needed.
  merged_terms = combined.terms
  self.assertEqual(2, len(merged_terms))
  zero_one_term, hinge_term = merged_terms
  if zero_one_term.loss != loss.ZeroOneLoss():
    zero_one_term, hinge_term = hinge_term, zero_one_term
  self.assertEqual(zero_one_term.loss, loss.ZeroOneLoss())
  self.assertEqual(hinge_term.loss, loss.HingeLoss())

  # Only scalar constants went into the expression, so its "tensor" is really
  # just a scalar.
  actual_constant = combined.tensor
  self.assertAllClose(expected_constant, actual_constant, rtol=0, atol=1e-6)

  def weights_of(ratio_weights):
    # The pre_train_ops are ignored: only the weight values are checked.
    value, _, _ = ratio_weights.evaluate(evaluation_context)
    return value

  checks = [
      (expected_zero_one_positive,
       weights_of(zero_one_term.positive_ratio_weights)),
      (expected_zero_one_negative,
       weights_of(zero_one_term.negative_ratio_weights)),
      (expected_hinge_positive,
       weights_of(hinge_term.positive_ratio_weights)),
      (expected_hinge_negative,
       weights_of(hinge_term.negative_ratio_weights)),
  ]

  with self.session() as session:
    session.run(
        [tf.global_variables_initializer(),
         tf.local_variables_initializer()])

    for expected_weight, actual_weights in checks:
      self.assertAllClose(
          np.array([expected_weight]),
          session.run(actual_weights),
          rtol=0,
          atol=1e-6)
def create_dummy_expression(value):
  """Creates an `Expression` wrapping the given value.

  Args:
    value: wrapped in a `TensorTerm`, which becomes the only term of the
      `BasicExpression` shared by the penalty and constraint portions.

  Returns:
    An `ExplicitExpression` using that one `BasicExpression` for both
    portions.
  """
  shared = basic_expression.BasicExpression([term.TensorTerm(value)])
  return expression.ExplicitExpression(shared, shared)
def ratio_expression(positive_coefficient, negative_coefficient,
                     loss_function):
  """Builds a single-term `BasicExpression` from a ratio term.

  The predictions, weights and predicate are the `dummy_predictions`,
  `dummy_weights` and `true_predicate` names from the enclosing scope.
  """
  return basic_expression.BasicExpression([
      term.BinaryClassificationTerm.ratio(
          positive_coefficient, negative_coefficient, dummy_predictions,
          dummy_weights, true_predicate, true_predicate, loss_function)
  ])
def constant_expression(constant):
  """Builds a `BasicExpression` holding one float32 constant `TensorTerm`."""
  constant_term = term.TensorTerm(tf.constant(constant, dtype=tf.float32))
  return basic_expression.BasicExpression([constant_term])
def _roc_auc(context,
             bins,
             lower_bound=False,
             upper_bound=False,
             penalty_loss=_DEFAULT_PENALTY_LOSS,
             constraint_loss=_DEFAULT_CONSTRAINT_LOSS):
  """Creates an `Expression` representing an approximate ROC AUC.

  The result of this function represents a Riemann approximation to the area
  under the ROC curve (false positive rate on the horizontal axis, true
  positive rate on the vertical axis), using the constraint-based method
  proposed by:

  > Eban, Schain, Mackey, Gordon, Rifkin and Elidan. "Scalable Learning of
  > Non-Decomposable Objectives". AISTATS 2017.

  If you're going to be lower-bounding or maximizing the result of this
  function, then you need to set the lower_bound parameter to `True`.
  Likewise, if you're going to be upper-bounding or minimizing the result of
  this function (which normally wouldn't make much sense for ROC AUC), then the
  upper_bound parameter must be `True`. At least one of these parameters *must*
  be `True`, and it's permitted for both of them to be `True` (but we recommend
  against this, since it would result in equality constraints, which might
  cause problems during optimization and/or post-processing).

  Args:
    context: `SubsettableContext`, the block of data to use when calculating
      the rate. This context *must* contain labels.
    bins: positive integer, the number of "rectangles" to use for the Riemann
      approximation to ROC AUC.
    lower_bound: bool, `True` if you want the result of this function to
      lower-bound the approximate ROC AUC.
    upper_bound: bool, `True` if you want the result of this function to
      upper-bound the approximate ROC AUC.
    penalty_loss: `BinaryClassificationLoss`, the (differentiable) loss
      function to use when calculating the "penalty" approximation to the rate.
    constraint_loss: `BinaryClassificationLoss`, the (not necessarily
      differentiable) loss function to use when calculating the "constraint"
      approximation to the rate. This loss must be "normalized" (see
      `BinaryClassificationLoss.is_normalized`).

  Returns:
    An `Expression` representing a Riemann approximation to ROC AUC.

  Raises:
    TypeError: if the context is not a SubsettableContext, the number of bins
      is not an integer, or either loss is not a BinaryClassificationLoss.
    ValueError: if the context doesn't contain labels, the number of bins is
      nonpositive, both lower_bound and upper_bound are `False`, the
      constraint_loss is not normalized, or the penalty and constraint
      predictions have different dtypes.
  """
  if not isinstance(context, subsettable_context.SubsettableContext):
    raise TypeError("context must be a SubsettableContext")
  raw_context = context.raw_context
  if (raw_context.penalty_labels is None or
      raw_context.constraint_labels is None):
    raise ValueError("roc_auc_lower_bound requires a context with labels")

  if not isinstance(bins, numbers.Integral):
    raise TypeError("number of roc_auc_lower_bound bins must be an integer")
  if bins <= 0:
    raise ValueError("number of roc_auc_lower_bound bins must be strictly "
                     "positive")

  # One could set both lower_bound and upper_bound to True, in which case the
  # result of this function could be treated as the Riemann approximation to
  # ROC AUC itself (instead of a {lower,upper} bound of it). However, this
  # would come with some drawbacks: it would of course make optimization more
  # difficult, but more importantly, it would potentially cause
  # post-processing for feasibility (e.g. using "shrinking") to fail to find a
  # feasible solution.
  if not (lower_bound or upper_bound):
    raise ValueError("at least one of lower_bound or upper_bound must be True")

  if not (isinstance(penalty_loss, loss.BinaryClassificationLoss) and
          isinstance(constraint_loss, loss.BinaryClassificationLoss)):
    raise TypeError("penalty and constraint losses must be "
                    "BinaryClassificationLosses")

  # For the constraints on the false positive rates to make sense, it would be
  # best to be using a normalized loss. The reason for this is that, if both
  # lower_bound and upper_bound are True (or if one imposes constraints
  # including separate lower and upper bounds), our constraints on the false
  # positive rates will be equality constraints, which could be infeasible for
  # an unnormalized loss. This could be changed to a warning, however.
  if not constraint_loss.is_normalized:
    raise ValueError("roc_auc_lower_bound can only be used with a normalized "
                     "constraint_loss (e.g. zero/one, sigmoid or ramp)")

  dtype = raw_context.penalty_predictions.dtype.real_dtype
  if dtype != raw_context.constraint_predictions.dtype.real_dtype:
    raise ValueError("penalty and constraint predictions must have the same "
                     "dtype")

  # We use a lambda to initialize the thresholds so that, if this function
  # call is inside the scope of a tf.control_dependencies() block, the
  # dependencies will not be applied to the initializer.
  #
  # The initial value is created with the predictions' dtype. tf.zeros()
  # defaults to float32, which would not match the dtype requested from
  # tf.Variable when the predictions are, e.g., float64.
  thresholds = tf.Variable(
      lambda: tf.zeros((bins,), dtype=dtype),
      dtype=dtype,
      name="roc_auc_thresholds")

  positive_context = context.subset(raw_context.penalty_labels > 0,
                                    raw_context.constraint_labels > 0)
  negative_context = context.subset(raw_context.penalty_labels <= 0,
                                    raw_context.constraint_labels <= 0)

  penalty_average_tpr_terms = []
  constraint_average_tpr_terms = []
  extra_constraints = set()
  for bin_index in range(bins):
    threshold = thresholds[bin_index]

    # It's tempting to wrap tf.stop_gradient() around the threshold, so that
    # only the model parameters (and not the thresholds) will be adjusted to
    # increase the average true positive rate. However, this would prevent the
    # one-sided constraint, as described below, from working, since we need
    # something to be "pushing against" the constraint.
    penalty_tpr_term = term.BinaryClassificationTerm.ratio(
        1.0, 0.0, raw_context.penalty_predictions - threshold,
        raw_context.penalty_weights, positive_context.penalty_predicate,
        positive_context.penalty_predicate, penalty_loss)
    constraint_tpr_term = term.BinaryClassificationTerm.ratio(
        1.0, 0.0, raw_context.constraint_predictions - threshold,
        raw_context.constraint_weights, positive_context.constraint_predicate,
        positive_context.constraint_predicate, constraint_loss)

    # Each bin contributes 1/bins of its true positive rate to the average.
    penalty_average_tpr_terms.append(penalty_tpr_term / bins)
    constraint_average_tpr_terms.append(constraint_tpr_term / bins)

    # We wrap tf.stop_gradient() around the predictions because we want to
    # adjust the thresholds, and only the thresholds, to satisfy the false
    # positive rate constraints.
    penalty_fpr_term = term.BinaryClassificationTerm.ratio(
        1.0, 0.0,
        tf.stop_gradient(raw_context.penalty_predictions) - threshold,
        raw_context.penalty_weights, negative_context.penalty_predicate,
        negative_context.penalty_predicate, penalty_loss)
    constraint_fpr_term = term.BinaryClassificationTerm.ratio(
        1.0, 0.0,
        tf.stop_gradient(raw_context.constraint_predictions) - threshold,
        raw_context.constraint_weights, negative_context.constraint_predicate,
        negative_context.constraint_predicate, constraint_loss)

    fpr_expression = expression.Expression(
        basic_expression.BasicExpression([penalty_fpr_term]),
        basic_expression.BasicExpression([constraint_fpr_term]))
    # The target false positive rate is the midpoint of this bin.
    target_fpr = (bin_index + 0.5) / bins

    # Ideally fpr_expression would equal target_fpr, but we prefer to only
    # impose a one-sided constraint (when exactly one of lower_bound or
    # upper_bound is True) since using an equality constraint would come with
    # drawbacks: it would of course make optimization more difficult, but more
    # importantly, it would potentially cause post-processing for feasibility
    # (e.g. using "shrinking") to fail to find a feasible solution.
    #
    # The reason why a <= constraint results in a lower bound, and a >=
    # constraint results in an upper bound, is that, in the lower-bound case
    # (the upper-bound case is similar), adjusting the threshold to increase
    # the FPR will increase the corresponding TPR, and therefore the ROC AUC
    # estimate. In other words, the objective (increasing ROC AUC, and
    # therefore the FPR of each bin) will be "pushing against" the constraint.
    if lower_bound:
      extra_constraints.add(fpr_expression <= target_fpr)
    if upper_bound:
      extra_constraints.add(fpr_expression >= target_fpr)

  return expression.Expression(
      basic_expression.BasicExpression(penalty_average_tpr_terms),
      basic_expression.BasicExpression(constraint_average_tpr_terms),
      extra_constraints)
def _binary_classification_rate(positive_coefficient=0.0,
                                negative_coefficient=0.0,
                                numerator_context=None,
                                denominator_context=None,
                                penalty_loss=_DEFAULT_PENALTY_LOSS,
                                constraint_loss=_DEFAULT_CONSTRAINT_LOSS):
  """Builds an `Expression` for a weighted mix of positive and negative rates.

  The quantity being represented is:
    total_rate := (positive_coefficient * positive_rate +
                   negative_coefficient * negative_rate)
  with:
    positive_rate := sum_i{w_i * c_i * d_i * 1{z_i > 0}} / sum_i{w_i * d_i}
    negative_rate := sum_i{w_i * c_i * d_i * 1{z_i <= 0}} / sum_i{w_i * d_i}
  Here z_i are the predictions and w_i the weights, while c_i and d_i are
  indicators selecting which examples to include in the numerator and
  denominator, respectively (z, w, c and d all come from the contexts).

  The returned `Expression` holds *two* approximations of "total_rate":
    - the "penalty" `BasicExpression`, computed with penalty_loss, and
    - the "constraint" `BasicExpression`, computed with constraint_loss. When
      constraint_loss is the zero-one loss (the default), this one is exactly
      total_rate as defined above, with no approximation.
  Two are needed because the "penalty" `BasicExpression` will be
  differentiable, whereas the "constraint" `BasicExpression` need not be.
  During optimization the former is used wherever gradients are required, and
  the latter everywhere else.

  Args:
    positive_coefficient: float, scalar coefficient multiplying the positive
      prediction rate.
    negative_coefficient: float, scalar coefficient multiplying the negative
      prediction rate.
    numerator_context: `SubsettableContext`, the block of data used for the
      numerators of the rates.
    denominator_context: `SubsettableContext`, the block of data used for the
      denominators of the rates.
    penalty_loss: `BinaryClassificationLoss`, the (differentiable) loss used
      for the "penalty" approximation to the rates.
    constraint_loss: `BinaryClassificationLoss`, the (not necessarily
      differentiable) loss used for the "constraint" approximation to the
      rates.

  Returns:
    An `Expression` representing total_rate (as defined above).

  Raises:
    TypeError: if either context is not a SubsettableContext, or either loss is
      not a BinaryClassificationLoss.
    ValueError: if either context is not provided, or the two contexts are
      incompatible (have different predictions, labels or weights).
  """
  # Validation runs in a fixed order: missing contexts, then context types,
  # then context compatibility, and finally the loss types.
  contexts = (numerator_context, denominator_context)
  if any(context is None for context in contexts):
    raise ValueError("both numerator_context and denominator_context must be "
                     "provided")
  if not all(
      isinstance(context, subsettable_context.SubsettableContext)
      for context in contexts):
    raise TypeError("numerator and denominator contexts must be "
                    "SubsettableContexts")

  # Both contexts must be subsets of the very same underlying raw context,
  # since predictions and weights are read from it only once below.
  shared_raw_context = numerator_context.raw_context
  if denominator_context.raw_context != shared_raw_context:
    raise ValueError("numerator and denominator contexts must be compatible")

  losses = (penalty_loss, constraint_loss)
  if not all(
      isinstance(loss_fn, loss.BinaryClassificationLoss)
      for loss_fn in losses):
    raise TypeError("penalty and constraint losses must be "
                    "BinaryClassificationLosses")

  def make_term(predictions, weights, numerator_predicate,
                denominator_predicate, rate_loss):
    """Creates one ratio term using the caller-supplied coefficients."""
    return term.BinaryClassificationTerm.ratio(
        positive_coefficient, negative_coefficient, predictions, weights,
        numerator_predicate, denominator_predicate, rate_loss)

  # The "penalty" and "constraint" terms are built identically, except that
  # each reads its own flavor of predictions, weights, predicates and loss.
  penalty_term = make_term(
      shared_raw_context.penalty_predictions,
      shared_raw_context.penalty_weights,
      numerator_context.penalty_predicate,
      denominator_context.penalty_predicate,
      penalty_loss)
  constraint_term = make_term(
      shared_raw_context.constraint_predictions,
      shared_raw_context.constraint_weights,
      numerator_context.constraint_predicate,
      denominator_context.constraint_predicate,
      constraint_loss)

  penalty_basic_expression = basic_expression.BasicExpression([penalty_term])
  constraint_basic_expression = basic_expression.BasicExpression(
      [constraint_term])
  return expression.Expression(penalty_basic_expression,
                               constraint_basic_expression)