def _calibrator_wrinkle(output_keypoints,
                        l1_reg=None,
                        l2_reg=None,
                        name='calibrator_wrinkle'):
  """Returns a calibrator wrinkle regularization.

  A calibrator wrinkle regularization (change in second derivative) =
    l1_reg * ||third_derivative||_1 + l2_reg * ||third_derivative||_2^2
  where third_derivative is:
    +3 * output_keypoints[1:end-2]
    -3 * output_keypoints[2:end-1]
    - output_keypoints[0:end-3]
    + output_keypoints[3:end].

  This regularizer is zero when the output_keypoints form a 2nd order
  polynomial of the index (and not necessarily in input values, e.g. when
  using non-uniform input keypoints).

  Args:
    output_keypoints: (Rank-1 tensor with shape [num_keypoints]) 1d
      calibrator's output keypoints tensor.
    l1_reg: (float) l1 regularization amount.
    l2_reg: (float) l2 regularization amount.
    name: name scope of calibrator wrinkle regularizer.

  Returns:
    A rank-0 tensor (scalar) that contains regularizer or None if there is no
    regularization. This can happen if l1_reg and l2_reg amounts are not set,
    or num_keypoints <= 3.

  Raises:
    ValueError: * If output_keypoints is not rank-1 tensor.
                * If the shape of output_keypoints is unknown.
  """
  dims = output_keypoints.shape.as_list()
  if len(dims) != 1:
    raise ValueError('calibrator_wrinkle expects output_keypoints as a '
                     'rank-1 tensor but got shape: %s' % dims)
  num_kpts = dims[0]
  if num_kpts is None:
    raise ValueError('calibrator_wrinkle expects output_keypoints dimension '
                     'to be known, but the first dimension is not set.')
  if num_kpts < 4 or (l1_reg is None and l2_reg is None):
    return None

  reg = None
  with tf.name_scope(name):
    third_drv = (3 * tf.slice(output_keypoints, [1], [num_kpts - 3]) -
                 3 * tf.slice(output_keypoints, [2], [num_kpts - 3]) -
                 tf.slice(output_keypoints, [0], [num_kpts - 3]) +
                 tf.slice(output_keypoints, [3], [num_kpts - 3]))
    if l1_reg:
      reg = tools.add_if_not_none(reg,
                                  l1_reg * tf.reduce_sum(tf.abs(third_drv)))
    if l2_reg:
      reg = tools.add_if_not_none(
          reg, l2_reg * tf.reduce_sum(tf.square(third_drv)))
  return reg
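# Illustrative sketch (not part of the library; assumes NumPy is available):
# checks the third-difference arithmetic above. For output keypoints that are
# a quadratic function of the index, the wrinkle term is exactly zero.
def _example_wrinkle_third_difference():
  import numpy as np  # local import: example only

  kp = np.array([0.0, 1.0, 4.0, 9.0, 16.0])  # kp[i] = i**2, quadratic in index
  third_drv = 3 * kp[1:-2] - 3 * kp[2:-1] - kp[:-3] + kp[3:]
  assert np.allclose(third_drv, 0.0)  # quadratic keypoints incur no penalty
  return np.abs(third_drv).sum(), np.square(third_drv).sum()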
def _calibrator_hessian(output_keypoints,
                        l1_reg=None,
                        l2_reg=None,
                        name='calibrator_hessian'):
  """Returns a calibrator hessian regularization.

  A calibrator hessian regularization (change in slope) =
    l1_reg * ||nonlinearity||_1 + l2_reg * ||nonlinearity||_2^2
  where nonlinearity is:
    2 * output_keypoints[1:end-1]
    - output_keypoints[0:end-2]
    - output_keypoints[2:end].

  This regularizer is zero when the output_keypoints form a linear function of
  the index (and not necessarily linear in input values, e.g. when using
  non-uniform input keypoints).

  Args:
    output_keypoints: (Rank-1 tensor with shape [num_keypoints]) 1d
      calibrator's output keypoints tensor.
    l1_reg: (float) l1 regularization amount.
    l2_reg: (float) l2 regularization amount.
    name: name scope of calibrator hessian regularizer.

  Returns:
    A rank-0 tensor (scalar) that contains regularizer or None if there is no
    regularization. This can happen if l1_reg and l2_reg amounts are not set,
    or num_keypoints <= 2.

  Raises:
    ValueError: * If output_keypoints is not rank-1 tensor.
                * If the shape of output_keypoints is unknown.
  """
  dims = output_keypoints.shape.as_list()
  if len(dims) != 1:
    raise ValueError('calibrator_hessian expects output_keypoints as a '
                     'rank-1 tensor but got shape: %s' % dims)
  num_kpts = dims[0]
  if num_kpts is None:
    raise ValueError('calibrator_hessian expects output_keypoints dimension '
                     'to be known, but the first dimension is not set.')
  if num_kpts < 3 or (l1_reg is None and l2_reg is None):
    return None

  reg = None
  with tf.name_scope(name):
    slope_diff = (2 * tf.slice(output_keypoints, [1], [num_kpts - 2]) -
                  tf.slice(output_keypoints, [0], [num_kpts - 2]) -
                  tf.slice(output_keypoints, [2], [num_kpts - 2]))
    if l1_reg:
      reg = tools.add_if_not_none(reg,
                                  l1_reg * tf.reduce_sum(tf.abs(slope_diff)))
    if l2_reg:
      reg = tools.add_if_not_none(
          reg, l2_reg * tf.reduce_sum(tf.square(slope_diff)))
  return reg
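# Illustrative sketch (not part of the library; assumes NumPy is available):
# the Hessian term is the discrete second difference, so keypoints that are
# linear in the index incur no penalty.
def _example_hessian_second_difference():
  import numpy as np  # local import: example only

  kp = np.array([0.0, 2.0, 4.0, 6.0])  # kp[i] = 2 * i, linear in index
  slope_diff = 2 * kp[1:-1] - kp[:-2] - kp[2:]
  assert np.allclose(slope_diff, 0.0)
  return slope_diff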
def lattice_regularization(lattice_params,
                           lattice_sizes,
                           l1_reg=None,
                           l2_reg=None,
                           l1_torsion_reg=None,
                           l2_torsion_reg=None,
                           l1_laplacian_reg=None,
                           l2_laplacian_reg=None,
                           name='lattice_regularization'):
  """Returns a lattice regularization op.

  Args:
    lattice_params: (Rank-2 tensor with shape [output_dim, param_dim]) Lattice
      parameter tensor.
    lattice_sizes: (list of integers) lattice size of each dimension.
    l1_reg: (float) l1 regularization amount.
    l2_reg: (float) l2 regularization amount.
    l1_torsion_reg: (float) l1 torsion regularization amount.
    l2_torsion_reg: (float) l2 torsion regularization amount.
    l1_laplacian_reg: (list of floats or float) list of L1 Laplacian
      regularization amounts, one per dimension. If a single float value is
      provided, then all dimensions will get the same value.
    l2_laplacian_reg: (list of floats or float) list of L2 Laplacian
      regularization amounts, one per dimension. If a single float value is
      provided, then all dimensions will get the same value.
    name: name scope of lattice regularization.

  Returns:
    Rank-0 tensor (scalar) that contains lattice regularization.

  Raises:
    ValueError: * lattice_params is not a rank-2 tensor.
                * output_dim or param_dim is unknown.
  """
  with ops.name_scope(name):
    reg = _lattice_laplacian(
        lattice_params,
        lattice_sizes,
        l1_reg=l1_laplacian_reg,
        l2_reg=l2_laplacian_reg)
    reg = tools.add_if_not_none(
        reg,
        _lattice_torsion(
            lattice_params,
            lattice_sizes,
            l1_reg=l1_torsion_reg,
            l2_reg=l2_torsion_reg))
    if l1_reg:
      reg = tools.add_if_not_none(
          reg, l1_reg * math_ops.reduce_sum(math_ops.abs(lattice_params)))
    if l2_reg:
      reg = tools.add_if_not_none(
          reg, l2_reg * math_ops.reduce_sum(math_ops.square(lattice_params)))
  return reg
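# Hypothetical usage sketch (assumes a TF1-style runtime; parameter values and
# amounts are made up). A 2 x 2 lattice has 4 parameters per output, so
# lattice_params is [output_dim, 4]; per-dimension Laplacian amounts are
# passed as a list.
def _example_lattice_regularization():
  lattice_params = tf.constant([[0.0, 0.5, 0.5, 1.2]])  # output_dim = 1
  return lattice_regularization(
      lattice_params,
      lattice_sizes=[2, 2],
      l2_torsion_reg=0.1,             # torsion: 0.0 + 1.2 - 0.5 - 0.5 = 0.2
      l2_laplacian_reg=[0.01, 0.01])  # one Laplacian amount per dimension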
def calibrator_regularization(output_keypoints,
                              l1_reg=None,
                              l2_reg=None,
                              l1_laplacian_reg=None,
                              l2_laplacian_reg=None,
                              l1_hessian_reg=None,
                              l2_hessian_reg=None,
                              l1_wrinkle_reg=None,
                              l2_wrinkle_reg=None,
                              name='calibrator_regularization'):
  """Returns a calibrator regularization op.

  Args:
    output_keypoints: (Rank-1 tensor with shape [num_keypoints]) 1d
      calibrator's output keypoints tensor.
    l1_reg: (float) l1 regularization amount.
    l2_reg: (float) l2 regularization amount.
    l1_laplacian_reg: (float) l1 Laplacian regularization amount.
    l2_laplacian_reg: (float) l2 Laplacian regularization amount.
    l1_hessian_reg: (float) l1 Hessian regularization amount.
    l2_hessian_reg: (float) l2 Hessian regularization amount.
    l1_wrinkle_reg: (float) l1 Wrinkle regularization amount.
    l2_wrinkle_reg: (float) l2 Wrinkle regularization amount.
    name: name scope of calibrator regularization.

  Returns:
    Rank-0 tensor (scalar) that contains calibrator regularization.

  Raises:
    ValueError: * If output_keypoints is not rank-1 tensor.
                * If the shape of output_keypoints is unknown.
  """
  with tf.name_scope(name):
    reg = _calibrator_laplacian(
        output_keypoints, l1_reg=l1_laplacian_reg, l2_reg=l2_laplacian_reg)
    reg = tools.add_if_not_none(
        reg,
        _calibrator_hessian(
            output_keypoints, l1_reg=l1_hessian_reg, l2_reg=l2_hessian_reg))
    reg = tools.add_if_not_none(
        reg,
        _calibrator_wrinkle(
            output_keypoints, l1_reg=l1_wrinkle_reg, l2_reg=l2_wrinkle_reg))
    if l1_reg:
      reg = tools.add_if_not_none(
          reg, l1_reg * tf.reduce_sum(tf.abs(output_keypoints)))
    if l2_reg:
      reg = tools.add_if_not_none(
          reg, l2_reg * tf.reduce_sum(tf.square(output_keypoints)))
  return reg
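# Hypothetical usage sketch (assumes a TF1-style runtime; keypoint values and
# amounts are made up): combines a plain L1 penalty on the keypoint values
# with a wrinkle penalty on changes in their second derivative.
def _example_calibrator_regularization():
  output_keypoints = tf.constant([0.0, 0.4, 0.9, 1.7, 2.9])
  return calibrator_regularization(
      output_keypoints,
      l1_reg=1e-4,          # L1 on the raw keypoint values
      l2_wrinkle_reg=1e-3)  # penalizes changes in the second derivative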
def _calibrator_laplacian(output_keypoints,
                          l1_reg=None,
                          l2_reg=None,
                          name='calibrator_laplacian'):
  """Returns a calibrator laplacian regularization.

  A calibrator laplacian regularization =
    l1_reg * ||output_keypoints[1:end] - output_keypoints[0:end-1]||_1
    + l2_reg * ||output_keypoints[1:end] - output_keypoints[0:end-1]||_2^2

  Args:
    output_keypoints: (Rank-1 tensor with shape [num_keypoints]) 1d
      calibrator's output keypoints tensor.
    l1_reg: (float) l1 regularization amount.
    l2_reg: (float) l2 regularization amount.
    name: name scope of calibrator laplacian regularizer.

  Returns:
    A rank-0 tensor (scalar) that contains regularizer or None if there is no
    regularization. This can happen if l1_reg and l2_reg amounts are not set,
    or num_keypoints <= 1.

  Raises:
    ValueError: * If output_keypoints is not rank-1 tensor.
                * If the shape of output_keypoints is unknown.
  """
  dims = output_keypoints.shape.as_list()
  if len(dims) != 1:
    raise ValueError('calibrator_laplacian expects output_keypoints as a '
                     'rank-1 tensor but got shape: %s' % dims)
  num_kpts = dims[0]
  if num_kpts is None:
    raise ValueError('calibrator_laplacian expects output_keypoints dimension '
                     'to be known, but the first dimension is not set.')
  if num_kpts <= 1 or (l1_reg is None and l2_reg is None):
    return None

  reg = None
  with tf.name_scope(name):
    diff = (tf.slice(output_keypoints, [1], [num_kpts - 1]) -
            tf.slice(output_keypoints, [0], [num_kpts - 1]))
    if l1_reg:
      reg = tools.add_if_not_none(reg, l1_reg * tf.reduce_sum(tf.abs(diff)))
    if l2_reg:
      reg = tools.add_if_not_none(reg,
                                  l2_reg * tf.reduce_sum(tf.square(diff)))
  return reg
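# Illustrative sketch (not part of the library; assumes NumPy is available):
# the Laplacian term is the first difference of adjacent keypoints, so it
# shrinks neighbors towards each other and flattens the calibrator.
def _example_laplacian_first_difference():
  import numpy as np  # local import: example only

  kp = np.array([0.0, 0.5, 0.75, 1.5])
  diff = kp[1:] - kp[:-1]          # [0.5, 0.25, 0.75]
  l1_term = np.abs(diff).sum()     # 1.5, to be scaled by l1_reg
  l2_term = np.square(diff).sum()  # 0.875, to be scaled by l2_reg
  return l1_term, l2_term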
def _embedded_lattices(calibrated_input_tensor,
                       input_dim,
                       output_dim,
                       interpolation_type,
                       monotonic_num_lattices,
                       monotonic_lattice_rank,
                       monotonic_lattice_size,
                       non_monotonic_num_lattices,
                       non_monotonic_lattice_rank,
                       non_monotonic_lattice_size,
                       linear_embedding_calibration_min,
                       linear_embedding_calibration_max,
                       linear_embedding_calibration_num_keypoints,
                       is_monotone=None,
                       lattice_l1_reg=None,
                       lattice_l2_reg=None,
                       lattice_l1_torsion_reg=None,
                       lattice_l2_torsion_reg=None,
                       lattice_l1_laplacian_reg=None,
                       lattice_l2_laplacian_reg=None):
  """Creates an ensemble of lattices with a linear embedding.

  This function constructs the following deep lattice network:
  calibrated_input -> linear_embedding -> calibration -> ensemble of lattices.
  The lattice ensemble outputs are then averaged and a bias term is added to
  make the final prediction.

  The ensemble of lattices consists of two parts: monotonic lattices and
  non-monotonic lattices. The input to the monotonic lattices is an output of
  the linear_embedding that contains both monotonic and non-monotonic
  calibrated_input. All inputs to the monotonic lattices are set to be
  monotonic to preserve end-to-end monotonicity in the monotonic features.
  The input to the non-monotonic lattices is an output of the linear_embedding
  that only contains non-monotonic calibrated_input. All inputs to the
  non-monotonic lattices are set to be non-monotonic, since we do not need to
  guarantee monotonicity.

  Args:
    calibrated_input_tensor: [batch_size, input_dim] tensor.
    input_dim: (int) input dimension.
    output_dim: (int) output dimension.
    interpolation_type: defines whether the lattice will interpolate using the
      full hypercube or only the simplex ("hyper-triangle") around the point
      being evaluated. Valid values: 'hypercube' or 'simplex'.
    monotonic_num_lattices: (int) number of monotonic lattices in the ensemble
      lattices layer.
    monotonic_lattice_rank: (int) number of inputs to each monotonic lattice
      in the ensemble lattices layer.
    monotonic_lattice_size: (int) lattice cell size for each monotonic lattice
      in the ensemble lattices layer.
    non_monotonic_num_lattices: (int) number of non-monotonic lattices in the
      ensemble lattices layer.
    non_monotonic_lattice_rank: (int) number of inputs to each non-monotonic
      lattice in the ensemble lattices layer.
    non_monotonic_lattice_size: (int) lattice cell size for each non-monotonic
      lattice in the ensemble lattices layer.
    linear_embedding_calibration_min: (float) the minimum input keypoints
      value for linear_embedding calibration.
    linear_embedding_calibration_max: (float) the maximum input keypoints
      value for linear_embedding calibration.
    linear_embedding_calibration_num_keypoints: (int) the number of keypoints
      for linear_embedding calibration.
    is_monotone: (bool, list of booleans) if is_monotone[k] == True, then
      calibrated_input_tensor[:, k] is considered to be a monotonic input.
    lattice_l1_reg: (float) lattice l1 regularization amount.
    lattice_l2_reg: (float) lattice l2 regularization amount.
    lattice_l1_torsion_reg: (float) lattice l1 torsion regularization amount.
    lattice_l2_torsion_reg: (float) lattice l2 torsion regularization amount.
    lattice_l1_laplacian_reg: (float) lattice l1 laplacian regularization
      amount.
    lattice_l2_laplacian_reg: (float) lattice l2 laplacian regularization
      amount.

  Returns:
    A tuple of (output_tensor, projection_ops, regularization).

  Raises:
    ValueError: If there are no non-monotonic inputs but
      non_monotonic_num_lattices is not zero.
  """
  projections = []
  regularization = None

  # Explicitly assign the number of lattices to zero for any empty cases.
  if not monotonic_num_lattices:
    monotonic_num_lattices = 0
  if not non_monotonic_num_lattices:
    non_monotonic_num_lattices = 0

  # Step 1. Create a linear embedding.
  if monotonic_num_lattices:
    monotonic_embedding_dim = monotonic_num_lattices * monotonic_lattice_rank
  else:
    monotonic_embedding_dim = 0
  if non_monotonic_num_lattices:
    non_monotonic_embedding_dim = (
        non_monotonic_num_lattices * non_monotonic_lattice_rank)
  else:
    non_monotonic_embedding_dim = 0

  if is_monotone is not None:
    is_monotone = tools.cast_to_list(is_monotone, input_dim, 'is_monotone')
  with variable_scope.variable_scope('linear_embedding'):
    packed_results = monotone_linear_layers.split_monotone_linear_layer(
        calibrated_input_tensor,
        input_dim,
        monotonic_embedding_dim,
        non_monotonic_embedding_dim,
        is_monotone=is_monotone)
    (monotonic_output, _, non_monotonic_output, _, proj, _) = packed_results
    if proj is not None:
      projections.append(proj)

  # Step 2. Create an ensemble of monotonic lattices.
  if monotonic_num_lattices == 0:
    m_lattice_outputs = None
  else:
    with variable_scope.variable_scope('monotonic_lattices'):
      m_lattice_outputs, projs, reg = _ensemble_lattices_layer(
          monotonic_output,
          monotonic_embedding_dim,
          output_dim,
          interpolation_type,
          linear_embedding_calibration_min,
          linear_embedding_calibration_max,
          linear_embedding_calibration_num_keypoints,
          monotonic_num_lattices,
          monotonic_lattice_rank,
          monotonic_lattice_size,
          is_monotone=True,
          l1_reg=lattice_l1_reg,
          l2_reg=lattice_l2_reg,
          l1_torsion_reg=lattice_l1_torsion_reg,
          l2_torsion_reg=lattice_l2_torsion_reg,
          l1_laplacian_reg=lattice_l1_laplacian_reg,
          l2_laplacian_reg=lattice_l2_laplacian_reg)
      if projs:
        projections += projs
      regularization = tools.add_if_not_none(regularization, reg)

  # Step 3. Construct non-monotonic ensembles.
  if non_monotonic_output is None and non_monotonic_num_lattices > 0:
    raise ValueError(
        'All input signals are monotonic but the number of non-monotonic '
        'lattices is not zero.')
  if non_monotonic_num_lattices == 0:
    n_lattice_outputs = None
  else:
    with variable_scope.variable_scope('non_monotonic_lattices'):
      n_lattice_outputs, projs, reg = _ensemble_lattices_layer(
          non_monotonic_output,
          non_monotonic_embedding_dim,
          output_dim,
          interpolation_type,
          linear_embedding_calibration_min,
          linear_embedding_calibration_max,
          linear_embedding_calibration_num_keypoints,
          non_monotonic_num_lattices,
          non_monotonic_lattice_rank,
          non_monotonic_lattice_size,
          is_monotone=False,
          l1_reg=lattice_l1_reg,
          l2_reg=lattice_l2_reg,
          l1_torsion_reg=lattice_l1_torsion_reg,
          l2_torsion_reg=lattice_l2_torsion_reg,
          l1_laplacian_reg=lattice_l1_laplacian_reg,
          l2_laplacian_reg=lattice_l2_laplacian_reg)
      if projs:
        projections += projs
      regularization = tools.add_if_not_none(regularization, reg)

  # Step 4. Take the average to make the final prediction.
  with variable_scope.variable_scope('ensemble_average'):
    output = variable_scope.get_variable(
        name='ensemble_bias',
        initializer=[0.0] * output_dim,
        dtype=calibrated_input_tensor.dtype)
    if m_lattice_outputs is not None:
      output += math_ops.divide(
          math_ops.add_n(m_lattice_outputs), monotonic_num_lattices)
    if n_lattice_outputs is not None:
      output += math_ops.divide(
          math_ops.add_n(n_lattice_outputs), non_monotonic_num_lattices)

  return (output, projections, regularization)
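# A worked sizing example in plain Python (hypothetical numbers): with 3
# monotonic lattices of rank 2 and 2 non-monotonic lattices of rank 3, the
# linear embedding produces 6 monotonic and 6 non-monotonic signals, which
# are calibrated and then split across the two lattice ensembles.
def _example_embedding_dims():
  monotonic_embedding_dim = 3 * 2      # monotonic_num_lattices * monotonic_lattice_rank
  non_monotonic_embedding_dim = 2 * 3  # non_monotonic_num_lattices * non_monotonic_lattice_rank
  return monotonic_embedding_dim, non_monotonic_embedding_dim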
def split_monotone_linear_layer(input_tensor,
                                input_dim,
                                monotonic_output_dim,
                                non_monotonic_output_dim,
                                is_monotone=None,
                                init_weight_mean=2.0,
                                init_weight_stddev=0.5,
                                init_bias=None,
                                l1_reg=None,
                                l2_reg=None):
  """Creates a split monotonic linear embedding layer.

  Returns the outputs of partially monotonic linear embedding layers, the
  weights in the linear embedding layers, projection ops and regularizers.
  This function splits monotonic and non-monotonic inputs based on
  is_monotone, and creates two separate linear embeddings of the following
  form:

    monotonic_output = monotonic_input * monotonic_weight
                       + non_monotonic_input * nm_weight + bias
    non_monotonic_output = non_monotonic_input * nn_weight + bias

  where monotonic_weight has to be non-negative. All elements in
  monotonic_output should be treated as monotonic signals, otherwise there
  would be no monotonicity guarantee. Weights are initialized as in
  monotone_linear_layer.

  Args:
    input_tensor: [batch_size, input_dim] tensor.
    input_dim: (int) input dimension.
    monotonic_output_dim: (int) monotonic_output's dimension.
    non_monotonic_output_dim: (int) non_monotonic_output's dimension.
    is_monotone: A list of input_dim booleans, or None. If None, all inputs
      are set to be non-monotonic. In the boolean list case,
      input_tensor[:, k] is set to be a monotonic input if
      is_monotone[k] == True.
    init_weight_mean: (float) mean for the Normal random weight initializer.
    init_weight_stddev: (float) standard deviation for the Normal random
      weight initializer.
    init_bias: (float) initial bias. If not provided,
      -1/2 * init_weight_mean * input_dim is used.
    l1_reg: (float) amount of l1 regularization.
    l2_reg: (float) amount of l2 regularization.

  Returns:
    A tuple of:
    * monotonic_output tensor of shape [batch_size, monotonic_output_dim], or
      None if monotonic_output_dim == 0.
    * monotonic_output's weight tensor of shape
      [input_dim, monotonic_output_dim], or None if monotonic_output_dim == 0.
    * non_monotonic_output tensor of shape
      [batch_size, non_monotonic_output_dim], or None if
      non_monotonic_output_dim == 0.
    * non_monotonic_output's weight tensor of shape
      [non_monotonic_input_dim, non_monotonic_output_dim], or None if
      non_monotonic_output_dim == 0.
    * None or projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity.
    * None or a regularization loss, if regularization is configured.

  Raises:
    ValueError: * If is_monotone is neither None nor a list.
                * If is_monotone is a list but its length != input_dim.
                * If all values in is_monotone are True, but
                  non_monotonic_output_dim is not 0.
  """
  monotonic_output = None
  m_weight = None
  non_monotonic_output = None
  n_weight = None
  projections = []
  regularization = None

  if monotonic_output_dim > 0:
    with variable_scope.variable_scope('split_monotone'):
      packed_results = monotone_linear_layer(
          input_tensor,
          input_dim=input_dim,
          output_dim=monotonic_output_dim,
          is_monotone=is_monotone,
          init_weight_mean=init_weight_mean,
          init_weight_stddev=init_weight_stddev,
          init_bias=init_bias,
          l1_reg=l1_reg,
          l2_reg=l2_reg)
      (monotonic_output, m_weight, projection, regularizer) = packed_results
      projections.append(projection)
      regularization = tools.add_if_not_none(regularization, regularizer)

  if non_monotonic_output_dim > 0:
    with variable_scope.variable_scope('split_non_monotone'):
      # Construct non_monotone_input_tensor. Track the reduced input
      # dimension explicitly, so it is also defined when is_monotone is None.
      if is_monotone is None:
        non_monotone_input_tensor = input_tensor
        non_monotonic_input_dim = input_dim
      else:
        if not isinstance(is_monotone, list):
          raise ValueError('is_monotone should be None or a list of booleans')
        if len(is_monotone) != input_dim:
          raise ValueError('input_dim (%d) != is_monotone length (%d)' %
                           (input_dim, len(is_monotone)))
        input_columns = array_ops.unstack(input_tensor, axis=1)
        non_monotone_columns = []
        for (monotone, input_column) in zip(is_monotone, input_columns):
          if not monotone:
            non_monotone_columns.append(input_column)
        if not non_monotone_columns:
          raise ValueError(
              'non_monotonic_output_dim is neither None nor zero, but all '
              'inputs are marked monotonic, so there are no non-monotonic '
              'inputs to embed.')
        non_monotonic_input_dim = len(non_monotone_columns)
        non_monotone_input_tensor = array_ops.stack(
            non_monotone_columns, axis=1)

      # Create a linear embedding.
      packed_results = monotone_linear_layer(
          non_monotone_input_tensor,
          input_dim=non_monotonic_input_dim,
          output_dim=non_monotonic_output_dim,
          is_monotone=None,
          init_weight_mean=init_weight_mean,
          init_weight_stddev=init_weight_stddev,
          init_bias=init_bias,
          l1_reg=l1_reg,
          l2_reg=l2_reg)
      (non_monotonic_output, n_weight, _, regularizer) = packed_results
      regularization = tools.add_if_not_none(regularization, regularizer)

  return (monotonic_output, m_weight, non_monotonic_output, n_weight,
          projections, regularization)
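# Hypothetical usage sketch (assumes a TF1-style runtime with placeholders;
# dimensions are made up): three inputs, of which only the first is
# monotonic. The monotonic embedding sees all three inputs (non-negativity of
# the monotonic weights is enforced by the returned projections); the
# non-monotonic embedding sees only the last two.
def _example_split_monotone_linear_layer():
  x = tf.compat.v1.placeholder(tf.float32, shape=[None, 3])
  (mono_out, _, non_mono_out, _, projections, reg) = (
      split_monotone_linear_layer(
          x,
          input_dim=3,
          monotonic_output_dim=4,      # mono_out: [batch_size, 4]
          non_monotonic_output_dim=2,  # non_mono_out: [batch_size, 2]
          is_monotone=[True, False, False]))
  return mono_out, non_mono_out, projections, reg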
def calibration_layer(uncalibrated_tensor,
                      num_keypoints,
                      keypoints_initializers=None,
                      keypoints_initializer_fns=None,
                      bound=False,
                      monotonic=None,
                      missing_input_values=None,
                      missing_output_values=None,
                      name=None,
                      **regularizer_amounts):
  """Creates a calibration layer for uncalibrated values.

  Returns a calibrated tensor of the same shape as the uncalibrated continuous
  signals passed in, and a list of projection ops, that must be applied at
  each step (or every so many steps) to project the model to a feasible space:
  used for bounding the outputs or for imposing monotonicity -- the list will
  be empty if bound and monotonic are not set.

  Args:
    uncalibrated_tensor: Tensor of shape [batch_size, ...] with uncalibrated
      values.
    num_keypoints: Number of keypoints to use. Either a scalar value that will
      be used for every uncalibrated signal, or a list of n values, one per
      uncalibrated signal -- uncalibrated is first flattened (see
      tf.contrib.layers.flatten) to [batch_size, n], and there should be one
      value in the list per n. If a value of the list is 0 or None the
      corresponding signal won't be calibrated.
    keypoints_initializers: For evaluation or inference (or when resuming
      training from a checkpoint) the values will be loaded from disk, so they
      don't need to be given (leave it as None). Otherwise provide either a
      tuple of two tensors of shape [num_keypoints], or a list of n pairs of
      tensors, each of shape [num_keypoints]. In this list there should be one
      pair per uncalibrated signal, just like num_keypoints above. Notice that
      num_keypoints can be different per signal.
    keypoints_initializer_fns: Like keypoints_initializers but using lambda
      initializers. They should be compatible with tf.get_variable. If this is
      set, then keypoints_initializers must be None.
    bound: boolean whether the output of calibration must be bound.
      Alternatively a list of n booleans, one per uncalibrated value, like
      num_keypoints above.
    monotonic: whether calibration is monotonic: None or 0 means no
      monotonicity. Positive or negative values mean increasing or decreasing
      monotonicity respectively. Alternatively a list of n monotonic values,
      one per uncalibrated value, like num_keypoints above.
    missing_input_values: If set, and if the input has this value it is
      assumed to be missing and the output will either be calibrated to some
      value between `[calibration_output_min, calibration_output_max]` or set
      to a fixed value set by missing_output_value. Limitation: it only works
      for scalars. Either one value for all inputs, or a list with one value
      per uncalibrated value.
    missing_output_values: Requires missing_input_value also to be set. If
      set, it will convert missing input to this value. Either one value for
      all outputs, or a list with one value per uncalibrated value.
    name: Name scope for operations.
    **regularizer_amounts: Keyword args of regularization amounts passed to
      regularizers.calibrator_regularization(). Keyword names should be among
      supported regularizers.CALIBRATOR_REGULARIZERS and values should be
      either float or list of floats. If float, then the same value is applied
      to all input signals.

  Returns:
    A tuple of:
    * calibrated tensor of shape [batch_size, ...], the same shape as
      uncalibrated.
    * list of projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity. Empty if none are requested.
    * None or tensor with regularization loss.

  Raises:
    ValueError: If dimensions don't match.
  """
  with ops.name_scope(name or 'calibration_layer'):
    # Flatten uncalibrated tensor [batch_size, k1, k2, ..., kn] to
    # [batch_size, k1 * k2 * ... * kn].
    uncalibrated_shape = uncalibrated_tensor.get_shape().as_list()
    n = 1
    for non_batch_dim in uncalibrated_shape[1:]:
      n *= non_batch_dim
    flat_uncalibrated = array_ops.reshape(
        uncalibrated_tensor, shape=[-1, n], name='flat_uncalibrated')

    num_keypoints = tools.cast_to_list(num_keypoints, n, 'num_keypoints')
    keypoints_initializers = tools.cast_to_list(keypoints_initializers, n,
                                                'keypoints_initializers')
    keypoints_initializer_fns = tools.cast_to_list(
        keypoints_initializer_fns, n, 'keypoints_initializer_fns')
    bound = tools.cast_to_list(bound, n, 'bound')
    monotonic = tools.cast_to_list(monotonic, n, 'monotonic')
    missing_input_values = tools.cast_to_list(missing_input_values, n,
                                              'missing_input_values')
    missing_output_values = tools.cast_to_list(missing_output_values, n,
                                               'missing_output_values')
    regularizer_amounts = {
        regularizer_name: tools.cast_to_list(
            regularizer_amounts[regularizer_name], n, regularizer_name)
        for regularizer_name in regularizer_amounts
    }

    signal_names = ['signal_%d' % ii for ii in range(n)]

    uncalibrated_splits = array_ops.unstack(flat_uncalibrated, axis=1)
    calibrated_splits = []
    projection_ops = []
    total_regularization = None
    for ii in range(n):
      if not num_keypoints[ii]:
        # No calibration for this signal.
        calibrated_splits += [uncalibrated_splits[ii]]
      else:
        signal_regularizer_amounts = {
            regularizer_name: regularizer_amounts[regularizer_name][ii]
            for regularizer_name in regularizer_amounts
        }
        calibrated, projection, reg = one_dimensional_calibration_layer(
            uncalibrated_splits[ii],
            num_keypoints[ii],
            signal_name=signal_names[ii],
            keypoints_initializers=keypoints_initializers[ii],
            keypoints_initializer_fns=keypoints_initializer_fns[ii],
            bound=bound[ii],
            monotonic=monotonic[ii],
            missing_input_value=missing_input_values[ii],
            missing_output_value=missing_output_values[ii],
            **signal_regularizer_amounts)
        calibrated_splits += [calibrated]
        if projection is not None:
          projection_ops += [projection]
        total_regularization = tools.add_if_not_none(total_regularization,
                                                     reg)

    flat_calibrated = array_ops.stack(
        calibrated_splits, axis=1, name='stack_calibrated')
    reshaped_calibrated = array_ops.reshape(
        flat_calibrated,
        shape=array_ops.shape(uncalibrated_tensor),
        name='reshape_calibrated')
    return reshaped_calibrated, projection_ops, total_regularization
def input_calibration_layer(columns_to_tensors,
                            num_keypoints,
                            feature_columns=None,
                            keypoints_initializers=None,
                            keypoints_initializer_fns=None,
                            bound=False,
                            monotonic=None,
                            missing_input_values=None,
                            missing_output_values=None,
                            dtype=dtypes.float32,
                            **regularizer_amounts):
  """Creates a calibration layer for the given input and feature_columns.

  Returns a tensor with the calibrated values of the given features, a list
  of the names of the features in the order they appear in the returned
  tensor, and a list of projection ops, that must be applied at each step (or
  every so many steps) to project the model to a feasible space: used for
  bounding the outputs or for imposing monotonicity -- the list will be empty
  if bound and monotonic are not set.

  Args:
    columns_to_tensors: A mapping from feature name to tensors. 'string' key
      means a base feature (not-transformed). If feature_columns is not set
      these are the features calibrated. Otherwise the transformed
      feature_columns are the ones calibrated.
    num_keypoints: Number of keypoints to use. Either a single int, or a dict
      mapping feature names to num_keypoints. If a value of the dict is 0 or
      None the corresponding feature won't be calibrated.
    feature_columns: Optional. If set to a set of FeatureColumns, these will
      be the features used and calibrated.
    keypoints_initializers: For evaluation or inference (or when resuming
      training from a checkpoint) the values will be loaded from disk, so they
      don't need to be given (leave it as None). Either a tuple of two tensors
      of shape [num_keypoints], or a dict mapping feature names to pairs of
      tensors of shape [num_keypoints[feature_name]]. See
      load_keypoints_from_quantiles or uniform_keypoints_for_signal on how to
      generate these (module keypoints_initialization).
    keypoints_initializer_fns: Like keypoints_initializers but using lambda
      initializers. They should be compatible with tf.get_variable. If this is
      set, then keypoints_initializers must be None.
    bound: boolean whether the output of calibration must be bound.
      Alternatively a dict mapping feature name to whether its output should
      be bound.
    monotonic: whether calibration has to be kept monotonic: None or 0 means
      no monotonicity. Positive or negative values mean increasing or
      decreasing monotonicity respectively. Alternatively a dict mapping
      feature name to monotonicity.
    missing_input_values: If set, and if the input has this value it is
      assumed to be missing and the output will either be calibrated to some
      value between `[calibration_output_min, calibration_output_max]` or set
      to a fixed value set by missing_output_value. Limitation: it only works
      for scalars. Either one value for all inputs, or a dict mapping feature
      name to missing_input_value for the respective feature.
    missing_output_values: Requires missing_input_value also to be set. If
      set, it will convert missing input to this value. Either one value for
      all inputs, or a dict mapping feature name to missing_output_value for
      the respective feature.
    dtype: If any of the scalars are not given as tensors, they are converted
      to tensors with this dtype.
    **regularizer_amounts: Keyword args of regularization amounts passed to
      regularizers.calibrator_regularization(). Keyword names should be among
      supported regularizers.CALIBRATOR_REGULARIZERS and values should be
      either float or {feature_name: float}. If float, then the same value is
      applied to all features.

  Returns:
    A tuple of:
    * calibrated tensor of shape [batch_size, sum(features dimensions)].
    * list of the feature names in the order they appear in the calibrated
      tensor. A name may appear more than once if the feature is
      multi-dimensional (for instance a multi-dimensional embedding).
    * list of projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity. Empty if none are requested.
    * None or tensor with regularization loss.

  Raises:
    ValueError: if dtypes are incompatible.
  """
  with ops.name_scope('input_calibration_layer'):
    feature_names = tools.get_sorted_feature_names(columns_to_tensors,
                                                   feature_columns)
    num_keypoints = tools.cast_to_dict(num_keypoints, feature_names,
                                       'num_keypoints')
    bound = tools.cast_to_dict(bound, feature_names, 'bound')
    monotonic = tools.cast_to_dict(monotonic, feature_names, 'monotonic')
    keypoints_initializers = tools.cast_to_dict(keypoints_initializers,
                                                feature_names,
                                                'keypoints_initializers')
    keypoints_initializer_fns = tools.cast_to_dict(
        keypoints_initializer_fns, feature_names, 'keypoints_initializer_fns')
    missing_input_values = tools.cast_to_dict(missing_input_values,
                                              feature_names,
                                              'missing_input_values')
    missing_output_values = tools.cast_to_dict(missing_output_values,
                                               feature_names,
                                               'missing_output_values')
    regularizer_amounts = {
        regularizer_name: tools.cast_to_dict(
            regularizer_amounts[regularizer_name], feature_names,
            regularizer_name)
        for regularizer_name in regularizer_amounts
    }

    per_dimension_feature_names = []

    # Get uncalibrated tensors, either from columns_to_tensors, or using
    # feature_columns.
    if feature_columns is None:
      uncalibrated_features = [
          columns_to_tensors[name] for name in feature_names
      ]
    else:
      transformed_columns_to_tensors = columns_to_tensors.copy()
      dict_feature_columns = {f_col.name: f_col for f_col in feature_columns}
      uncalibrated_features = [
          tools.input_from_feature_column(transformed_columns_to_tensors,
                                          dict_feature_columns[name], dtype)
          for name in feature_names
      ]

    projection_ops = []
    calibrated_splits = []
    total_regularization = None
    for feature_idx in range(len(feature_names)):
      name = feature_names[feature_idx]
      uncalibrated_feature = uncalibrated_features[feature_idx]
      if uncalibrated_feature.shape.ndims == 1:
        feature_dim = 1
        uncalibrated_splits = [uncalibrated_feature]
      elif uncalibrated_feature.shape.ndims == 2:
        feature_dim = uncalibrated_feature.shape.dims[1].value
        uncalibrated_splits = array_ops.unstack(uncalibrated_feature, axis=1)
      else:
        raise ValueError(
            'feature {}: it has rank {}, but only ranks 1 or 2 are '
            'supported; feature shape={}'.format(
                name, uncalibrated_feature.shape.ndims,
                uncalibrated_feature.shape))
      missing_input_value = missing_input_values[name]
      missing_output_value = missing_output_values[name]
      feature_regularizer_amounts = {
          regularizer_name: regularizer_amounts[regularizer_name][name]
          for regularizer_name in regularizer_amounts
      }

      # FutureWork: make the interpolation ops handle multi-dimensional
      # values, so this step is not needed.
      for dim_idx in range(feature_dim):
        per_dimension_feature_names += [name]
        split_name = name
        if feature_dim > 1:
          split_name = '{}_dim_{}'.format(name, dim_idx)
        uncalibrated = uncalibrated_splits[dim_idx]
        if not num_keypoints[name]:
          # No calibration for this feature:
          calibrated_splits += [uncalibrated]
          if (missing_input_value is not None or
              missing_output_value is not None):
            raise ValueError(
                'feature %s: cannot handle missing values if feature is not '
                'calibrated, missing_input_value=%s, missing_output_value=%s'
                % (name, missing_input_value, missing_output_value))
        else:
          calibrated, projection, reg = one_dimensional_calibration_layer(
              uncalibrated,
              num_keypoints[name],
              signal_name=split_name,
              keypoints_initializers=keypoints_initializers[name],
              keypoints_initializer_fns=keypoints_initializer_fns[name],
              bound=bound[name],
              monotonic=monotonic[name],
              missing_input_value=missing_input_value,
              missing_output_value=missing_output_value,
              **feature_regularizer_amounts)
          calibrated_splits += [calibrated]
          if projection is not None:
            projection_ops += [projection]
          total_regularization = tools.add_if_not_none(total_regularization,
                                                       reg)

    all_calibrated = array_ops.stack(
        calibrated_splits, axis=1, name='stack_calibrated')
    return (all_calibrated, per_dimension_feature_names, projection_ops,
            total_regularization)
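# Hypothetical usage sketch (assumes a TF1-style runtime; feature names,
# ranges and keypoint counts are made up): calibrates two scalar features,
# monotonically increasing in 'distance' and unconstrained in 'age'.
def _example_input_calibration_layer():
  columns_to_tensors = {
      'distance': tf.compat.v1.placeholder(tf.float32, shape=[None]),
      'age': tf.compat.v1.placeholder(tf.float32, shape=[None]),
  }
  kp_init = lambda mn, mx: (tf.linspace(mn, mx, 10),
                            tf.linspace(0.0, 1.0, 10))
  return input_calibration_layer(
      columns_to_tensors,
      num_keypoints=10,
      keypoints_initializers={'distance': kp_init(0.0, 100.0),
                              'age': kp_init(18.0, 90.0)},
      monotonic={'distance': +1, 'age': 0})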
def _lattice_torsion(lattice_param,
                     lattice_sizes,
                     l1_reg=None,
                     l2_reg=None,
                     name='lattice_torsion'):
  """Returns a lattice torsion regularization.

  The torsion regularizer penalizes how much the lattice function twists from
  side-to-side, i.e., the non-linear interactions in each 2 x 2 cell. See
  Monotonic Calibrated Interpolated Look-Up Tables, JMLR, 2016 for the
  details; we provide a 2d example here. Consider a 3 x 2 lattice:

    3-------4--------5
    |       |        |
    |       |        |
    0-------1--------2

  where the number at each node represents the parameter index. In this case,
  the torsion l2 regularizer is defined as

    reg = l2_reg * ((param[4] + param[0] - param[3] - param[1]) ** 2
                    + (param[5] + param[1] - param[4] - param[2]) ** 2)

  where param is a lattice_param tensor, assuming one output. In the l1 case,
  the squared value is replaced with the absolute value.

  If num_outputs > 1, the op is

    total_reg = sum_{d=1}^{output_dim} reg(lattice_param[d, :])

  i.e., a sum across all output dimensions.

  Args:
    lattice_param: (Rank-2 tensor with shape [num_outputs, num_parameters])
      lattice model's parameter.
    lattice_sizes: (list of integers) lattice size of each dimension.
    l1_reg: (float) l1 regularization amount.
    l2_reg: (float) l2 regularization amount.
    name: name scope of lattice torsion regularizer.

  Returns:
    A rank-0 tensor (scalar) that contains the regularizer, or None if there
    is no regularization. This can happen if the l1_reg and l2_reg amounts
    are not set.

  Raises:
    ValueError: * lattice_param is not a rank-2 tensor.
                * output_dim or param_dim is unknown.
  """
  dims = lattice_param.shape.as_list()
  if len(dims) != 2:
    raise ValueError(
        'lattice_torsion expects lattice_param as a '
        'rank-2 tensor but got dimensions: ', dims)
  output_dim = dims[0]
  param_dim = dims[1]
  lattice_rank = len(lattice_sizes)
  if output_dim is None or param_dim is None:
    raise ValueError(
        'lattice_torsion expects all the dimensions in '
        'lattice_param to be known, but got dimensions: ', dims)

  if l1_reg is None and l2_reg is None:
    return None

  regularization = None
  with tf.name_scope(name):
    for dim1 in range(lattice_rank - 1):
      slice_size1 = lattice_sizes[dim1] - 1
      param_0x = tools.lattice_1d_slice(
          lattice_param,
          lattice_sizes=lattice_sizes,
          lattice_axis=dim1,
          begin=0,
          size=slice_size1)
      param_1x = tools.lattice_1d_slice(
          lattice_param,
          lattice_sizes=lattice_sizes,
          lattice_axis=dim1,
          begin=1,
          size=slice_size1)
      resized_lattice_sizes = copy.deepcopy(lattice_sizes)
      resized_lattice_sizes[dim1] -= 1
      for dim2 in range(dim1 + 1, lattice_rank):
        slice_size2 = resized_lattice_sizes[dim2] - 1
        param_00 = tools.lattice_1d_slice(
            param_0x,
            lattice_sizes=resized_lattice_sizes,
            lattice_axis=dim2,
            begin=0,
            size=slice_size2)
        param_01 = tools.lattice_1d_slice(
            param_0x,
            lattice_sizes=resized_lattice_sizes,
            lattice_axis=dim2,
            begin=1,
            size=slice_size2)
        param_10 = tools.lattice_1d_slice(
            param_1x,
            lattice_sizes=resized_lattice_sizes,
            lattice_axis=dim2,
            begin=0,
            size=slice_size2)
        param_11 = tools.lattice_1d_slice(
            param_1x,
            lattice_sizes=resized_lattice_sizes,
            lattice_axis=dim2,
            begin=1,
            size=slice_size2)
        torsion = param_00 + param_11 - param_01 - param_10
        if l1_reg:
          regularization = tools.add_if_not_none(
              regularization, l1_reg * tf.reduce_sum(tf.abs(torsion)))
        if l2_reg:
          regularization = tools.add_if_not_none(
              regularization, l2_reg * tf.reduce_sum(tf.square(torsion)))
  return regularization
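# Illustrative sketch (not part of the library; assumes NumPy is available):
# the 3 x 2 example above, with parameters indexed as in the diagram; one
# torsion term per 2 x 2 cell.
def _example_torsion_terms():
  import numpy as np  # local import: example only

  param = np.array([0.0, 1.0, 2.0, 1.0, 3.0, 5.0])     # [p0, ..., p5]
  t_left = param[4] + param[0] - param[3] - param[1]   # 3 + 0 - 1 - 1 = 1
  t_right = param[5] + param[1] - param[4] - param[2]  # 5 + 1 - 3 - 2 = 1
  return t_left ** 2 + t_right ** 2                    # scaled by l2_reg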
def _lattice_laplacian(lattice_param,
                       lattice_sizes,
                       l1_reg=None,
                       l2_reg=None,
                       name='lattice_laplacian'):
  """Returns a lattice laplacian regularization.

  Laplacian regularizers penalize the difference between adjacent vertices in
  a multi-cell lattice. See Lattice Regression, NIPS, 2009 for the details;
  we provide a 2d example here. Consider a 3 x 2 lattice:

    3-------4--------5
    |       |        |
    |       |        |
    0-------1--------2

  where the number at each node represents the parameter index. In this case,
  the laplacian l1 regularizer is defined as

    reg = l1_reg[0] * (|param[1] - param[0]| + |param[2] - param[1]|
                       + |param[4] - param[3]| + |param[5] - param[4]|)
        + l1_reg[1] * (|param[3] - param[0]| + |param[4] - param[1]|
                       + |param[5] - param[2]|)

  where param is a lattice_param tensor, assuming one output. In the l2 case,
  the absolute value is replaced with a square.

  If num_outputs > 1, the op is

    total_reg = sum_{d=1}^{output_dim} reg(lattice_param[d, :])

  i.e., a sum across all output dimensions.

  Args:
    lattice_param: (Rank-2 tensor with shape [num_outputs, num_parameters])
      lattice model's parameter.
    lattice_sizes: (list of integers) lattice size of each dimension.
    l1_reg: (list of floats or float) l1 regularization amount per lattice
      dimension. If a float, the same amount is applied to all lattice
      dimensions.
    l2_reg: (list of floats or float) l2 regularization amount per lattice
      dimension. If a float, the same amount is applied to all lattice
      dimensions.
    name: name scope of lattice laplacian regularizer.

  Returns:
    A rank-0 tensor (scalar) that contains the regularizer, or None if there
    is no regularization. This can happen if the l1_reg and l2_reg amounts
    are not set.

  Raises:
    ValueError: * lattice_param is not a rank-2 tensor.
                * output_dim or param_dim is unknown.
  """
  dims = lattice_param.shape.as_list()
  if len(dims) != 2:
    raise ValueError(
        'lattice_laplacian expects lattice_param as a '
        'rank-2 tensor but got dimensions: ', dims)
  output_dim = dims[0]
  param_dim = dims[1]
  if output_dim is None or param_dim is None:
    raise ValueError(
        'lattice_laplacian expects all the dimensions in '
        'lattice_param to be known, but got dimensions: ', dims)

  l1_reg = tools.cast_to_list(l1_reg, len(lattice_sizes), 'laplacian_l1_reg')
  l2_reg = tools.cast_to_list(l2_reg, len(lattice_sizes), 'laplacian_l2_reg')

  # Collect all dimensions that have a non-trivial regularization amount.
  reg_dims = []
  lattice_rank = len(lattice_sizes)
  for dim in range(lattice_rank):
    if l1_reg[dim] or l2_reg[dim]:
      reg_dims.append(dim)
  if not reg_dims:
    return None

  regularization = None
  with tf.name_scope(name):
    for dim in reg_dims:
      slice_size = lattice_sizes[dim] - 1
      per_dim_upper = tools.lattice_1d_slice(
          lattice_param,
          lattice_sizes=lattice_sizes,
          lattice_axis=dim,
          begin=1,
          size=slice_size)
      per_dim_lower = tools.lattice_1d_slice(
          lattice_param,
          lattice_sizes=lattice_sizes,
          lattice_axis=dim,
          begin=0,
          size=slice_size)
      per_dim_diff = per_dim_upper - per_dim_lower
      if l1_reg[dim]:
        regularization = tools.add_if_not_none(
            regularization, l1_reg[dim] * tf.reduce_sum(tf.abs(per_dim_diff)))
      if l2_reg[dim]:
        regularization = tools.add_if_not_none(
            regularization,
            l2_reg[dim] * tf.reduce_sum(tf.square(per_dim_diff)))
  return regularization
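# Illustrative sketch (not part of the library; assumes NumPy is available):
# the 3 x 2 example above, accumulating first differences separately along
# each lattice dimension.
def _example_laplacian_terms():
  import numpy as np  # local import: example only

  param = np.array([0.0, 1.0, 2.0, 1.0, 3.0, 5.0])  # [p0, ..., p5]
  dim0_diffs = [param[1] - param[0], param[2] - param[1],
                param[4] - param[3], param[5] - param[4]]
  dim1_diffs = [param[3] - param[0], param[4] - param[1],
                param[5] - param[2]]
  l1_dim0 = np.abs(dim0_diffs).sum()  # scaled by l1_reg[0]
  l1_dim1 = np.abs(dim1_diffs).sum()  # scaled by l1_reg[1]
  return l1_dim0, l1_dim1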
def ensemble_lattices_layer(input_tensor,
                            lattice_sizes,
                            structure_indices,
                            is_monotone=None,
                            output_dim=1,
                            interpolation_type='hypercube',
                            lattice_initializers=None,
                            l1_reg=None,
                            l2_reg=None,
                            l1_torsion_reg=None,
                            l2_torsion_reg=None,
                            l1_laplacian_reg=None,
                            l2_laplacian_reg=None):
  """Creates an ensemble of lattices layer.

  Returns a list of outputs of the lattices, lattice parameters, and
  projection ops.

  Args:
    input_tensor: [batch_size, input_dim] tensor.
    lattice_sizes: A list of lattice sizes of each dimension.
    structure_indices: A list of lists of ints. structure_indices[k] is a list
      of indices that belong to the kth lattice.
    is_monotone: A list of input_dim booleans, a boolean, or None. If None or
      False, the lattice will not have monotonicity constraints. If
      is_monotone[k] == True, then the lattice output has non-decreasing
      monotonicity with respect to input_tensor[?, k] (the kth coordinate).
      If True, all input coordinates will have non-decreasing monotonicity.
    output_dim: Number of outputs.
    interpolation_type: 'hypercube' or 'simplex'.
    lattice_initializers: (Optional) A list of initializers, one per lattice
      parameter vector. lattice_initializers[k] is a 2D tensor
      [output_dim, parameter_dim[k]], where parameter_dim[k] is the number of
      parameters in the kth lattice. If None, the lattice_param_as_linear
      initializer will be used with
      linear_weights=[1 if monotone else 0 for monotone in is_monotone].
    l1_reg: (float) l1 regularization amount.
    l2_reg: (float) l2 regularization amount.
    l1_torsion_reg: (float) l1 torsion regularization amount.
    l2_torsion_reg: (float) l2 torsion regularization amount.
    l1_laplacian_reg: (list of floats or float) list of L1 Laplacian
      regularization amounts, one per dimension. If a single float value is
      provided, then all dimensions will get the same value.
    l2_laplacian_reg: (list of floats or float) list of L2 Laplacian
      regularization amounts, one per dimension. If a single float value is
      provided, then all dimensions will get the same value.

  Returns:
    A tuple of:
    * a list of output tensors, [batch_size, output_dim], with length
      len(structure_indices), i.e., one for each lattice.
    * a list of parameter tensors of shape [output_dim, parameter_dim].
    * None or projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity.
    * None or a regularization loss, if regularization is configured.
  """
  num_lattices = len(structure_indices)
  lattice_initializers = tools.cast_to_list(lattice_initializers,
                                            num_lattices,
                                            'lattice initializers')
  if l1_laplacian_reg is not None:
    l1_laplacian_reg = tools.cast_to_list(l1_laplacian_reg,
                                          len(lattice_sizes),
                                          'l1_laplacian_reg')
  if l2_laplacian_reg is not None:
    l2_laplacian_reg = tools.cast_to_list(l2_laplacian_reg,
                                          len(lattice_sizes),
                                          'l2_laplacian_reg')

  # input_slices[k] = input_tensor[:, k].
  input_slices = array_ops.unstack(input_tensor, axis=1)

  output_tensors = []
  param_tensors = []
  projections = []
  regularization = None
  if is_monotone:
    is_monotone = tools.cast_to_list(is_monotone, len(lattice_sizes),
                                     'is_monotone')
  # Now iterate through structure_indices to construct lattices.
  for (cnt, structure) in enumerate(structure_indices):
    with variable_scope.variable_scope('lattice_%d' % cnt):
      sub_lattice_sizes = [lattice_sizes[idx] for idx in structure]
      sub_is_monotone = None
      if is_monotone:
        sub_is_monotone = [is_monotone[idx] for idx in structure]

      sub_input_tensor_list = [input_slices[idx] for idx in structure]
      sub_input_tensor = array_ops.stack(sub_input_tensor_list, axis=1)

      if l1_laplacian_reg is not None:
        sub_l1_laplacian_reg = [l1_laplacian_reg[idx] for idx in structure]
      else:
        sub_l1_laplacian_reg = None

      if l2_laplacian_reg is not None:
        sub_l2_laplacian_reg = [l2_laplacian_reg[idx] for idx in structure]
      else:
        sub_l2_laplacian_reg = None

      packed_results = lattice_layer(
          sub_input_tensor,
          sub_lattice_sizes,
          sub_is_monotone,
          output_dim=output_dim,
          interpolation_type=interpolation_type,
          lattice_initializer=lattice_initializers[cnt],
          l1_reg=l1_reg,
          l2_reg=l2_reg,
          l1_torsion_reg=l1_torsion_reg,
          l2_torsion_reg=l2_torsion_reg,
          l1_laplacian_reg=sub_l1_laplacian_reg,
          l2_laplacian_reg=sub_l2_laplacian_reg)
      (sub_output, sub_param, sub_proj, sub_reg) = packed_results
      output_tensors.append(sub_output)
      param_tensors.append(sub_param)
      if sub_proj:
        projections += sub_proj
      regularization = tools.add_if_not_none(regularization, sub_reg)

  return (output_tensors, param_tensors, projections, regularization)
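# Hypothetical usage sketch (assumes a TF1-style runtime; sizes and structure
# are made up): a 4d input covered by two 2d lattices, monotonic in input 0.
def _example_ensemble_lattices_layer():
  x = tf.compat.v1.placeholder(tf.float32, shape=[None, 4])
  outputs, params, projections, reg = ensemble_lattices_layer(
      x,
      lattice_sizes=[2, 2, 2, 2],
      structure_indices=[[0, 1], [2, 3]],  # lattice 0 sees inputs 0 and 1
      is_monotone=[True, False, False, False],
      output_dim=1,
      interpolation_type='hypercube',
      l2_torsion_reg=1e-4)
  # outputs is a list of two [batch_size, 1] tensors, one per lattice.
  return outputs, params, projections, reg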
def model_fn(features, labels, mode, config):  # pylint: disable=unused-argument
  """Creates the prediction, loss, and train ops.

  Args:
    features: A dictionary of tensors keyed by the feature name.
    labels: A tensor representing the label.
    mode: The execution mode, as defined in model_fn_lib.ModeKeys.
    config: Optional configuration object. Will receive what is passed to
      Estimator in `config` parameter, or the default `config`. Allows
      updating things in your model_fn based on configuration such as
      `num_ps_replicas`.

  Returns:
    ModelFnOps, with the predictions, loss, and train_op.

  Raises:
    ValueError: if incompatible parameters are given, or if the keypoints
      initializers given during construction return an invalid number of
      keypoints.
  """
  with variable_scope.variable_scope("/".join(
      [_SCOPE_CALIBRATED_TENSORFLOW_LATTICE, self._name])):
    if mode == model_fn_lib.ModeKeys.TRAIN:
      if (self._quantiles_dir is None and
          self._keypoints_initializers_fn is None):
        raise ValueError(
            "At least one of quantiles_dir or keypoints_initializers_fn "
            "must be given for training")

    # If keypoints-initializer closures were given, materialize them now into
    # the initializer tensors.
    kp_init_explicit = None
    if self._keypoints_initializers_fn is not None:
      kp_init_explicit = _call_keypoints_inializers_fn(
          self._keypoints_initializers_fn)

    # Add feature names to hparams so that builders can make use of them.
    add_feature_names_to_hparams(features, self._feature_columns,
                                 self._hparams)

    total_projection_ops = None
    total_regularization = None
    total_prediction = None

    # Get the ensemble structure.
    calibration_structure = self.calibration_structure_builder(
        features, self._feature_columns, self._hparams)
    if calibration_structure is None:
      # Single model or shared calibration.
      (calibrated, per_dimension_feature_names, calibration_projections,
       calibration_regularization) = (
           input_calibration_layer_from_hparams(
               columns_to_tensors=features,
               feature_columns=self._feature_columns,
               hparams=self._hparams,
               quantiles_dir=self._quantiles_dir,
               keypoints_initializers=kp_init_explicit,
               name=_SCOPE_INPUT_CALIBRATION,
               dtype=self._dtype))
      (total_prediction, prediction_projections,
       prediction_regularization) = self.prediction_builder(
           mode, per_dimension_feature_names, self._hparams, calibrated)
      total_projection_ops = tools.add_if_not_none(calibration_projections,
                                                   prediction_projections)
      total_regularization = tools.add_if_not_none(
          calibration_regularization, prediction_regularization)
    else:
      # Ensemble model with separate calibration.
      predictions = []
      for (index, (sub_columns_to_tensors,
                   sub_feature_columns)) in enumerate(calibration_structure):
        # Calibrate.
        with variable_scope.variable_scope("submodel_{}".format(index)):
          (calibrated, per_dimension_feature_names, calibration_projections,
           calibration_regularization) = (
               input_calibration_layer_from_hparams(
                   columns_to_tensors=sub_columns_to_tensors,
                   feature_columns=sub_feature_columns,
                   hparams=self._hparams,
                   quantiles_dir=self._quantiles_dir,
                   keypoints_initializers=kp_init_explicit,
                   name=_SCOPE_INPUT_CALIBRATION,
                   dtype=self._dtype))
          (prediction, prediction_projections,
           prediction_regularization) = self.prediction_builder(
               mode, per_dimension_feature_names, self._hparams, calibrated)
          projection_ops = tools.add_if_not_none(calibration_projections,
                                                 prediction_projections)
          regularization = tools.add_if_not_none(calibration_regularization,
                                                 prediction_regularization)

        # Merge back the results.
        total_projection_ops = tools.add_if_not_none(total_projection_ops,
                                                     projection_ops)
        total_regularization = tools.add_if_not_none(total_regularization,
                                                     regularization)
        predictions.append(prediction)

      # The final prediction is the mean of the predictions, plus a bias
      # term.
      stacked_predictions = array_ops.stack(
          predictions, axis=0, name="stacked_predictions")
      ensemble_output = math_ops.reduce_mean(stacked_predictions, axis=0)
      ensemble_bias_init = self._hparams.get_param("ensemble_bias")
      b = variables.Variable([ensemble_bias_init], name="ensemble_bias")
      total_prediction = ensemble_output + b

    def _train_op_fn(loss):
      """Returns the train_op tensor if TRAIN mode, or None."""
      train_op = None
      if mode == model_fn_lib.ModeKeys.TRAIN:
        if total_regularization is not None:
          loss += total_regularization
        optimizer = _get_optimizer(self._optimizer, self._hparams)
        train_op = optimizer.minimize(
            loss,
            global_step=training_util.get_global_step(),
            name=_SCOPE_TRAIN_OP)
        self._projection_hook.set_projection_ops(total_projection_ops)
      return train_op

    # Use head to generate model_fn outputs.
    estimator_spec = self._head.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        train_op_fn=_train_op_fn,
        logits=total_prediction)

    # Update training hooks to include projection_hook in the training mode.
    if mode == model_fn_lib.ModeKeys.TRAIN:
      updated_training_hooks = (
          estimator_spec.training_hooks + (self._projection_hook,))
      estimator_spec = estimator_spec._replace(
          training_hooks=updated_training_hooks)

    return estimator_spec
def ensemble_lattices_layer(input_tensor,
                            lattice_sizes,
                            structure_indices,
                            is_monotone=None,
                            output_dim=1,
                            interpolation_type='hypercube',
                            lattice_initializers=None,
                            **regularizer_amounts):
  """Creates an ensemble of lattices layer.

  Returns a list of outputs of the lattices, lattice parameters, and
  projection ops.

  Args:
    input_tensor: [batch_size, input_dim] tensor.
    lattice_sizes: A list of lattice sizes of each dimension.
    structure_indices: A list of lists of ints. structure_indices[k] is a list
      of indices that belong to the kth lattice.
    is_monotone: A list of input_dim booleans, a boolean, or None. If None or
      False, the lattice will not have monotonicity constraints. If
      is_monotone[k] == True, then the lattice output has non-decreasing
      monotonicity with respect to input_tensor[?, k] (the kth coordinate).
      If True, all input coordinates will have non-decreasing monotonicity.
    output_dim: Number of outputs.
    interpolation_type: 'hypercube' or 'simplex'.
    lattice_initializers: (Optional) A list of initializers, one per lattice
      parameter vector. lattice_initializers[k] is a 2D tensor
      [output_dim, parameter_dim[k]], where parameter_dim[k] is the number of
      parameters in the kth lattice. If None, the lattice_param_as_linear
      initializer will be used with
      linear_weights=[1 if monotone else 0 for monotone in is_monotone].
    **regularizer_amounts: Keyword args of regularization amounts passed to
      regularizers.lattice_regularization(). Keyword names should be among
      regularizers.LATTICE_ONE_DIMENSIONAL_REGULARIZERS or
      regularizers.LATTICE_MULTI_DIMENSIONAL_REGULARIZERS. For
      multi-dimensional regularizers the value should be float. For
      one-dimensional regularizers the values should be float or a list of
      floats. If a single float value is provided, then all dimensions will
      get the same value.

  Returns:
    A tuple of:
    * a list of output tensors, [batch_size, output_dim], with length
      len(structure_indices), i.e., one for each lattice.
    * a list of parameter tensors of shape [output_dim, parameter_dim].
    * None or projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity.
    * None or a regularization loss, if regularization is configured.
  """
  num_lattices = len(structure_indices)
  lattice_initializers = tools.cast_to_list(lattice_initializers,
                                            num_lattices,
                                            'lattice initializers')
  one_dimensional_regularizers = (
      regularizers.LATTICE_ONE_DIMENSIONAL_REGULARIZERS)
  for regularizer_name in regularizer_amounts:
    if regularizer_name in one_dimensional_regularizers:
      regularizer_amounts[regularizer_name] = tools.cast_to_list(
          regularizer_amounts[regularizer_name], len(lattice_sizes),
          regularizer_name)

  # input_slices[k] = input_tensor[:, k].
  input_slices = tf.unstack(input_tensor, axis=1)

  output_tensors = []
  param_tensors = []
  projections = []
  regularization = None
  if is_monotone:
    is_monotone = tools.cast_to_list(is_monotone, len(lattice_sizes),
                                     'is_monotone')
  # Now iterate through structure_indices to construct lattices.
  get_indices = lambda indices, iterable: [
      iterable[index] for index in indices
  ]
  for (cnt, structure) in enumerate(structure_indices):
    with tf.compat.v1.variable_scope('lattice_%d' % cnt):
      sub = functools.partial(get_indices, structure)
      sub_lattice_sizes = sub(lattice_sizes)
      sub_is_monotone = None
      if is_monotone:
        sub_is_monotone = sub(is_monotone)

      sub_input_tensor_list = sub(input_slices)
      sub_input_tensor = tf.stack(sub_input_tensor_list, axis=1)

      # Slice one-dimensional regularizer amounts down to this lattice's
      # dimensions; pass multi-dimensional amounts through unchanged.
      sub_regularizer_amounts = {}
      for regularizer_name in regularizer_amounts:
        if regularizer_name in one_dimensional_regularizers:
          sub_regularizer_amounts[regularizer_name] = sub(
              regularizer_amounts[regularizer_name])
        else:
          sub_regularizer_amounts[regularizer_name] = regularizer_amounts[
              regularizer_name]

      packed_results = lattice_layer(
          sub_input_tensor,
          sub_lattice_sizes,
          sub_is_monotone,
          output_dim=output_dim,
          interpolation_type=interpolation_type,
          lattice_initializer=lattice_initializers[cnt],
          **sub_regularizer_amounts)
      (sub_output, sub_param, sub_proj, sub_reg) = packed_results
      output_tensors.append(sub_output)
      param_tensors.append(sub_param)
      if sub_proj:
        projections += sub_proj
      regularization = tools.add_if_not_none(regularization, sub_reg)

  return (output_tensors, param_tensors, projections, regularization)
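# Hypothetical usage sketch of the **regularizer_amounts form (keyword names
# are assumed to follow regularizers.LATTICE_*_REGULARIZERS): one-dimensional
# amounts such as the Laplacian may be given per lattice dimension and are
# sliced per structure, while multi-dimensional amounts such as torsion are
# passed through unchanged.
def _example_ensemble_lattices_layer_regularizers():
  x = tf.compat.v1.placeholder(tf.float32, shape=[None, 4])
  return ensemble_lattices_layer(
      x,
      lattice_sizes=[2, 2, 2, 2],
      structure_indices=[[0, 1], [2, 3]],
      is_monotone=True,
      l2_torsion_reg=1e-4,                      # multi-dimensional: one float
      l2_laplacian_reg=[0.1, 0.1, 0.01, 0.01])  # one-dimensional: per dim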
def prediction_builder(self, columns_to_tensors, mode, hparams, dtype):
  """Method that builds the prediction graph.

  Args:
    columns_to_tensors: A map from feature_name to raw features tensors, each
      with shape `[batch_size]` or `[batch_size, feature_dim]`.
    mode: Estimator's `ModeKeys`.
    hparams: hyperparameters object passed to the prediction builder. This is
      not used by the Base estimator itself; it is passed without checks or
      any processing and can be of any type.
    dtype: The dtype to be used for tensors.

  Returns:
    A tuple of (prediction_tensor, projection_ops, regularization_loss) of
    type (tf.Tensor, list[], tf.Tensor):
    prediction_tensor: shaped `[batch_size, 1]` for regression or binary
      classification, or `[batch_size, n_classes]` for multi-class
      classifiers. For classifiers this will be the logit value(s).
    projection_ops: list of projection ops to be applied after each batch, or
      None.
    regularization_loss: loss related to regularization, or None.

  Raises:
    ValueError: invalid parameters.
  """
  if (mode == model_fn_lib.ModeKeys.TRAIN and self._quantiles_dir is None and
      self._keypoints_initializers_fn is None):
    raise ValueError(
        "At least one of quantiles_dir or keypoints_initializers_fn "
        "must be given for training")

  # If keypoints-initializer closures were given, call them to create the
  # initializer tensors.
  kp_init_explicit = None
  if self._keypoints_initializers_fn is not None:
    kp_init_explicit = _call_keypoints_inializers_fn(
        self._keypoints_initializers_fn)

  # Add feature names to hparams so that builders can make use of them.
  for feature_name in columns_to_tensors:
    self._hparams.add_feature(feature_name)

  total_projection_ops = None
  total_regularization = None
  total_prediction = None

  # Get the ensemble structure.
  calibration_structure = self.calibration_structure_builder(
      columns_to_tensors, self._hparams)
  if calibration_structure is None:
    # Single model or shared calibration.
    (calibrated, per_dimension_feature_names, calibration_projections,
     calibration_regularization) = (
         input_calibration_layer_from_hparams(
             columns_to_tensors=columns_to_tensors,
             hparams=self._hparams,
             quantiles_dir=self._quantiles_dir,
             keypoints_initializers=kp_init_explicit,
             name=_SCOPE_INPUT_CALIBRATION,
             dtype=self._dtype))
    (total_prediction, prediction_projections,
     prediction_regularization) = self.prediction_builder_from_calibrated(
         mode, per_dimension_feature_names, self._hparams, calibrated)
    total_projection_ops = tools.add_if_not_none(calibration_projections,
                                                 prediction_projections)
    total_regularization = tools.add_if_not_none(calibration_regularization,
                                                 prediction_regularization)
  else:
    # Ensemble model with separate calibration.
    predictions = []
    for (index, sub_columns_to_tensors) in enumerate(calibration_structure):
      # Calibrate.
      with variable_scope.variable_scope("submodel_{}".format(index)):
        (calibrated, per_dimension_feature_names, calibration_projections,
         calibration_regularization) = (
             input_calibration_layer_from_hparams(
                 columns_to_tensors=sub_columns_to_tensors,
                 hparams=self._hparams,
                 quantiles_dir=self._quantiles_dir,
                 keypoints_initializers=kp_init_explicit,
                 name=_SCOPE_INPUT_CALIBRATION,
                 dtype=self._dtype))
        (prediction, prediction_projections,
         prediction_regularization) = self.prediction_builder_from_calibrated(
             mode, per_dimension_feature_names, self._hparams, calibrated)
        projection_ops = tools.add_if_not_none(calibration_projections,
                                               prediction_projections)
        regularization = tools.add_if_not_none(calibration_regularization,
                                               prediction_regularization)

      # Merge back the results.
      total_projection_ops = tools.add_if_not_none(total_projection_ops,
                                                   projection_ops)
      total_regularization = tools.add_if_not_none(total_regularization,
                                                   regularization)
      predictions.append(prediction)

    # The final prediction is the mean of the predictions, plus a bias term.
    stacked_predictions = array_ops.stack(
        predictions, axis=0, name="stacked_predictions")
    ensemble_output = math_ops.reduce_mean(stacked_predictions, axis=0)
    ensemble_bias_init = self._hparams.get_param("ensemble_bias")
    bias = variables.Variable([ensemble_bias_init], name="ensemble_bias")
    total_prediction = ensemble_output + bias

  return total_prediction, total_projection_ops, total_regularization
def model_fn(features, labels, mode, config):  # pylint: disable=unused-argument
  """Creates the prediction, loss, and train ops.

  Args:
    features: A dictionary of tensors keyed by the feature name.
    labels: A tensor representing the label.
    mode: The execution mode, as defined in model_fn_lib.ModeKeys.
    config: Optional configuration object. Will receive what is passed to
      Estimator in `config` parameter, or the default `config`. Allows
      updating things in your model_fn based on configuration such as
      `num_ps_replicas`.

  Returns:
    ModelFnOps, with the predictions, loss, and train_op.

  Raises:
    ValueError: if incompatible parameters are given, or if the keypoints
      initializers given during construction return an invalid number of
      keypoints.
  """
  with variable_scope.variable_scope("/".join(
      [_SCOPE_CALIBRATED_TENSORFLOW_LATTICE, self._name])):
    if mode == model_fn_lib.ModeKeys.TRAIN:
      if (self._quantiles_dir is None and
          self._keypoints_initializers_fn is None):
        raise ValueError(
            "At least one of quantiles_dir or keypoints_initializers_fn "
            "must be given for training")

    # If keypoints-initializer closures were given, materialize them now into
    # the initializer tensors.
    kp_init_explicit = None
    if self._keypoints_initializers_fn is not None:
      kp_init_explicit = _call_keypoints_inializers_fn(
          self._keypoints_initializers_fn)

    # Calibrate.
    (calibrated, per_dimension_feature_names, projection_ops,
     regularization) = (
         input_calibration_layer_from_hparams(
             features,
             feature_columns=self._feature_columns,
             hparams=self._hparams,
             quantiles_dir=self._quantiles_dir,
             keypoints_initializers=kp_init_explicit,
             name=_SCOPE_INPUT_CALIBRATION,
             dtype=self._dtype))
    (prediction, prediction_projections,
     prediction_regularization) = self.prediction_builder(
         mode, per_dimension_feature_names, self._hparams, calibrated)
    projection_ops = tools.add_if_not_none(projection_ops,
                                           prediction_projections)
    regularization = tools.add_if_not_none(regularization,
                                           prediction_regularization)

    def _train_op_fn(loss):
      """Returns the train_op tensor if TRAIN mode, or None."""
      train_op = None
      if mode == model_fn_lib.ModeKeys.TRAIN:
        if regularization is not None:
          loss += regularization
        optimizer = _get_optimizer(self._optimizer, self._hparams)
        train_op = optimizer.minimize(
            loss,
            global_step=training_util.get_global_step(),
            name=_SCOPE_TRAIN_OP)
        self._projection_hook.set_projection_ops(projection_ops)
      return train_op

    # Use head to generate model_fn outputs.
    estimator_spec = self._head.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        train_op_fn=_train_op_fn,
        logits=prediction)

    # Update training hooks to include projection_hook in the training mode.
    if mode == model_fn_lib.ModeKeys.TRAIN:
      updated_training_hooks = (
          estimator_spec.training_hooks + (self._projection_hook,))
      estimator_spec = estimator_spec._replace(
          training_hooks=updated_training_hooks)

    return estimator_spec