Example 1
def pull_distribution_scale(config, data_handler, data_transformer,
                            shared_objects, *args, **kwargs):
    """This loss penalized the standard deviation of the pull distribution.

    This is meant to run for a few steps with very high batch size at the very
    end of the training procedure of a model in order to correct the scale
    of the uncertainty estimates such that the pull distribution has a
    standard deviation of 1.

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """
    if 'event_weights' in shared_objects:
        misc.print_warning("Event weights will be ignored for loss function "
                           "'pull_distribution_scale'")

    y_diff_trafo = tf.stop_gradient(loss_utils.get_y_diff_trafo(
                                    config=config,
                                    data_handler=data_handler,
                                    data_transformer=data_transformer,
                                    shared_objects=shared_objects))

    # small float to prevent division by zero
    eps = 1e-6

    # uncertainty estimate on prediction
    unc = tf.clip_by_value(shared_objects['y_unc_trafo'], eps, float('inf'))

    pull = y_diff_trafo / unc

    # get variance
    mean, var = tf.nn.moments(x=pull, axes=[0])

    loss = (var - 1.)**2

    loss_utils.add_logging_info(data_handler, shared_objects)

    return loss
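
As an illustration of what this loss drives toward, here is a minimal NumPy sketch (hypothetical arrays, not part of this module) of the pull-variance penalty:

import numpy as np

# hypothetical residuals and uncertainty estimates for one label
rng = np.random.default_rng(0)
y_diff = rng.normal(0.0, 2.0, size=100_000)   # true - predicted
y_unc = np.ones_like(y_diff)                  # model underestimates sigma by a factor of 2

pull = y_diff / np.clip(y_unc, 1e-6, None)
var = pull.var()
loss = (var - 1.0) ** 2   # ~9 here; rescaling y_unc by 2 would drive it towards 0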
Example 2
def tukey(config, data_handler, data_transformer, shared_objects,
          *args, **kwargs):
    """Tukey loss of transformed prediction and true values.
    A robust loss measure that is equivalent to MSE for small residuals, but
    has constant loss for very large residuals. This reduces the effect of
    outliers.

    From Paper: 'Robust Optimization for Deep Regression'

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """
    y_diff_trafo = loss_utils.get_y_diff_trafo(
                                    config=config,
                                    data_handler=data_handler,
                                    data_transformer=data_transformer,
                                    shared_objects=shared_objects)

    y_diff_trafo_scaled = \
        y_diff_trafo / (1.4826 * shared_objects['median_abs_dev'])

    c = 4.6851
    loss_event = tf.where(
        tf.less(tf.abs(y_diff_trafo_scaled), c),
        (c**2/6) * (1 - (1 - (y_diff_trafo_scaled/c)**2)**3),
        tf.zeros_like(y_diff_trafo_scaled) + (c**2/6),
        name='tukey_loss')

    if 'event_weights' in shared_objects:
        weights = shared_objects['event_weights']
        weight_sum = tf.reduce_sum(input_tensor=weights, axis=0)
        tukey_loss = tf.reduce_sum(input_tensor=loss_event * weights, axis=0) / weight_sum
    else:
        tukey_loss = tf.reduce_mean(input_tensor=loss_event, axis=0)

    loss_utils.add_logging_info(data_handler, shared_objects)

    return tukey_loss
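
The piecewise form used above is the standard Tukey biweight with tuning constant c = 4.6851, applied to residuals scaled by a robust sigma estimate (1.4826 * MAD). A small NumPy sketch of that function, using hypothetical residuals:

import numpy as np

def tukey_biweight(r, c=4.6851):
    """Quadratic near zero, constant c**2/6 for |r| >= c."""
    inside = (c**2 / 6.0) * (1.0 - (1.0 - (r / c) ** 2) ** 3)
    return np.where(np.abs(r) < c, inside, c**2 / 6.0)

residuals = np.array([0.1, 1.0, 5.0, 50.0])
mad = 1.0  # hypothetical per-label median absolute deviation
print(tukey_biweight(residuals / (1.4826 * mad)))  # large residuals saturate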
Example 3
def mse(config, data_handler, data_transformer, shared_objects,
        *args, **kwargs):
    """Mean squared error of transformed prediction and true values.

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """

    y_diff_trafo = loss_utils.get_y_diff_trafo(
                                    config=config,
                                    data_handler=data_handler,
                                    data_transformer=data_transformer,
                                    shared_objects=shared_objects)

    loss_event = tf.square(y_diff_trafo)
    unc_diff = shared_objects['y_unc_trafo'] - \
        tf.stop_gradient(tf.abs(y_diff_trafo))

    if 'event_weights' in shared_objects:
        weights = shared_objects['event_weights']
        weight_sum = tf.reduce_sum(input_tensor=weights, axis=0)
        mse_values_trafo = tf.reduce_sum(input_tensor=loss_event * weights, axis=0) / weight_sum
        mse_unc_values_trafo = tf.reduce_sum(input_tensor=unc_diff**2 * weights, axis=0) / \
            weight_sum
    else:
        mse_values_trafo = tf.reduce_mean(input_tensor=loss_event, axis=0)
        mse_unc_values_trafo = tf.reduce_mean(input_tensor=unc_diff**2, axis=0)

    loss_utils.add_logging_info(data_handler, shared_objects)

    return mse_values_trafo + mse_unc_values_trafo
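
Note that the returned tensor is the sum of two terms: the MSE of the transformed residuals, and a second MSE that fits y_unc_trafo to |residual| with the residual detached, so the uncertainty branch does not pull on the prediction. A minimal NumPy sketch with hypothetical values:

import numpy as np

y_diff = np.array([0.5, -1.0, 2.0])   # transformed residuals (true - pred)
y_unc = np.array([0.6, 0.9, 1.5])     # predicted uncertainties (transformed space)

mse = np.mean(y_diff ** 2)
# regress the uncertainty towards |residual|; the residual acts as a constant target
mse_unc = np.mean((y_unc - np.abs(y_diff)) ** 2)
loss = mse + mse_unc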
Example 4
def weighted_mse(config, data_handler, data_transformer, shared_objects,
                 *args, **kwargs):
    """Weighted mean squared error of transformed prediction and true values.

    The MSE is weighted by the per event uncertainty estimate.

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """

    y_diff_trafo = loss_utils.get_y_diff_trafo(
                                    config=config,
                                    data_handler=data_handler,
                                    data_transformer=data_transformer,
                                    shared_objects=shared_objects)

    unc_trafo = tf.stop_gradient(shared_objects['y_unc_trafo'])
    unc_trafo = tf.clip_by_value(unc_trafo, 1e-3, float('inf'))

    loss_event = tf.square(y_diff_trafo / unc_trafo)

    if 'event_weights' in shared_objects:
        weights = shared_objects['event_weights']
        mse_values_trafo = tf.reduce_sum(input_tensor=loss_event * weights, axis=0) / \
            tf.reduce_sum(input_tensor=weights, axis=0)
    else:
        mse_values_trafo = tf.reduce_mean(input_tensor=loss_event, axis=0)

    loss_utils.add_logging_info(data_handler, shared_objects)

    return mse_values_trafo
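
In contrast to the plain mse loss above, each squared residual is divided by its own (detached, clipped) uncertainty estimate, so the loss behaves like a per-event chi-square. A minimal sketch with hypothetical values:

import numpy as np

y_diff = np.array([0.5, -1.0, 2.0])
y_unc = np.clip(np.array([0.6, 0.9, 1.5]), 1e-3, None)  # gradient-stopped in the TF code

loss = np.mean((y_diff / y_unc) ** 2)   # well-measured events dominate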
Example 5
def gaussian_likelihood(config, data_handler, data_transformer, shared_objects,
                        *args, **kwargs):
    """Gaussian likelhood of transformed prediction and true values.

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """
    y_diff_trafo = loss_utils.get_y_diff_trafo(
                                    config=config,
                                    data_handler=data_handler,
                                    data_transformer=data_transformer,
                                    shared_objects=shared_objects)

    # small float to prevent division by zero
    eps = 1e-6

    # uncertainty estimate on prediction
    unc = tf.clip_by_value(shared_objects['y_unc_trafo'], eps, float('inf'))

    loss_event = 2*tf.math.log(unc) + (y_diff_trafo / unc)**2

    if 'event_weights' in shared_objects:
        weights = shared_objects['event_weights']
        weight_sum = tf.reduce_sum(input_tensor=weights, axis=0)
        loss = tf.reduce_sum(input_tensor=loss_event * weights, axis=0) / weight_sum
    else:
        loss = tf.reduce_mean(input_tensor=loss_event, axis=0)

    loss_utils.add_logging_info(data_handler, shared_objects)

    return loss
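
The per-event term 2*log(unc) + (diff/unc)**2 is twice the Gaussian negative log-likelihood up to the constant log(2*pi). A quick check against scipy (scipy is only used here for illustration, not by this module):

import numpy as np
from scipy.stats import norm

diff, sigma = 1.3, 0.7
loss_event = 2 * np.log(sigma) + (diff / sigma) ** 2
assert np.isclose(loss_event, -2 * norm.logpdf(diff, scale=sigma) - np.log(2 * np.pi))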
Example 6
def opening_angle_raleigh(config, data_handler, data_transformer,
                          shared_objects, *args, **kwargs):
    """Raleigh loss of opening angle between true and predicted angle.

    This loss only applies to label_azimuth_key and label_zenith_key!

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """

    index_azimuth = data_handler.get_label_index(config['label_azimuth_key'])
    index_zenith = data_handler.get_label_index(config['label_zenith_key'])

    azimuth_true = shared_objects['y_true'][:, index_azimuth]
    zenith_true = shared_objects['y_true'][:, index_zenith]

    azimuth_pred = shared_objects['y_pred'][:, index_azimuth]
    zenith_pred = shared_objects['y_pred'][:, index_zenith]

    azimuth_unc = shared_objects['y_unc'][:, index_azimuth]
    zenith_unc = shared_objects['y_unc'][:, index_zenith]

    angle = angle_utils.tf_get_angle_deviation(azimuth1=azimuth_true,
                                               zenith1=zenith_true,
                                               azimuth2=azimuth_pred,
                                               zenith2=zenith_pred)

    # use true zenith here, even though it will have to be the predicted value
    sigma = tf.sqrt(zenith_unc**2 + azimuth_unc**2 * tf.sin(zenith_true)**2)
    sigma /= np.sqrt(2)

    # small float to prevent division by zero
    eps = 1e-6
    sigma = tf.clip_by_value(sigma, eps, float('inf'))

    raleigh = (angle / sigma)**2 + 4*tf.math.log(sigma) - 2*tf.math.log(angle)

    if 'event_weights' in shared_objects:
        weights = shared_objects['event_weights']
        weight_sum = tf.reduce_sum(input_tensor=weights, axis=0)
        loss_angle = tf.reduce_sum(input_tensor=angle * weights, axis=0) / weight_sum
        raleigh_loss = tf.reduce_sum(input_tensor=raleigh * weights, axis=0) / weight_sum
    else:
        loss_angle = tf.reduce_mean(input_tensor=angle, axis=0)
        raleigh_loss = tf.reduce_mean(input_tensor=raleigh, axis=0)

    loss = loss_angle + raleigh_loss
    zeros = tf.zeros_like(loss)

    loss_all_list = []
    for label in data_handler.label_names:
        if label in [config['label_azimuth_key'], config['label_zenith_key']]:
            loss_all_list.append(loss)
        else:
            loss_all_list.append(zeros)

    loss_all = tf.stack(loss_all_list, axis=0)

    loss_utils.add_logging_info(data_handler, shared_objects)

    return loss_all
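
The per-event term matches twice the negative log-likelihood of a Rayleigh distribution with scale sigma evaluated at the opening angle: -2*log(psi/sigma**2 * exp(-psi**2/(2*sigma**2))) = (psi/sigma)**2 + 4*log(sigma) - 2*log(psi). A minimal NumPy check:

import numpy as np

def rayleigh_nll_times_two(psi, sigma):
    logpdf = np.log(psi) - 2.0 * np.log(sigma) - 0.5 * (psi / sigma) ** 2
    return -2.0 * logpdf

psi, sigma = 0.2, 0.1
direct = (psi / sigma) ** 2 + 4.0 * np.log(sigma) - 2.0 * np.log(psi)
assert np.isclose(direct, rayleigh_nll_times_two(psi, sigma))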
Example 7
def mse_and_weighted_cross_entropy(config, data_handler, data_transformer,
                                   shared_objects, *args, **kwargs):
    """Mean squared error of transformed prediction and true values.
    Weighted cross entropy loss will be applied to labels for which logit
    tensors are defined in shared_objects['logit_tensors'].
    These logit tensors must be added to the shared_objects during building
    of the NN model. This is necessary since using the logits directly is
    more numerically stable than reverting the sigmoid function on the
    output of the model.

    Events will be weighted according to how many signal events
    [pid value == 1] have a lower classification score. This aims to ignore
    abundant background events in a highly imbalanced classification task.
    Generally we only care about correctly modeling the signal region near 1.
    Hence, here we downweight the background region which is defined to be
    at smaller classification scores than the signal events.

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """

    y_diff_trafo = loss_utils.get_y_diff_trafo(
                                    config=config,
                                    data_handler=data_handler,
                                    data_transformer=data_transformer,
                                    shared_objects=shared_objects)

    loss_event = tf.square(y_diff_trafo)

    if 'event_weights' in shared_objects:
        weights = shared_objects['event_weights']
        weight_sum = tf.reduce_sum(input_tensor=weights, axis=0)
        mse_values_trafo = tf.reduce_sum(input_tensor=loss_event * weights, axis=0) / weight_sum
    else:
        mse_values_trafo = tf.reduce_mean(input_tensor=loss_event, axis=0)
        weights = tf.expand_dims(tf.ones_like(loss_event[:, 0]), axis=-1)

    logit_tensors = shared_objects['logit_tensors']

    label_loss = []
    for i, name in enumerate(data_handler.label_names):

        # sanity check for correct ordering of labels
        index = data_handler.get_label_index(name)
        assert i == index, '{!r} != {!r}'.format(i, index)

        # calculate average precision if logits are provided
        if name in logit_tensors:

            labels_i = shared_objects['y_true'][:, i]
            predictions_i = logit_tensors[name]
            loss_i = tf.nn.sigmoid_cross_entropy_with_logits(
                                        labels=labels_i,
                                        logits=predictions_i)

            # ------------------------------
            # compute weights for each event
            # ------------------------------
            # Here we assume that background class has value 0 and signal 1
            signal_weights = tf.where(labels_i > 0.5,
                                      weights[:, 0],
                                      tf.zeros_like(weights[:, 0]))

            # sort events according to the classification score
            sorted_indices = tf.argsort(predictions_i)
            signal_weights_sorted = tf.gather(signal_weights, sorted_indices)
            weights_sorted = tf.gather(weights[:, 0], sorted_indices)
            loss_i_sorted = tf.gather(loss_i, sorted_indices)

            loss_weight = tf.stop_gradient(tf.cumsum(signal_weights_sorted))

            # and now multiply event weight on top
            loss_weight *= weights_sorted

            eps = 1e-6
            label_loss.append(
                    tf.reduce_sum(input_tensor=loss_i_sorted * loss_weight, axis=0) /
                    (tf.reduce_sum(input_tensor=loss_weight) + eps))
            # ------------------------------
        else:
            label_loss.append(mse_values_trafo[i])

    label_loss = tf.stack(label_loss)

    loss_utils.add_logging_info(data_handler, shared_objects)

    return label_loss
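
The cumulative-sum weighting can be illustrated in NumPy: after sorting by logit, each event is weighted by the signal weight accumulated up to and including its position, so background events scoring below all signal receive zero weight. Hypothetical values:

import numpy as np

logits = np.array([-2.0, 0.5, 3.0, -1.0, 2.0])
labels = np.array([0, 1, 1, 0, 1])   # 1 = signal, 0 = background
event_w = np.ones_like(logits)

order = np.argsort(logits)
signal_w_sorted = np.where(labels[order] == 1, event_w[order], 0.0)
loss_weight = np.cumsum(signal_w_sorted) * event_w[order]
print(loss_weight)   # [0. 0. 1. 2. 3.]: low-scoring background is ignored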
Example 8
def mse_and_cross_entropy(config, data_handler, data_transformer,
                          shared_objects, *args, **kwargs):
    """Mean squared error of transformed prediction and true values.
    Cross entropy loss will be applied to labels for which logit tensors
    are defined in shared_objects['logit_tensors']. These logit tensors must be
    added to the shared_objects during building of the NN model.
    This is necessary since using the logits directly is more numerically
    stable than reverting the sigmoid function on the output of the model.

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """

    y_diff_trafo = loss_utils.get_y_diff_trafo(
                                    config=config,
                                    data_handler=data_handler,
                                    data_transformer=data_transformer,
                                    shared_objects=shared_objects)

    loss_event = tf.square(y_diff_trafo)

    if 'event_weights' in shared_objects:
        weights = shared_objects['event_weights']
        weight_sum = tf.reduce_sum(input_tensor=weights, axis=0)
        mse_values_trafo = tf.reduce_sum(input_tensor=loss_event * weights, axis=0) / weight_sum
    else:
        mse_values_trafo = tf.reduce_mean(input_tensor=loss_event, axis=0)

    logit_tensors = shared_objects['logit_tensors']

    label_loss = []
    for i, name in enumerate(data_handler.label_names):

        # sanity check for correct ordering of labels
        index = data_handler.get_label_index(name)
        assert i == index, '{!r} != {!r}'.format(i, index)

        # apply cross entropy if logits are provided
        if name in logit_tensors:
            loss_i = tf.nn.sigmoid_cross_entropy_with_logits(
                                        labels=shared_objects['y_true'][:, i],
                                        logits=logit_tensors[name])
            if 'event_weights' in shared_objects:
                label_loss.append(
                    tf.reduce_sum(input_tensor=loss_i * weights[:, 0], axis=0) / weight_sum[0])
            else:
                label_loss.append(tf.reduce_mean(input_tensor=loss_i))
        else:
            label_loss.append(mse_values_trafo[i])

    label_loss = tf.stack(label_loss)

    loss_utils.add_logging_info(data_handler, shared_objects)

    return label_loss
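
Using logits directly lets TensorFlow evaluate the cross entropy in the numerically stable form max(x, 0) - x*z + log(1 + exp(-|x|)); the naive expression -z*log(sigmoid(x)) - (1-z)*log(1-sigmoid(x)) loses precision for large |x|. A minimal NumPy sketch of the stable form:

import numpy as np

def bce_with_logits(labels, logits):
    x, z = logits, labels
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))

print(bce_with_logits(np.array([1.0, 0.0]), np.array([100.0, -100.0])))  # ~[0, 0]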
Example 9
def track_pos_mse(config, data_handler, data_transformer, shared_objects,
                  *args, **kwargs):
    """The MSE of the 4-vector distance of the predicted vertex (x, y, z, t)
    and the infinite track given by the true direction.

    The label is set up such that all points on the infinite track are correct
    predictions. This loss only applies to vertex (x, y, z, t) via the labels
    'pos_x', 'pos_y', 'pos_z', and 'time' as defined in label_particle_keys.

    Parameters
    ----------
    config : dict
        Dictionary containing all settings as read in from config file.
    data_handler : :obj: of class DataHandler
        An instance of the DataHandler class. The object is used to obtain
        meta data.
    data_transformer : :obj: of class DataTransformer
        An instance of the DataTransformer class. The object is used to
        transform data.
    shared_objects : dict
        A dictionary containing settings and objects that are shared and passed
        on to sub modules.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    tf.Tensor
        A tensorflow tensor containing the loss for each label.
        Shape: label_shape (same shape as labels)

    """
    index_dir_x = data_handler.get_label_index(config['label_dir_x_key'])
    index_dir_y = data_handler.get_label_index(config['label_dir_y_key'])
    index_dir_z = data_handler.get_label_index(config['label_dir_z_key'])

    index_pos_x = data_handler.get_label_index(
        config['label_particle_keys']['pos_x'])
    index_pos_y = data_handler.get_label_index(
        config['label_particle_keys']['pos_y'])
    index_pos_z = data_handler.get_label_index(
        config['label_particle_keys']['pos_z'])
    index_time = data_handler.get_label_index(
        config['label_particle_keys']['time'])

    dir_x_true = shared_objects['y_true'][:, index_dir_x]
    dir_y_true = shared_objects['y_true'][:, index_dir_y]
    dir_z_true = shared_objects['y_true'][:, index_dir_z]

    x_true = shared_objects['y_true'][:, index_pos_x]
    y_true = shared_objects['y_true'][:, index_pos_y]
    z_true = shared_objects['y_true'][:, index_pos_z]
    time_true = shared_objects['y_true'][:, index_time]

    x_pred = shared_objects['y_pred'][:, index_pos_x]
    y_pred = shared_objects['y_pred'][:, index_pos_y]
    z_pred = shared_objects['y_pred'][:, index_pos_z]
    time_pred = shared_objects['y_pred'][:, index_time]

    x_unc = shared_objects['y_unc'][:, index_pos_x]
    y_unc = shared_objects['y_unc'][:, index_pos_y]
    z_unc = shared_objects['y_unc'][:, index_pos_z]
    time_unc = shared_objects['y_unc'][:, index_time]

    # x: predicted point, p: true point on track, d: true unit direction vector
    # calculate a = x - p
    a1 = x_pred - x_true
    a2 = y_pred - y_true
    a3 = z_pred - z_true

    # scalar product s = a*d, s is distance to closest point on infinite track
    s = a1*dir_x_true + a2*dir_y_true + a3*dir_z_true

    # calculate r = s*d - a = (p + s*d) - x
    r1 = s*dir_x_true - a1
    r2 = s*dir_y_true - a2
    r3 = s*dir_z_true - a3

    # calculate time diff [meter] at closest approach point on infinite track
    c = 0.299792458  # speed of light in m/ns
    rt = (time_true + (s / c) - time_pred) * c

    unc_diff_x = tf.stop_gradient(r1) - x_unc
    unc_diff_y = tf.stop_gradient(r2) - y_unc
    unc_diff_z = tf.stop_gradient(r3) - z_unc
    unc_diff_t = tf.stop_gradient(rt) - time_unc

    if 'event_weights' in shared_objects:
        weights = shared_objects['event_weights']
        w_sum = tf.reduce_sum(input_tensor=weights, axis=0)
        loss_x = tf.reduce_sum(input_tensor=(r1**2 + unc_diff_x**2) * weights, axis=0) / w_sum
        loss_y = tf.reduce_sum(input_tensor=(r2**2 + unc_diff_y**2) * weights, axis=0) / w_sum
        loss_z = tf.reduce_sum(input_tensor=(r3**2 + unc_diff_z**2) * weights, axis=0) / w_sum
        loss_t = tf.reduce_sum(input_tensor=(rt**2 + unc_diff_t**2) * weights, axis=0) / w_sum
    else:
        loss_x = tf.reduce_mean(input_tensor=r1**2 + unc_diff_x**2, axis=0)
        loss_y = tf.reduce_mean(input_tensor=r2**2 + unc_diff_y**2, axis=0)
        loss_z = tf.reduce_mean(input_tensor=r3**2 + unc_diff_z**2, axis=0)
        loss_t = tf.reduce_mean(input_tensor=rt**2 + unc_diff_t**2, axis=0)

    zeros = tf.zeros_like(loss_x)

    loss_all_list = []
    for label in data_handler.label_names:

        if label == config['label_particle_keys']['pos_x']:
            loss_all_list.append(loss_x)

        elif label == config['label_particle_keys']['pos_y']:
            loss_all_list.append(loss_y)

        elif label == config['label_particle_keys']['pos_z']:
            loss_all_list.append(loss_z)

        elif label == config['label_particle_keys']['time']:
            loss_all_list.append(loss_t)

        else:
            loss_all_list.append(zeros)

    loss_all = tf.stack(loss_all_list, axis=0)

    loss_utils.add_logging_info(data_handler, shared_objects)

    return loss_all
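
The geometry above is the standard point-to-line residual: for predicted point x, true point p on the track, and true unit direction d, the closest point on the infinite track is p + s*d with s = (x - p) . d, and r = (p + s*d) - x is perpendicular to the track. A small NumPy check with hypothetical vectors:

import numpy as np

d = np.array([0.0, 0.0, 1.0])    # true unit direction
p = np.array([1.0, 2.0, 3.0])    # true point on the track
x = np.array([2.0, 2.0, 10.0])   # predicted vertex

a = x - p
s = a @ d                        # signed distance along the track
r = s * d - a                    # (p + s*d) - x
assert np.isclose(r @ d, 0.0)    # residual is orthogonal to the track direction
print(np.linalg.norm(r))         # point-to-line distance (1.0 here)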