Ejemplo n.º 1
0
def rmse(y_true, y_pred):
    '''

    Root mean squared error between targets and predictions.

    Parameters
    ----------
    y_true: ndarray
        Ground truth
    y_pred: ndarray
        Array of predictions

    Returns
    -------
    rmse: float
        Square root of the mean of the squared residuals

    References
    ----------
    .. [1] https://www.kaggle.com/wiki/RootMeanSquaredError

    '''

    # Run the inputs through the project's shape helpers (defined elsewhere)
    y_true, y_pred = align_shape(y_true, y_pred)
    check_shapes(y_true, y_pred)

    residuals = y_true - y_pred
    mean_squared = (residuals ** 2).mean()

    return np.sqrt(mean_squared)
Ejemplo n.º 2
0
def rmsle(y_true, y_pred):
    '''

    Root mean squared logarithmic error.

    Parameters
    ----------
    y_true: ndarray
        Ground truth
    y_pred: ndarray
        Array of predictions

    Returns
    -------
    rmsle: float
        Root mean squared logarithmic error

    References
    ----------
    .. [1] https://www.kaggle.com/wiki/RootMeanSquaredLogarithmicError
    .. [2] https://www.slideshare.net/KhorSoonHin/rmsle-cost-function

    Notes
    -----
    The metric is built on ``log(1 + x)``, so values less than or equal
    to -1 in either array produce ``nan`` or ``inf`` in the result.

    '''

    # Run the inputs through the project's shape helpers (defined elsewhere)
    y_true, y_pred = align_shape(y_true, y_pred)
    check_shapes(y_true, y_pred)

    # log1p(x) evaluates log(1 + x) without the rounding loss that the
    # explicit "x + 1" introduces when x is close to zero
    log_diff = np.log1p(y_pred) - np.log1p(y_true)

    return np.sqrt((log_diff ** 2).mean())
Ejemplo n.º 3
0
def wmae(y_true, y_pred, weights):
    '''

    Weighted mean absolute error.

    Computes ``sum(w * |y_true - y_pred|) / sum(w)``.

    Parameters
    ----------
    y_true: ndarray
        Ground truth
    y_pred: ndarray
        Array of predictions
    weights: array_like
        Non-negative weight of every observation. Must either hold as
        many elements as the error array or be broadcastable to its shape.

    Returns
    -------
    wmae: float
        Weighted mean absolute error

    Raises
    ------
    ValueError
        If the weights cannot be matched to the errors or sum to zero

    References
    ----------
    .. [1] https://www.kaggle.com/wiki/WeightedMeanAbsoluteError

    '''

    # Run the inputs through the project's shape helpers (defined elsewhere)
    y_true, y_pred = align_shape(y_true, y_pred)
    check_shapes(y_true, y_pred)

    abs_errors = np.abs(y_true - y_pred)

    weights = np.asarray(weights, dtype=float)
    if weights.size == abs_errors.size:
        # Same number of elements: lay the weights out exactly like the
        # errors so that e.g. (n,) weights never broadcast against (n, 1)
        # errors into an (n, n) matrix
        weights = weights.reshape(abs_errors.shape)
    else:
        # Otherwise rely on ordinary broadcasting (raises ValueError on mismatch)
        weights = np.broadcast_to(weights, abs_errors.shape)

    total_weight = weights.sum()
    if total_weight == 0:
        raise ValueError("Weights sum to zero!")

    # Normalise by the total weight, not by the number of elements:
    # that is what makes the result a weighted *mean*
    return (weights * abs_errors).sum() / total_weight
Ejemplo n.º 4
0
def log_loss(y_true, y_pred):
    '''

    Logarithmic loss

    Parameters
    ----------
    y_true: numpy.ndarray
        Targets
    y_pred: numpy.ndarray
        Class probability

    Returns
    -------
    score: float
        Negated mean over rows of ``sum(y_true * log(y_pred))`` taken
        along axis 1

    Raises
    ------
    ValueError
        If any entry of ``y_pred`` is not strictly positive

    References
    ----------
    .. [1] https://www.kaggle.com/wiki/LogLoss
    .. [2] http://www.exegetic.biz/blog/2015/12/making-sense-logarithmic-loss/
    .. [3] http://wiki.fast.ai/index.php/Log_Loss

    '''

    # Run the inputs through the project's shape helpers (defined elsewhere)
    check_shapes(y_true, y_pred)
    y_true, y_pred = align_shape(y_true, y_pred)

    # The logarithm below is only finite for strictly positive probabilities
    all_positive = (y_pred > 0).all()
    if not all_positive:
        raise ValueError("Prediction array contains zeroes!")

    # Log-likelihood of every row, then the negated average across rows
    row_log_likelihood = (y_true * np.log(y_pred)).sum(axis=1)

    return -row_log_likelihood.mean()
Ejemplo n.º 5
0
def mean_average_precision(y_true, y_pred):
    '''

    Mean average precision

    Not implemented yet: the inputs are validated and then
    ``NotImplementedError`` is raised.

    Parameters
    ----------
    y_true: numpy.ndarray
        Targets
    y_pred: numpy.ndarray
        Class predictions (0 or 1 values only)

    Returns
    -------
    score: float
        Mean average precision score

    Raises
    ------
    NotImplementedError
        Always, until the metric is implemented

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Information_retrieval#Mean_average_precision

    '''

    # TODO: definition of query!!!

    # Run the inputs through the project's shape helpers (defined elsewhere)
    check_shapes(y_true, y_pred)
    y_true, y_pred = align_shape(y_true, y_pred)

    # Fail loudly instead of handing back None where a float score is documented
    raise NotImplementedError(
        "mean_average_precision is not implemented yet: "
        "the definition of a query is still undecided"
    )
Ejemplo n.º 6
0
def mcc(y_true, y_pred):
    '''

    Matthews Correlation Coefficient

    Parameters
    ----------
    y_true: numpy.ndarray
        Targets
    y_pred: numpy.ndarray
        Class predictions (0 or 1 values only)

    Returns
    -------
    score: float
        Matthews Correlation Coefficient score. It is 0 when any of the
        four marginal sums of the confusion matrix is zero (for example
        when only one class is present or predicted).

    References
    ----------
    .. [1] https://lettier.github.io/posts/2016-08-05-matthews-correlation-coefficient.html
    .. [2] https://en.wikipedia.org/wiki/Matthews_correlation_coefficient

    '''

    # Run the inputs through the project's shape helpers (defined elsewhere)
    check_shapes(y_true, y_pred)
    y_true, y_pred = align_shape(y_true, y_pred)

    # Confusion matrix values, converted to float64: if the helper returns
    # fixed-width integer counts, the product of four sums below could
    # otherwise overflow on large inputs
    tp, tn, fp, fn = (
        np.asarray(count, dtype=np.float64)
        for count in confusion_binary(y_true, y_pred)
    )

    numerator = tp * tn - fp * fn
    denominator = np.sqrt((tp + fp) * (fn + tn) * (fp + tn) * (tp + fn))

    # A zero denominator means one marginal sum is empty, which also forces
    # the numerator to zero; dividing by 1 instead yields the conventional
    # score of 0 rather than nan plus a runtime warning
    safe_denominator = np.where(denominator == 0, 1.0, denominator)

    # Indexing with () unwraps a 0-d result into a scalar and leaves any
    # higher-dimensional result untouched
    return (numerator / safe_denominator)[()]
Ejemplo n.º 7
0
def mean_utility(y_true, y_pred, weights):
    '''

    Mean utility

    Parameters
    ----------
    y_true: numpy.ndarray
        Targets
    y_pred: numpy.ndarray
        Class predictions (0 or 1 values only)
    weights: sequence
        Exactly four weights, in the order
        (true positive, true negative, false positive, false negative)

    Returns
    -------
    score: float
        Weighted sum of the four confusion-matrix values

    References
    ----------
    .. [1] https://www.kaggle.com/wiki/MeanUtility
    .. [2] https://en.wikipedia.org/wiki/Multi-label_classification

    Notes
    -----
    The higher the better.

    '''

    # Run the inputs through the project's shape helpers (defined elsewhere)
    check_shapes(y_true, y_pred)
    y_true, y_pred = align_shape(y_true, y_pred)

    # Unpacking fails unless exactly four weights were supplied
    w_tp, w_tn, w_fp, w_fn = weights

    # Confusion matrix values from the project helper (defined elsewhere)
    tp, tn, fp, fn = confusion_binary(y_true, y_pred)

    # Accumulate the weighted outcomes one term at a time
    utility = w_tp * tp
    utility = utility + w_tn * tn
    utility = utility + w_fp * fp
    utility = utility + w_fn * fn

    return utility
Ejemplo n.º 8
0
def hamming_loss(y_true, y_pred):
    '''

    Hamming loss

    Parameters
    ----------
    y_true: numpy.ndarray
        Targets
    y_pred: numpy.ndarray
        Class predictions (0 or 1 values only)

    Returns
    -------
    score: float
        Fraction of disagreeing entries, averaged along axis 1 first and
        then over the remaining axis

    References
    ----------
    .. [1] https://www.kaggle.com/wiki/HammingLoss
    .. [2] https://en.wikipedia.org/wiki/Multi-label_classification

    Notes
    -----
    The smaller the better.

    '''

    # Run the inputs through the project's shape helpers (defined elsewhere)
    check_shapes(y_true, y_pred)
    y_true, y_pred = align_shape(y_true, y_pred)

    # Logical values only!
    check_binary(y_true, y_pred)

    # True wherever exactly one of prediction / target is set
    disagreements = np.logical_xor(y_pred, y_true)
    per_row_loss = disagreements.mean(axis=1)

    return per_row_loss.mean()
Ejemplo n.º 9
0
def mce(y_true, y_pred):
    '''

    Mean consequential error

    Every wrong prediction costs 1 and every exact prediction costs 0;
    the score is the mean of these costs, i.e. the fraction of
    mismatching entries.

    Parameters
    ----------
    y_true: numpy.ndarray
        Targets
    y_pred: numpy.ndarray
        Class predictions (0 or 1 values only)

    Returns
    -------
    score: float
        Mean consequential error score

    References
    ----------
    .. [1] https://www.kaggle.com/wiki/MeanConsequentialError
    .. [2] http://www.machinelearning.ru/wiki/images/5/59/PZAD2016_04_errors.pdf (RU)

    Notes
    -----
    The smaller the better.

    '''

    # Run the inputs through the project's shape helpers (defined elsewhere)
    check_shapes(y_true, y_pred)
    y_true, y_pred = align_shape(y_true, y_pred)

    # Checking binarity
    check_binary(y_true, y_pred)

    # An error is a mismatch, so count inequality; counting equality
    # would yield accuracy rather than an error measure
    return (y_true.astype(bool) != y_pred.astype(bool)).mean()
Ejemplo n.º 10
0
# Number of batches in the training and validation datasets.
batches_per_epoch = len(train_tf_dataset)
val_batches_per_epoch = len(val_tf_dataset)

# tf.debugging.set_log_device_placement(True)

# Pre-training loop: one pass over the distributed training dataset per epoch.
for epoch in range(1, pretrain_epochs + 1):
    # Linear learning-rate schedule: 0.001 in the first epoch, decreasing
    # to 0.0005 in the last one.
    # NOTE(review): divides by zero when pretrain_epochs == 1.
    new_learning_rate = 0.001 - (0.001 - 0.0005) * (epoch -
                                                    1) / (pretrain_epochs - 1)
    optimizer.lr.assign(new_learning_rate)
    # Fresh iterator so every epoch starts from the beginning of the dataset.
    iterator = iter(train_dist_dataset)
    print('epoch', epoch)
    # Losses of the batches that were actually trained on in this epoch.
    losses = []
    for batch_n in tqdm(range(1, batches_per_epoch + 1)):

        batch = next(iterator)
        # Only batches accepted by check_shapes (defined elsewhere) are trained on.
        if check_shapes(batch):
            loss, greedy_seqs = distributed_step(batch, 'train')
            # loss = distributed_train_step(batch)
            losses.append(loss)

        # Every 200th batch: collect model outputs and source examples.
        # NOTE(review): greedy_seqs is only assigned when check_shapes(batch)
        # is truthy. If the check fails on such a step, the value from an
        # earlier batch is reused (or a NameError is raised if there is
        # none), while batch[-1] below belongs to the current batch.
        if batch_n % 200 == 0:
            # if True:
            with tf.device('CPU'):
                # Concatenate the per-replica values into one array each.
                train_sums = list(
                    tf.concat(greedy_seqs.values, axis=0).numpy())
                # The last element of the batch is used below as indices
                # into article / summary / oovs.
                train_inds = list(
                    tf.concat(batch[-1].values, axis=0).numpy().squeeze())

            # Look up the source examples that correspond to this batch.
            articles = [article[x] for x in train_inds]
            gt_summaries = [summary[x] for x in train_inds]
            examples_oovs = [oovs[x] for x in train_inds]