import numpy as np
from keras import backend as K


def recall(y_true, y_pred):
    '''Calculates the recall, a metric for multi-label classification of
    how many relevant items are selected.
    '''
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
def precision(y_true, y_pred):
    '''Calculates the precision, a metric for multi-label classification of
    how many selected items are relevant.
    '''
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
def fbeta_score(y_true, y_pred, beta=1):
    '''Calculates the F score, the weighted harmonic mean of precision
    and recall.

    This is useful for multi-label classification, where input samples can
    be classified as sets of labels. Scoring with recall alone, a model
    could achieve a perfect score by simply assigning every class to every
    input; precision penalizes such incorrect assignments, so the F-beta
    score (ranging from 0.0 to 1.0) combines the two as a weighted
    harmonic mean.

    With beta = 1, this is equivalent to the F1 measure. With beta < 1 the
    score weights precision more heavily, and with beta > 1 it weights
    recall more heavily.
    '''
    if beta < 0:
        raise ValueError('The lowest allowed beta is zero (only precision).')

    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score
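# A minimal sanity check of the three metrics above, with values chosen by
# hand for this illustration; it assumes a TensorFlow backend running eagerly
# so that K.eval() can materialize the results.
y_true = K.constant([[1., 0., 1., 1.]])
y_pred = K.constant([[1., 0., 0., 1.]])  # one relevant item missed

print(K.eval(precision(y_true, y_pred)))    # ~1.0   (2 TP / 2 predicted)
print(K.eval(recall(y_true, y_pred)))       # ~0.667 (2 TP / 3 relevant)
print(K.eval(fbeta_score(y_true, y_pred)))  # ~0.8   (harmonic mean of the two)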
def kullback_leibler_divergence(y_true, y_pred):
    '''Calculates the Kullback-Leibler (KL) divergence between prediction
    and target values.
    '''
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    return K.sum(y_true * K.log(y_true / y_pred), axis=-1)
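# Hand-checked example for the KL divergence, reusing the eager-backend
# assumption from the check above.
p = K.constant([[0.5, 0.5]])
q = K.constant([[0.9, 0.1]])
# 0.5*log(0.5/0.9) + 0.5*log(0.5/0.1) ≈ 0.5108 nats
print(K.eval(kullback_leibler_divergence(p, q)))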
# Conversion factor from nats to bits (1 / ln 2).
nats2bits = 1.0 / np.log(2)


def kde_entropy(output, var):
    '''Kernel density estimate of the entropy of `output` (in bits), using
    an isotropic Gaussian kernel with variance `var`.
    '''
    dims = K.cast(K.shape(output)[1], K.floatx())
    N = K.cast(K.shape(output)[0], K.floatx())
    normconst = (dims / 2.0) * K.log(2 * np.pi * var)

    # Pairwise squared distances between rows of `output`, computed via
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    x2 = K.expand_dims(K.sum(K.square(output), axis=1), 1)
    dists = x2 + K.transpose(x2) - 2 * K.dot(output, K.transpose(output))
    dists = dists / (2 * var)

    normCount = N
    # Using normCount = N - 1 and masking out the diagonal of `dists`
    # would instead give the leave-one-out entropy estimate.

    # Mean log-probability under the kernel density estimate.
    lprobs = logsumexp(-dists, axis=1) - K.log(normCount) - normconst
    h = -K.mean(lprobs)
    return nats2bits * h
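# A rough eyeball check for kde_entropy. It depends on logsumexp, defined at
# the bottom of this file, so the check is wrapped in a function to be called
# only after the module has fully loaded. For i.i.d. unit Gaussian samples in
# d dimensions, the estimate should land near the differential entropy of the
# kernel-smoothed data, 0.5*d*log2(2*pi*e*(1 + var)) bits; the sample count
# here is an arbitrary choice for illustration.
def _check_kde_entropy():
    samples = K.constant(np.random.normal(0.0, 1.0, size=(1000, 2)))
    print(K.eval(kde_entropy(samples, var=0.1)))  # expect roughly 4.2 bits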
def matthews_correlation(y_true, y_pred):
    '''Calculates the Matthews correlation coefficient measure for quality
    of binary classification problems.
    '''
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    y_pred_neg = 1 - y_pred_pos

    y_pos = K.round(K.clip(y_true, 0, 1))
    y_neg = 1 - y_pos

    tp = K.sum(y_pos * y_pred_pos)
    tn = K.sum(y_neg * y_pred_neg)
    fp = K.sum(y_neg * y_pred_pos)
    fn = K.sum(y_pos * y_pred_neg)

    numerator = (tp * tn - fp * fn)
    denominator = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

    return numerator / (denominator + K.epsilon())
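# Quick check of the MCC on a hand-built confusion matrix
# (tp=2, tn=1, fp=1, fn=0), under the same eager-backend assumption.
y_true = K.constant([1., 1., 0., 0.])
y_pred = K.constant([1., 1., 1., 0.])
# (2*1 - 1*0) / sqrt((2+1)*(2+0)*(1+1)*(1+0)) = 2 / sqrt(12) ≈ 0.577
print(K.eval(matthews_correlation(y_true, y_pred)))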
from keras.activations import softmax
from keras.layers import Bidirectional, Conv1D, Dot, LSTM, Lambda


def model_lstm_with_self_att(embed_dp, max_len):
    '''Encodes a (batch, max_len, embed_dim) tensor into a single sequence
    embedding using a BiLSTM followed by self-attention.
    '''
    hidden_states = Bidirectional(
        LSTM(max_len, dropout=0.3, return_sequences=True,
             return_state=False))(embed_dp)

    # Attention mechanism: two 1x1 convolutions (tanh, then linear) score
    # each timestep, and a softmax over the time axis turns the scores
    # into attention weights.
    attention = Conv1D(filters=max_len, kernel_size=1, activation='tanh',
                       padding='same', use_bias=True,
                       kernel_initializer='glorot_uniform',
                       bias_initializer='zeros',
                       name="attention_layer1")(hidden_states)
    attention = Conv1D(filters=max_len, kernel_size=1, activation='linear',
                       padding='same', use_bias=True,
                       kernel_initializer='glorot_uniform',
                       bias_initializer='zeros',
                       name="attention_layer2")(attention)
    attention = Lambda(lambda x: softmax(x, axis=1),
                       name="attention_vector")(attention)

    # Apply attention weights to the hidden states
    weighted_sequence_embedding = Dot(
        axes=[1, 1], normalize=False,
        name="weighted_sequence_embedding")([attention, hidden_states])

    # Sum over the time axis and L2-normalize to obtain the final
    # sequence embedding
    sequence_embedding = Lambda(
        lambda x: K.l2_normalize(K.sum(x, axis=1)))(
            weighted_sequence_embedding)

    return sequence_embedding
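# A sketch of how the encoder above might be wired into a classifier; the
# vocabulary size, embedding dimension, dropout rate, and sigmoid head are
# placeholder assumptions, not part of the original function.
from keras.layers import Dense, Dropout, Embedding, Input
from keras.models import Model

max_len = 50
inputs = Input(shape=(max_len,), dtype='int32')
embed = Embedding(input_dim=10000, output_dim=128)(inputs)  # assumed sizes
embed_dp = Dropout(0.3)(embed)
sequence_embedding = model_lstm_with_self_att(embed_dp, max_len)
outputs = Dense(1, activation='sigmoid')(sequence_embedding)  # assumed binary task
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='binary_crossentropy')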
def nll(y_true, y_pred):
    '''Negative log likelihood.'''
    # keras.losses.binary_crossentropy gives the mean over the last axis;
    # we require the sum.
    return K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)
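# Hand-checked example for nll: three independent binary targets with
# predictions away from 0/1; same eager-backend assumption as above.
y_true = K.constant([[1., 0., 1.]])
y_pred = K.constant([[0.9, 0.1, 0.8]])
# -(log 0.9 + log 0.9 + log 0.8) ≈ 0.434
print(K.eval(nll(y_true, y_pred)))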
def weighted_accuracy(y_true, y_pred):
    '''Accuracy over argmax predictions, with each sample weighted by the
    total mass of its target row.
    '''
    # K.equal returns booleans; cast before weighting so the
    # multiplication is well-defined.
    correct = K.cast(
        K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)),
        K.floatx())
    return K.sum(correct * K.sum(y_true, axis=-1)) / K.sum(y_true)
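# Sanity check for weighted_accuracy: the second row carries twice the target
# mass, so its (wrong) prediction drags the score below plain accuracy.
y_true = K.constant([[1., 0.], [0., 2.]])
y_pred = K.constant([[0.9, 0.1], [0.8, 0.2]])
# row 1 correct (weight 1), row 2 wrong (weight 2) -> 1 / 3 ≈ 0.333
print(K.eval(weighted_accuracy(y_true, y_pred)))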
def logsumexp(mx, axis):
    '''Numerically stable log(sum(exp(mx))) along `axis`.'''
    # Subtract the per-row max before exponentiating to avoid overflow;
    # the original hardcoded axis 1 here, which silently ignored `axis`.
    cmax = K.max(mx, axis=axis)
    cmax2 = K.expand_dims(cmax, axis)
    mx2 = mx - cmax2
    return cmax + K.log(K.sum(K.exp(mx2), axis=axis))
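# Numeric check of logsumexp against a plain NumPy reference; the two printed
# rows should agree to float32 precision.
mx = np.random.randn(4, 6).astype('float32')
print(K.eval(logsumexp(K.constant(mx), axis=1)))
print(np.log(np.sum(np.exp(mx), axis=1)))

# logsumexp is now defined, so the deferred kde_entropy check can run.
_check_kde_entropy()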