Ejemplo n.º 1
0
 def fit(self, X, y):
     """Fit the decision maker by wrapping the base model in a OneVsRestClassifier.

     Parameters
     ----------
     X : array-like
         Training vectors; validated with check_feature_array.
     y : array-like
         Target values; validated with check_multilabel_array. Must have the same shape as X.

     Returns
     -------
     self
         The fitted instance, so that calls can be chained (e.g. ``maker.fit(X, y).predict(X)``).

     Raises
     ------
     ValueError
         If X and y do not have equal shapes after validation.
     """
     X = check_feature_array(X)
     y = check_multilabel_array(y)
     if X.shape != y.shape:
         raise ValueError('X (shape=%s) and y (shape=%s) must have equal shapes' % (X.shape, y.shape))
     # Remember the number of columns seen during training; predict reads it back.
     self.n_features_ = X.shape[1]
     self.model_ = OneVsRestClassifier(self._init_model(), n_jobs=self.n_jobs).fit(X, y)
     return self
Ejemplo n.º 2
0
def distances_report(distances, fmt='.2f', target_names=None):
    """Build a text table of a square distance matrix plus a final row of averages.

    Parameters
    ----------
    distances : array-like
        Square matrix of distances; validated with check_feature_array.
    fmt : str
        Float format handed to tabulate as ``floatfmt``.
    target_names : sequence or None
        One name per sample, used as both the column headers and the first cell of each row.
        Any sequence with a length is accepted. If None, the indexes 0..n_samples-1 are used.

    Returns
    -------
    The value returned by tabulate for the assembled rows and headers.

    Raises
    ------
    ValueError
        If distances is not square or target_names has the wrong length.
    """
    distances = check_feature_array(distances)
    if distances.shape[0] != distances.shape[1]:
        raise ValueError('distances must be square')
    n_samples = distances.shape[1]
    if target_names is not None and len(target_names) != n_samples:
        raise ValueError('target_names must specify a name for all %d labels' %
                         n_samples)

    # Copy into a real list: list concatenation below would otherwise fail for
    # tuples, arrays and (on Python 3) range objects.
    if target_names is None:
        names = list(range(n_samples))
    else:
        names = list(target_names)
    headers = [''] + names

    # One table row per sample: its name followed by its row of the matrix.
    data = [[names[sample_idx]] + list(distances[sample_idx])
            for sample_idx in range(n_samples)]

    # Final row: average of each matrix row (axis=1), labelled 'avg'.
    averages = np.average(distances, axis=1)
    data.append(['avg'] + list(averages))

    return tabulate(data, headers=headers, floatfmt=fmt)
Ejemplo n.º 3
0
 def predict(self, X):
     """Predict, for every feature of every sample, whether it is on or off.

     The decision is delegated to the multi-label classifier stored in ``model_``, which means that
     ``fit`` has to be called before this method can be used.
     """
     check_is_fitted(self, 'model_', 'n_features_')
     # Validate the input against the feature count recorded during fit.
     validated = check_feature_array(X, self.n_features_)
     return self.model_.predict(validated)
Ejemplo n.º 4
0
 def predict(self, X):
     """Turn on exactly one feature per row: the one holding the row's maximum value.

     The result has the same shape as the validated input and integer dtype; the position of the
     maximum in each row is set to 1 and every other entry is 0. The position is chosen by
     ``np.argmax`` along axis 1.
     """
     X = check_feature_array(X)
     n_rows = X.shape[0]
     predictions = np.zeros(X.shape, dtype=int)
     winners = np.argmax(X, axis=1)
     # Pair each row index with the column index of its maximum.
     predictions[np.arange(n_rows), winners] = 1
     return predictions
Ejemplo n.º 5
0
    def fit(self, X, y):
        """Fit the _MultiLabelDecisionMaker according to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples and n_features is the number of features.
        y : array-like, shape (n_samples, n_features)
            Binary target values, where for each sample a feature is either on (encoded as 1) or off (encoded as 0).

        Returns
        -------
        self
            The fitted instance, so that calls can be chained (e.g. ``maker.fit(X, y).predict(X)``).

        Raises
        ------
        ValueError
            If X and y do not have equal shapes after validation.
        """
        X = check_feature_array(X)
        y = check_multilabel_array(y)
        if X.shape != y.shape:
            raise ValueError('X (shape=%s) and y (shape=%s) must have equal shapes' % (X.shape, y.shape))
        # Remember the number of columns seen during training.
        self.n_features_ = X.shape[1]
        self.model_ = self._init_model().fit(X, y)
        return self
Ejemplo n.º 6
0
def multilabel_loglikelihood_report(y_true, loglikelihoods, fmt='.0f'):
    """Render a two-column text table pairing each label row with its log-likelihoods.

    Parameters
    ----------
    y_true : array-like
        True multi-label targets; validated with check_multilabel_array.
    loglikelihoods : array-like
        Log-likelihood scores with the same shape as y_true; validated with check_feature_array.
    fmt : str
        Score format forwarded to _printable_scores as ``score_fmt``.

    Raises
    ------
    ValueError
        If y_true and loglikelihoods differ in shape.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')

    # Pre-format the scores so that they line up; one formatted string per row is expected.
    score_strings = _printable_scores(loglikelihoods, score_fmt=fmt)
    assert len(score_strings) == len(loglikelihoods)

    rows = [[str(label), '[' + score_strings[row_idx] + ']']
            for row_idx, label in enumerate(y_true)]
    # NOTE(review): floatfmt is fixed at '.0f' here rather than following ``fmt``.
    return tabulate(rows, headers=['label', 'loglikelihoods'], floatfmt='.0f')
Ejemplo n.º 7
0
def multilabel_loglikelihood_report(y_true, loglikelihoods, fmt='.0f'):
    """Render a two-column text table pairing each label row with its log-likelihoods.

    Parameters
    ----------
    y_true : array-like
        True multi-label targets; validated with check_multilabel_array.
    loglikelihoods : array-like
        Log-likelihood scores with the same shape as y_true; validated with check_feature_array.
    fmt : str
        Score format forwarded to _printable_scores as ``score_fmt``.

    Raises
    ------
    ValueError
        If y_true and loglikelihoods differ in shape.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')

    # Pre-format the scores so that they line up; one formatted string per row is expected.
    score_strings = _printable_scores(loglikelihoods, score_fmt=fmt)
    assert len(score_strings) == len(loglikelihoods)

    rows = [[str(label), '[' + score_strings[row_idx] + ']']
            for row_idx, label in enumerate(y_true)]
    # NOTE(review): floatfmt is fixed at '.0f' here rather than following ``fmt``.
    return tabulate(rows, headers=['label', 'loglikelihoods'], floatfmt='.0f')
Ejemplo n.º 8
0
def multilabel_loglikelihood_summary(y_true, loglikelihoods):
    """Summarise the log-likelihoods of each label, split by whether the label is on or off.

    Parameters
    ----------
    y_true : array-like
        True multi-label targets; validated with check_multilabel_array.
    loglikelihoods : array-like
        Log-likelihood scores with the same shape as y_true; validated with check_feature_array.

    Returns
    -------
    summary : ndarray, shape (n_labels, 4)
        Per label: average and standard deviation of the scores where y_true == 1, followed by
        average and standard deviation of the scores where y_true == 0.

    Raises
    ------
    ValueError
        If y_true and loglikelihoods differ in shape.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')
    n_labels = y_true.shape[1]

    summary = np.zeros((n_labels, 4))
    # range (not xrange) keeps this working on both Python 2 and Python 3.
    for label_idx in range(n_labels):
        truth = y_true[:, label_idx]
        scores = loglikelihoods[:, label_idx]
        pos_scores = scores[truth == 1]
        neg_scores = scores[truth == 0]

        # NOTE(review): a label with no positive (or no negative) samples gives an empty
        # selection here; numpy is expected to report nan for it - confirm this is acceptable.
        summary[label_idx][0] = np.average(pos_scores)
        summary[label_idx][1] = np.std(pos_scores)
        summary[label_idx][2] = np.average(neg_scores)
        summary[label_idx][3] = np.std(neg_scores)
    return summary
Ejemplo n.º 9
0
    def predict(self, X):
        """Decide for each feature in each sample whether it is on (1) or off (0).

        Every row x is first shifted by a central measure selected through ``self.measure``
        ('median' and 'mean' are computed per row, 'zero' leaves the row unchanged):
            x_scaled = x - central_measure
        Entries with x_scaled >= self.threshold are predicted as 1, all others as 0.
        """
        X = check_feature_array(X)

        measure = self.measure
        if measure == 'median':
            center = np.median(X, axis=1)
        elif measure == 'mean':
            center = np.mean(X, axis=1)
        elif measure == 'zero':
            center = 0.0
        else:
            center = None
        assert center is not None

        # Transposing puts the rows on the last axis so the per-row measure broadcasts over them.
        shifted = np.transpose(np.transpose(X) - center)
        on_mask = shifted >= self.threshold
        predictions = np.zeros(shifted.shape, dtype=int)
        predictions[on_mask] = 1
        return predictions
Ejemplo n.º 10
0
def multilabel_loglikelihood_summary(y_true, loglikelihoods):
    """Summarise the log-likelihoods of each label, split by whether the label is on or off.

    Parameters
    ----------
    y_true : array-like
        True multi-label targets; validated with check_multilabel_array.
    loglikelihoods : array-like
        Log-likelihood scores with the same shape as y_true; validated with check_feature_array.

    Returns
    -------
    summary : ndarray, shape (n_labels, 4)
        Per label: average and standard deviation of the scores where y_true == 1, followed by
        average and standard deviation of the scores where y_true == 0.

    Raises
    ------
    ValueError
        If y_true and loglikelihoods differ in shape.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')
    n_labels = y_true.shape[1]

    summary = np.zeros((n_labels, 4))
    # range (not xrange) keeps this working on both Python 2 and Python 3.
    for label_idx in range(n_labels):
        truth = y_true[:, label_idx]
        scores = loglikelihoods[:, label_idx]
        pos_scores = scores[truth == 1]
        neg_scores = scores[truth == 0]

        # NOTE(review): a label with no positive (or no negative) samples gives an empty
        # selection here; numpy is expected to report nan for it - confirm this is acceptable.
        summary[label_idx][0] = np.average(pos_scores)
        summary[label_idx][1] = np.std(pos_scores)
        summary[label_idx][2] = np.average(neg_scores)
        summary[label_idx][3] = np.std(neg_scores)
    return summary
Ejemplo n.º 11
0
def distances_report(distances, fmt='.2f', target_names=None):
    """Build a text table of a square distance matrix plus a final row of averages.

    Parameters
    ----------
    distances : array-like
        Square matrix of distances; validated with check_feature_array.
    fmt : str
        Float format handed to tabulate as ``floatfmt``.
    target_names : sequence or None
        One name per sample, used as both the column headers and the first cell of each row.
        Any sequence with a length is accepted. If None, the indexes 0..n_samples-1 are used.

    Returns
    -------
    The value returned by tabulate for the assembled rows and headers.

    Raises
    ------
    ValueError
        If distances is not square or target_names has the wrong length.
    """
    distances = check_feature_array(distances)
    if distances.shape[0] != distances.shape[1]:
        raise ValueError('distances must be square')
    n_samples = distances.shape[1]
    if target_names is not None and len(target_names) != n_samples:
        raise ValueError('target_names must specify a name for all %d labels' % n_samples)

    # Copy into a real list: list concatenation below would otherwise fail for
    # tuples, arrays and (on Python 3) range objects.
    if target_names is None:
        names = list(range(n_samples))
    else:
        names = list(target_names)
    headers = [''] + names

    # One table row per sample: its name followed by its row of the matrix.
    data = [[names[sample_idx]] + list(distances[sample_idx])
            for sample_idx in range(n_samples)]

    # Final row: average of each matrix row (axis=1), labelled 'avg'.
    averages = np.average(distances, axis=1)
    data.append(['avg'] + list(averages))

    return tabulate(data, headers=headers, floatfmt=fmt)
Ejemplo n.º 12
0
 def predict(self, X):
     """Return the predictions of the fitted ``model_`` for X.

     Requires a prior call to ``fit``; X is validated against the feature count stored in
     ``n_features_`` before it is handed to the model.
     """
     check_is_fitted(self, 'model_', 'n_features_')
     validated = check_feature_array(X, self.n_features_)
     return self.model_.predict(validated)