def fit(self, X, y):
    """Train a one-vs-rest multi-label classifier on (X, y).

    X and y must share the same (n_samples, n_features) shape; y is a
    binary indicator matrix. Stores the feature count in n_features_ and
    the fitted wrapper in model_.
    """
    X = check_feature_array(X)
    y = check_multilabel_array(y)
    if X.shape != y.shape:
        raise ValueError('X (shape=%s) and y (shape=%s) must have equal shapes' % (X.shape, y.shape))
    self.n_features_ = X.shape[1]
    wrapped = OneVsRestClassifier(self._init_model(), n_jobs=self.n_jobs)
    self.model_ = wrapped.fit(X, y)
def distances_report(distances, fmt='.2f', target_names=None):
    """Render a square distance matrix as a text table.

    Parameters
    ----------
    distances : array-like, shape (n_samples, n_samples)
        Square matrix of pairwise distances.
    fmt : str
        Float format handed to tabulate (e.g. '.2f').
    target_names : sequence, optional
        One display name per row/column. Defaults to the indexes
        0..n_samples-1 when omitted.

    Returns
    -------
    str
        The formatted table, ending with an 'avg' row holding each
        row's average distance.

    Raises
    ------
    ValueError
        If distances is not square, or target_names has the wrong length.
    """
    distances = check_feature_array(distances)
    if distances.shape[0] != distances.shape[1]:
        raise ValueError('distances must be square')
    n_samples = distances.shape[1]
    if target_names is not None and len(target_names) != n_samples:
        raise ValueError('target_names must specify a name for all %d labels' % n_samples)

    # Generate table
    if target_names is None:
        target_names = range(n_samples)
    # BUG FIX: wrap in list() so that tuples (or any sequence) of names
    # concatenate cleanly instead of raising a TypeError.
    headers = [''] + list(target_names)
    data = []
    for sample_idx in xrange(n_samples):
        target_name = target_names[sample_idx]
        row = [target_name] + list(distances[sample_idx])
        data.append(row)

    # Calculate summaries for all values
    averages = np.average(distances, axis=1)
    summary_row = ['avg'] + list(averages)
    data.append(summary_row)
    return tabulate(data, headers=headers, floatfmt=fmt)
def predict(self, X): """Decide for each feature in each sample if it is on or off. _MultiLabelDecisionMaker uses a multi-label classifier to predict the multi-labels for all features. However, the classifier must first be trained by calling fit. """ check_is_fitted(self, 'model_', 'n_features_') X = check_feature_array(X, self.n_features_) predictions = self.model_.predict(X) return predictions
def predict(self, X): """Decide for each feature in each sample if it is on or off. ExtremumDecisionMaker calculates the maximum for each row and turns on the feature with that extreme value. Exactly one feature per row is turned on. """ X = check_feature_array(X) max_indexes = np.argmax(X, axis=1) predictions = np.zeros(X.shape, dtype=int) predictions[range(predictions.shape[0]), max_indexes] = 1 return predictions
def fit(self, X, y): """Fit the _MultiLabelDecisionMaker according to the given training data. Parameters ---------- X : array-like, shape (n_samples, n_features) Training vectors, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape (n_samples, n_features) Binary target values, where for each sample a feature is either on (encoded as 1) or off (encoded as 0). """ X = check_feature_array(X) y = check_multilabel_array(y) if X.shape != y.shape: raise ValueError('X (shape=%s) and y (shape=%s) must have equal shapes' % (X.shape, y.shape)) self.n_features_ = X.shape[1] self.model_ = self._init_model().fit(X, y)
def multilabel_loglikelihood_report(y_true, loglikelihoods, fmt='.0f'):
    """Render a table pairing each true multi-label with its log-likelihoods.

    Parameters
    ----------
    y_true : array-like, shape (n_samples, n_labels)
        Binary multi-label ground truth (1 = on, 0 = off).
    loglikelihoods : array-like, shape (n_samples, n_labels)
        Per-label log-likelihood scores, one row per sample.
    fmt : str
        Float format used to render each log-likelihood value.

    Returns
    -------
    str
        The tabulated report with columns 'label' and 'loglikelihoods'.

    Raises
    ------
    ValueError
        If y_true and loglikelihoods have different shapes.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')

    # Format likelihoods so that they are properly aligned
    printable_loglikelihoods = _printable_scores(loglikelihoods, score_fmt=fmt)
    assert len(printable_loglikelihoods) == len(loglikelihoods)

    # Generate table
    headers = ['label', 'loglikelihoods']
    data = []
    for idx, label in enumerate(y_true):
        row = [str(label), '[' + printable_loglikelihoods[idx] + ']']
        data.append(row)
    # BUG FIX: honor the fmt argument instead of the hard-coded '.0f'.
    return tabulate(data, headers=headers, floatfmt=fmt)
def multilabel_loglikelihood_summary(y_true, loglikelihoods):
    """Summarize log-likelihood statistics per label, split by truth value.

    Returns an (n_labels, 4) array whose columns are the mean and standard
    deviation of the log-likelihoods over positive samples (y_true == 1),
    followed by the mean and standard deviation over negative samples
    (y_true == 0).
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')
    n_labels = y_true.shape[1]

    # Calculate stats
    summary = np.zeros((n_labels, 4))
    for label_idx in xrange(n_labels):
        scores = loglikelihoods[:, label_idx]
        truth = y_true[:, label_idx]
        pos_scores = scores[np.where(truth == 1)[0]]
        neg_scores = scores[np.where(truth == 0)[0]]
        summary[label_idx] = [np.average(pos_scores), np.std(pos_scores),
                              np.average(neg_scores), np.std(neg_scores)]
    return summary
def predict(self, X): """Decide for each feature in each sample if it is on or off. The decision is made by the following simple calculation for each row x, where central_measure is the specified measure: x_scaled = x - central_measure predictions[x_scaled >= threshold] = 1 predictions[x_scaled < threshold] = 0 """ X = check_feature_array(X) central_measure = None if self.measure == 'median': central_measure = np.median(X, axis=1) elif self.measure == 'mean': central_measure = np.mean(X, axis=1) elif self.measure == 'zero': central_measure = 0.0 assert central_measure is not None scaled_X = (X.T - central_measure).T predictions = np.zeros(scaled_X.shape, dtype=int) predictions[scaled_X >= self.threshold] = 1 return predictions
def multilabel_loglikelihood_summary(y_true, loglikelihoods):
    """Per-label mean/std of log-likelihoods for positive and negative samples.

    Returns an (n_labels, 4) array: columns 0-1 hold the average and
    standard deviation of the log-likelihoods where y_true == 1, and
    columns 2-3 the same statistics where y_true == 0.

    NOTE(review): an identical function appears elsewhere in this file —
    consider consolidating the two definitions.
    """
    y_true = check_multilabel_array(y_true)
    loglikelihoods = check_feature_array(loglikelihoods)
    if y_true.shape != loglikelihoods.shape:
        raise ValueError('y_true and loglikelihoods must have equal shapes')
    n_labels = y_true.shape[1]

    # Calculate stats
    summary = np.zeros((n_labels, 4))
    for label_idx in xrange(n_labels):
        column = loglikelihoods[:, label_idx]
        on_values = column[np.where(y_true[:, label_idx] == 1)[0]]
        off_values = column[np.where(y_true[:, label_idx] == 0)[0]]
        summary[label_idx][0] = np.average(on_values)
        summary[label_idx][1] = np.std(on_values)
        summary[label_idx][2] = np.average(off_values)
        summary[label_idx][3] = np.std(off_values)
    return summary
def predict(self, X):
    """Predict the multi-label matrix for X with the fitted model.

    Requires a prior call to fit (checked via model_/n_features_).
    """
    check_is_fitted(self, 'model_', 'n_features_')
    checked = check_feature_array(X, self.n_features_)
    return self.model_.predict(checked)