def test_BagScorer_metric(self):
    """Define scoring functions, such as accuracy or recall, which will be
    used to score how well single-instance inference performs on the bag
    classification task.
    The scoring functions have some requirements -
    a) They are passed to BagScorer on initialization
    b) Must have a method "_score_func" with a signature f(y_true, y_pred)
       (This is provided by default when using sklearn.metrics.make_scorer)
    Successful conditions: the BagScorer must report the same performance
    metrics as when the metrics are manually calculated. This tests whether
    the BagScorer properly fits, trains, and evaluates the estimator passed
    to it"""

    # Generate a scoring metric for the bag scorer
    accuracy_scorer = make_scorer(accuracy_score)
    self.assertTrue(hasattr(accuracy_scorer, '_score_func'),
                    msg='accuracy scorer must have a _score_func method')

    # Generate some data
    train_bags, train_labels = self.train_bags, self.train_labels
    test_bags, test_labels = self.test_bags, self.test_labels

    # Create a dummy estimator
    dumb = DummyClassifier(strategy='constant', constant=1)

    # Concatenate bag arrays into single-instance (SI) arrays
    SI_train, SI_train_labels = bags_2_si(train_bags, train_labels)
    SI_test, SI_test_labels = bags_2_si(test_bags, test_labels)

    dumb.fit(SI_train, SI_train_labels)
    pred_test = dumb.predict(SI_test)
    pred_train = dumb.predict(SI_train)

    """Calculate the expected accuracy based on the dummy classifier.
    The dummy classifier always predicts 1 (constant), so the expected
    accuracy equals the fraction of positive labels in each set"""
    pct_train = sum(train_labels) / len(train_labels)
    pct_test = sum(test_labels) / len(test_labels)
    dumb_accuracy_train = accuracy_score(SI_train_labels, pred_train)
    dumb_accuracy_test = accuracy_score(SI_test_labels, pred_test)

    # Test the custom scorer with the same dummy estimator
    bagAccScorer = BagScorer(accuracy_scorer, sparse=True)
    estimator = bagAccScorer.estimator_fit(dumb, train_bags, train_labels)
    test_score = bagAccScorer(estimator, test_bags, test_labels)
    train_score = bagAccScorer(estimator, train_bags, train_labels)

    """test_score should output the accuracy of predictions among bags.
    The test_score for BagScorer should be equal to dumb_accuracy_test
    because bag labels are reduced by the most frequent SI prediction.
    If all SI labels are predicted + then all bags will be predicted +.
    The accuracy of bag labels reduced by BagScorer will therefore equal
    the percent of bag labels that are positive"""
    self.assertEqual(test_score, pct_test)
    self.assertEqual(train_score, pct_train)
    self.assertEqual(pct_train, dumb_accuracy_train)
    self.assertEqual(pct_test, dumb_accuracy_test)
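
# Illustrative sketch (an assumption, not the library's implementation): the
# bag-label reduction described above ("bag labels are reduced by the most
# frequent SI prediction") can be expressed as a plain mode over the
# instance-level predictions of one bag. The helper name below is hypothetical.
import numpy as np

def _sketch_reduce_bag_label_by_mode(instance_predictions):
    """Reduce one bag's instance-level predictions to a single bag label."""
    values, counts = np.unique(np.asarray(instance_predictions), return_counts=True)
    return values[np.argmax(counts)]  # e.g. [1, 1, 0, 1] -> 1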
def test_BagScorer(self):
    """Define scoring functions, such as accuracy or recall, which will be
    used to score how well single-instance inference performs on the bag
    classification task.
    The scoring functions have some requirements -
    a) They are passed to BagScorer on initialization
    b) Must have a method "_score_func" with a signature f(y_true, y_pred)
       (This is provided by default when using sklearn.metrics.make_scorer)
    """

    # Create scoring metrics, and load each scoring metric into a BagScorer
    accuracy_scorer = make_scorer(accuracy_score, normalize=True)
    precision_scorer = make_scorer(precision_score, average='weighted')
    recall_scorer = make_scorer(recall_score, average='weighted')
    self.assertDictContainsSubset({'normalize': True}, accuracy_scorer._kwargs)
    self.assertIn('_score_func', accuracy_scorer.__dict__.keys())

    # Dummy data
    train_bags, train_labels = self.train_bags, self.train_labels
    test_bags, test_labels = self.test_bags, self.test_labels

    # Create a single-instance estimator
    compNB = ComplementNB(alpha=1.0, fit_prior=True, class_prior=None,
                          norm=False)

    # Test custom scorers
    bagAccScorer = BagScorer(accuracy_scorer, sparse=True)
    bagPrecisionScorer = BagScorer(precision_scorer, sparse=True)
    bagRecallScorer = BagScorer(recall_scorer, sparse=True)

    # The fitted estimator is shared by all scorers
    estimator = bagAccScorer.estimator_fit(compNB, train_bags, train_labels)
    accuracy = bagAccScorer(estimator, test_bags, test_labels)
    precision = bagPrecisionScorer(estimator, test_bags, test_labels)
    recall = bagRecallScorer(estimator, test_bags, test_labels)

    self.assertIsInstance(accuracy, float)
    self.assertLess(accuracy, 1)
    self.assertGreater(accuracy, 0)

    self.assertIsInstance(precision, float)
    self.assertLess(precision, 1)
    self.assertGreater(precision, 0)

    self.assertIsInstance(recall, float)
    self.assertLess(recall, 1)
    self.assertGreater(recall, 0)

    return None
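
# Sketch of requirement (b) above, which the tests rely on: sklearn's make_scorer
# wraps a metric so the resulting scorer exposes `_score_func`, the underlying
# callable with signature f(y_true, y_pred). A minimal check of that contract:
from sklearn.metrics import accuracy_score, make_scorer

_scorer = make_scorer(accuracy_score, normalize=True)
assert hasattr(_scorer, '_score_func')
assert _scorer._score_func([0, 1, 1], [0, 1, 0]) == 2 / 3  # plain f(y_true, y_pred)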
def test_fit_and_score_return_dict(self):

    # Scoring
    accuracy_scorer = make_scorer(accuracy_score, normalize=True)

    # Test estimator
    dumb = DummyClassifier(strategy='constant', constant=1)

    # Test custom scorer
    bagAccScorer = BagScorer(accuracy_scorer, sparse=True)

    # Rename for easier parameter passing
    X = self.train_bags
    y = self.train_labels
    scoring = {'bag-scorer': bagAccScorer}
    estimator = dumb
    groups = None
    cv = 3
    n_jobs = 3
    verbose = 0
    pre_dispatch = 6
    fit_params = None
    return_estimator = True
    error_score = 'raise'
    return_train_score = True
    parameters = None

    # Test _fit_and_score method
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers = _check_multimetric_scoring(estimator, scoring=scoring)

    # Use one cross-validation split
    generator = cv.split(X, y, groups)
    # Get the training and test split of the training data
    train, test = next(generator)

    # Generate scores using BagScorer
    scores = _fit_and_score(clone(estimator),
                            X, y,
                            scorers,
                            train, test,
                            verbose, parameters,
                            fit_params,
                            return_train_score=return_train_score,
                            return_times=True,
                            return_estimator=return_estimator,
                            return_n_test_samples=False,
                            error_score=error_score)

    # Returned dictionary contains the expected keys
    self.assertIn('train_scores', scores.keys())
    self.assertIn('test_scores', scores.keys())
    self.assertIn('fit_time', scores.keys())
    self.assertIn('score_time', scores.keys())
    self.assertIn('estimator', scores.keys())

    return None
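
# Sketch (assumes the key layout asserted above when return_train_score and
# return_estimator are True, and that per-scorer results are keyed by scorer
# name as in test_fit_and_score below): pulling the numbers out of a single
# _fit_and_score result dict. The helper name is hypothetical.
def _sketch_unpack_fit_and_score(result, scorer_name='bag-scorer'):
    return {'train': result['train_scores'][scorer_name],
            'test': result['test_scores'][scorer_name],
            'fit_time': result['fit_time'],
            'score_time': result['score_time']}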
def test_BagScorer_signature(self):

    # Test custom scorer
    accuracy_scorer = make_scorer(accuracy_score, normalize=True)
    bagAccScorer = BagScorer(accuracy_scorer, sparse=True)
    self.assertTrue(callable(bagAccScorer), msg="BagScorer must be callable")

    return None
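
# BagScorer is invoked with sklearn's scorer calling convention,
# scorer(estimator, X, y) -> float, as the other tests in this module do with
# bag data. A minimal demonstration of that convention with a plain sklearn
# scorer and toy single-instance data (illustrative only, not bag data):
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, make_scorer

_X, _y = [[0], [1], [2], [3]], [1, 1, 0, 1]
_clf = DummyClassifier(strategy='constant', constant=1).fit(_X, _y)
_plain_scorer = make_scorer(accuracy_score)
assert isinstance(_plain_scorer(_clf, _X, _y), float)  # scorer(estimator, X, y)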
def predict(self,
            data: Union[List[MutableMapping], MutableMapping, pd.DataFrame]
            ) -> np.ndarray:
    """Predict on an embedded bag
    inputs
    -------
    data: (list(RawInputData), RawInputData, pandas.DataFrame)
        Raw data input which is transformed by this class
    outputs
    -------
    bag_prediction: (np.ndarray) result of aggregating the single-instance
        predictions of the instances within the bag into a single bag label"""

    # Transform raw data
    transformed_data = self._transform_data(data)

    # Predict on transformed data
    predictions = self.classifier.predict(
        self._determine_reshape(self.custom_transform(transformed_data)))

    # Aggregate the instance predictions into a single bag prediction
    bag_prediction = BagScorer.reduce_bag_label(predictions, method='mode')

    return np.array([bag_prediction], dtype=np.unicode_)
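
# Usage sketch (hypothetical names; `pipeline` stands for a fitted instance of the
# class above): predict() takes the raw records of one bag and returns a
# one-element unicode array holding the aggregated bag label, e.g.
#
#     raw_bag = pd.DataFrame(...)        # raw instances belonging to one bag
#     label = pipeline.predict(raw_bag)  # -> array(['<bag label>'], dtype='<U...')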
train_index, test_index = next(rs.split(bags, labels))
train_bags, train_labels = bags[train_index], labels[train_index]
test_bags, test_labels = bags[test_index], labels[test_index]

# Create estimators
dumb = DummyClassifier(strategy='constant', constant=1)
radiusNeighbor = RadiusNeighborsClassifier(
    weights='distance',
    algorithm='auto',
    p=1,  # Manhattan distance
)

# Create evaluation metrics
# Multiple evaluation metrics are allowed
accuracy_scorer = make_scorer(accuracy_score)
bagAccScorer = BagScorer(accuracy_scorer)  # Accuracy score, no factory function
precision_scorer = make_scorer(precision_score, average='binary')
bagPreScorer = BagScorer(precision_scorer)
jaccard_scorer = make_scorer(jaccard_score, average='binary')
bagJacScorer = BagScorer(jaccard_scorer)

scoring = {
    'bag_accuracy': bagAccScorer,
    'bag_precision': bagPreScorer,
    'bag_jaccard': bagJacScorer,
}

#%%
# Cross validate the dummy data and estimator
result_dumb = cross_validate_bag(
    estimator=dumb,
# Filter out bags with too few (or too many) instances
_filter = _filter_bags_by_size(train_bags_cat,
                               min_instances=5,
                               max_instances=1000)

# Convert bags to dense for the KNN estimator
_train_bags_dense = _densify_bags(train_bags[_filter])
_train_labels = train_labels[_filter]

# Keep bags sparse for Complement Naive Bayes and Multinomial Naive Bayes
_train_bags_cat = train_bags_cat[_filter]
_train_labels_cat = train_labels_cat[_filter]

# Define evaluation metrics
accuracy_scorer = make_scorer(accuracy_score)
bagAccScorer = BagScorer(accuracy_scorer, sparse_input=False)
precision_scorer = make_scorer(precision_score, average='weighted')
bagPreScorer = BagScorer(precision_scorer, sparse_input=False)
recall_scorer = make_scorer(recall_score, average='weighted')
bagRecScorer = BagScorer(recall_scorer, sparse_input=False)

scoring_dense = {
    'bag_accuracy': bagAccScorer,
    'bag_precision': bagPreScorer,
    'bag_recall': bagRecScorer,
}

# Cross validate bags
res_knn_cv = cross_validate_bag(
    estimator=knn,
    X=_train_bags_dense,
    y=_train_labels,
def test_fit_and_score(self):

    # Scoring
    accuracy_scorer = make_scorer(accuracy_score, normalize=True)

    # Test estimator
    dumb = DummyClassifier(strategy='constant', constant=1)

    # Test custom scorer
    bagAccScorer = BagScorer(accuracy_scorer, sparse=True)

    # _fit_and_score testing
    X = self.train_bags
    y = self.train_labels
    scoring = {
        'bag-accuracy-scorer': bagAccScorer,
    }
    estimator = dumb
    groups = None
    cv = 3
    n_jobs = 3
    verbose = 0
    pre_dispatch = 6
    fit_params = None
    return_estimator = None
    error_score = 'raise'
    return_train_score = None
    parameters = None

    # Test _fit_and_score method
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)

    # scores is a list of dictionaries
    """When scoring is a dictionary, the returned result looks like
    [{'test_scores': {'bag-accuracy-scorer': 0.5185185185185185},
      'fit_time': 0.0,
      'score_time': 0.0},
     {'test_scores': {'bag-accuracy-scorer': 0.5185185185185185},
      'fit_time': 0.0,
      'score_time': 0.0},
     ... ]"""
    scores = parallel(
        delayed(_fit_and_score)(clone(estimator),
                                X, y,
                                scorers,
                                train, test,
                                verbose, parameters,
                                fit_params,
                                return_train_score=return_train_score,
                                return_times=True,
                                return_estimator=return_estimator,
                                error_score=error_score)
        for train, test in cv.split(X, y, groups))

    for score in scores:
        bag_scoring_metric = score['test_scores']
        self.assertLessEqual(bag_scoring_metric['bag-accuracy-scorer'], 1)
        self.assertGreaterEqual(bag_scoring_metric['bag-accuracy-scorer'], 0)
        fit_time = score['fit_time']
        self.assertIsInstance(fit_time, float)
        score_time = score['score_time']
        self.assertIsInstance(score_time, float)

    return None
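
# Sketch (assumes the list-of-dicts layout shown in the docstring above): collapsing
# the per-fold results returned by the parallel _fit_and_score calls into a single
# mean test score for one scorer. The helper name is hypothetical.
import numpy as np

def _sketch_mean_fold_score(fold_results, scorer_name='bag-accuracy-scorer'):
    return float(np.mean([fold['test_scores'][scorer_name] for fold in fold_results]))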
def test_cross_validate_bag(self):

    # Scoring
    accuracy_scorer = make_scorer(accuracy_score, normalize=True)

    # Dummy data
    train_bags, train_labels = self.train_bags, self.train_labels
    test_bags, test_labels = self.test_bags, self.test_labels

    # Define an estimator
    dumb = DummyClassifier(strategy='constant', constant=1)

    # Calculate metrics manually
    expected_accuracy = sum(train_labels) / len(train_labels)
    kf = KFold(n_splits=4)
    accuracies = []
    for train_index, test_index in kf.split(train_labels):
        _fold = train_labels[test_index]
        _acc = sum(_fold) / len(_fold)
        print(sum(_fold))
        accuracies.append(_acc)
    print('Global Accuracy : ', sum(train_labels) / len(train_labels))
    print('Averaged accuracies : ', np.mean(accuracies))

    # Custom scorer
    bagAccScorer = BagScorer(accuracy_scorer, sparse=True)
    scorer = {
        'bag-accuracy-scorer': bagAccScorer,
    }

    # Test cross_validate_bag
    # res is a dictionary of lists, e.g. {'fit_time': [1, 2, 3],
    # 'test_bag-accuracy-scorer': [0.1, 0.2, 0.3]}
    res = cross_validate_bag(dumb,
                             train_bags,
                             train_labels,
                             cv=4,
                             scoring=scorer,
                             n_jobs=1,
                             verbose=0,
                             fit_params=None,
                             pre_dispatch='2*n_jobs',
                             return_train_score=False,
                             return_estimator=False,
                             error_score='raise')

    """The arithmetic mean of the per-fold accuracies should equal the
    prediction accuracy over the training bags (at least if all splits are
    of equal size, which is not true when the number of training instances
    is not divisible by the number of splits).
    This is only true because the dummy classifier always predicts 1.
    If the splits are not of equal size the mean will be close to equal"""
    self.assertAlmostEqual(np.mean(res['test_bag-accuracy-scorer']),
                           expected_accuracy, 3)
    # Also check strict equality of the mean
    self.assertEqual(np.mean(res['test_bag-accuracy-scorer']),
                     expected_accuracy)

    # 4 cross-validation splits
    self.assertTrue(len(res['test_bag-accuracy-scorer']) == 4)

    # Assert the result contains the timing keys
    self.assertIn('fit_time', res.keys())
    self.assertIn('score_time', res.keys())

    return None