def test_smaller_dictionary(self):
    """Fitting must raise when 'Stochastic' loses its last class.

    The reduced taxonomy is built on a copy, so the shared
    ``self.taxonomy_dictionary`` fixture is not modified by this test.
    """
    # A shallow copy is sufficient: only the 'Stochastic' key is rebound,
    # and slicing already produces a new list.
    taxonomy_dictionary = dict(self.taxonomy_dictionary)
    taxonomy_dictionary['Stochastic'] = taxonomy_dictionary['Stochastic'][:-1]

    classifier = HierarchicalRandomForest(taxonomy_dictionary)

    # NOTE(review): the concrete exception type raised by fit() is not
    # visible from this test, so the broad Exception check is kept as is.
    with self.assertRaises(Exception):
        classifier.fit(self.features, self.labels)
def test_larger_dictionary(self):
    """Fit with an extra 'Stochastic' class and check the output width.

    The extended taxonomy is built on a copy, so the shared
    ``self.taxonomy_dictionary`` fixture is not modified by this test.
    """
    # A shallow copy is sufficient: only the 'Stochastic' key is rebound,
    # and list concatenation already produces a new list.
    taxonomy_dictionary = dict(self.taxonomy_dictionary)
    taxonomy_dictionary['Stochastic'] = (
        taxonomy_dictionary['Stochastic'] + ['new class'])

    classifier = HierarchicalRandomForest(taxonomy_dictionary)
    classifier.fit(self.features, self.labels)
    predicted_probs = classifier.predict_proba(self.features)

    # One row per input row. The 15 columns are presumably the fixture's
    # classes plus 'new class' -- TODO confirm against the fixture.
    self.assertEqual(predicted_probs.shape, (len(self.features), 15))
def test_fit(self):
    """Balanced recall on the training data must exceed 0.95 after fit."""
    model = HierarchicalRandomForest(self.taxonomy_dictionary)
    model.fit(self.features, self.labels)

    # Score the model on the same data it was trained on.
    training_recall = balanced_recall(
        model.predict(self.features), self.labels)

    self.assertGreater(
        training_recall,
        0.95,
        'Training balanced recall has to be greater than 95 %')
def test_feature_order_shuffle(self):
    """Predicting on column-shuffled features must keep recall above 0.95.

    The classifier is fitted on ``self.features`` and then asked to predict
    on the same data with its columns selected in a random order.
    """
    classifier = HierarchicalRandomForest(self.taxonomy_dictionary)
    classifier.fit(self.features, self.labels)

    # permutation() returns a shuffled *copy*. Shuffling `columns.values`
    # in place would scramble the column labels of the shared fixture
    # itself, and the selection below would then no longer reorder anything.
    new_columns = np.random.permutation(self.features.columns.values)

    predicted_classes = classifier.predict(self.features[new_columns])
    recall_values = balanced_recall(predicted_classes, self.labels)
    self.assertGreater(
        recall_values,
        0.95,
        'Training balanced recall has to be greater than 95 %')
def test_fit(self):
    """Smoke test: fit on the training set and print sample predictions.

    Makes no assertions; it only prints the head of the predicted
    probabilities and of the predicted classes.
    """
    taxonomy_dictionary = {
        'Stochastic': ['LPV', 'QSO', 'YSO', 'CV/Nova', 'Blazar', 'AGN'],
        'Periodic': ['RRL', 'EB', 'DSCT', 'Ceph', 'Periodic-Other'],
        'Transient': ['SNIa', 'SNII', 'SNIbc'],
    }
    classifier = HierarchicalRandomForest(taxonomy_dictionary)
    classifier.fit(self.train_features, self.train_labels)

    class_probabilities = classifier.predict_proba(self.train_features)
    print(class_probabilities.head())

    class_predictions = classifier.predict(self.train_features)
    print(class_predictions.head())
def test_predict_in_pipeline(self):
    """Check the structure returned by ``predict_in_pipeline`` for one row.

    Each probability dictionary is passed to ``self.dict_sum_one``
    (presumably asserting that its values add up to one). The flat
    dictionary must have one entry per class and the top level must
    have three entries.
    """
    model = HierarchicalRandomForest(self.taxonomy_dictionary)
    model.fit(self.features, self.labels)

    first_row = self.features.iloc[0]
    prediction = model.predict_in_pipeline(first_row)
    expected_class_count = len(model.get_list_of_classes())

    # Flat probabilities over every class.
    flat_probabilities = prediction['probabilities']
    self.dict_sum_one(flat_probabilities)
    self.assertEqual(expected_class_count, len(flat_probabilities))

    # Top level of the hierarchy.
    hierarchy = prediction['hierarchical']
    top_level = hierarchy['top']
    self.dict_sum_one(top_level)
    self.assertEqual(3, len(top_level))

    # One probability dictionary per child classifier.
    for branch_probabilities in hierarchy['children'].values():
        self.dict_sum_one(branch_probabilities)
def test_predict_proba(self):
    """Pass the fitted model's probabilities to ``self.is_sum_one``.

    The helper is not visible here; presumably it asserts that the
    probabilities add up to one.
    """
    model = HierarchicalRandomForest(self.taxonomy_dictionary)
    model.fit(self.features, self.labels)
    self.is_sum_one(model.predict_proba(self.features))
def test_save_and_load_hierarchical_rf(self):
    """A model reloaded from ``self.tmp_dir`` must give equal probabilities.

    Probabilities from the fitted model and from a second instance loaded
    from the saved files are compared element by element.
    """
    trained_model = HierarchicalRandomForest(self.taxonomy_dictionary)
    trained_model.fit(self.features, self.labels)
    probs_before_save = trained_model.predict_proba(self.features)
    trained_model.save_model(self.tmp_dir)

    # A fresh instance that is never fitted, only loaded from disk.
    restored_model = HierarchicalRandomForest(self.taxonomy_dictionary)
    restored_model.load_model(self.tmp_dir)
    probs_after_load = restored_model.predict_proba(self.features)

    elementwise_match = probs_before_save == probs_after_load
    self.assertTrue(elementwise_match.all(axis=None))
stratify=labels.loc[train_val_oids].classALeRCE.values, test_size=0.15 / 0.7, random_state=0) # needed for color augmentation training_features = features.loc[train_oids] training_labels = labels.loc[train_oids] training_features.to_pickle('training_features.pkl') training_labels.to_pickle('training_labels.pkl') taxonomy_dictionary = { 'Stochastic': ['LPV', 'QSO', 'YSO', 'CV/Nova', 'Blazar', 'AGN'], 'Periodic': ['RRL', 'EB', 'DSCT', 'Ceph', 'Periodic-Other'], 'Transient': ['SNIa', 'SNII', 'SNIbc', 'SLSN'] } classifier = HierarchicalRandomForest(taxonomy_dictionary) classifier.fit(features.loc[train_oids], labels.loc[train_oids]) def evaluate_classifier(classifier): test_predictions = classifier.predict(features.loc[test_oids]) test_confusion_matrix = confusion_matrix(test_predictions, labels.loc[test_oids], classifier.get_list_of_classes()) print(test_confusion_matrix) test_predictions_proba = classifier.predict_proba(features.loc[test_oids]) kaggle_score_value = kaggle_score(test_predictions_proba,