Ejemplo n.º 1
0
 def test_smaller_dictionary(self):
     """Check that ``fit`` raises when the taxonomy is missing a class.

     The last entry of the ``'Stochastic'`` branch is removed from the
     taxonomy before building the classifier; fitting on the fixture
     features and labels is then expected to raise.
     """
     # Work on a shallow copy: rebinding a key on the fixture dictionary
     # itself would leak the truncated taxonomy to any other test that
     # shares the same object.
     taxonomy_dictionary = dict(self.taxonomy_dictionary)
     # Slicing builds a new list, so the fixture's own list is untouched.
     taxonomy_dictionary['Stochastic'] = (
         taxonomy_dictionary['Stochastic'][:-1])
     classifier = HierarchicalRandomForest(taxonomy_dictionary)
     # NOTE(review): the concrete exception type raised by fit is not
     # visible here, so the broad ``Exception`` check is kept.
     with self.assertRaises(Exception):
         classifier.fit(self.features, self.labels)
Ejemplo n.º 2
0
 def test_larger_dictionary(self):
     """Check that an extra taxonomy class does not break fit/predict.

     ``'new class'`` is appended to the ``'Stochastic'`` branch, the
     classifier is fitted on the fixture data, and the shape of the
     predicted probabilities is verified.
     """
     # Work on a shallow copy: rebinding a key on the fixture dictionary
     # itself would leak the enlarged taxonomy to any other test that
     # shares the same object.
     taxonomy_dictionary = dict(self.taxonomy_dictionary)
     # List concatenation builds a new list; the fixture list is untouched.
     taxonomy_dictionary['Stochastic'] = (
         taxonomy_dictionary['Stochastic'] + ['new class'])
     classifier = HierarchicalRandomForest(taxonomy_dictionary)
     classifier.fit(self.features, self.labels)
     predicted_probs = classifier.predict_proba(self.features)
     # NOTE(review): 15 is hard-coded; presumably the number of classes in
     # the fixture taxonomy, with the unseen 'new class' adding no column.
     # Confirm against the fixture.
     self.assertEqual(predicted_probs.shape, (len(self.features), 15))
Ejemplo n.º 3
0
 def test_fit(self):
     """Fit on the fixture data and require a training recall above 0.95.

     The classifier is trained and evaluated on the same features, so the
     value checked is the balanced recall on the training set itself.
     """
     model = HierarchicalRandomForest(self.taxonomy_dictionary)
     model.fit(self.features, self.labels)
     training_recall = balanced_recall(
         model.predict(self.features), self.labels)
     failure_message = 'Training balanced recall has to be greater than 95 %'
     self.assertGreater(training_recall, 0.95, failure_message)
Ejemplo n.º 4
0
 def test_feature_order_shuffle(self):
     """Check that prediction does not depend on the feature column order.

     The classifier is fitted on the fixture features, then asked to
     predict on the same data with its columns in a random order; the
     balanced recall against the training labels must stay above 0.95.
     """
     classifier = HierarchicalRandomForest(self.taxonomy_dictionary)
     classifier.fit(self.features, self.labels)
     # ``columns.values`` may expose the Index's own buffer. Shuffling it
     # in place would permute the fixture's column labels (or fail on a
     # read-only array), so shuffle an explicit copy instead.
     new_columns = self.features.columns.values.copy()
     # NOTE(review): the shuffle is unseeded, so the column order differs
     # between runs.
     np.random.shuffle(new_columns)
     predicted_classes = classifier.predict(self.features[new_columns])
     recall_values = balanced_recall(predicted_classes, self.labels)
     self.assertGreater(
         recall_values, 0.95,
         'Training balanced recall has to be greater than 95 %')
 def test_fit(self):
     """Smoke test: fit on the training fixture and print sample outputs.

     Builds a three-branch taxonomy, fits the classifier on
     ``self.train_features`` / ``self.train_labels`` and prints the head of
     the predicted probabilities and of the predicted classes. No
     assertions are made; the test only fails if one of the calls raises.
     """
     stochastic = ['LPV', 'QSO', 'YSO', 'CV/Nova', 'Blazar', 'AGN']
     periodic = ['RRL', 'EB', 'DSCT', 'Ceph', 'Periodic-Other']
     transient = ['SNIa', 'SNII', 'SNIbc']
     taxonomy = {
         'Stochastic': stochastic,
         'Periodic': periodic,
         'Transient': transient,
     }

     classifier = HierarchicalRandomForest(taxonomy)
     classifier.fit(self.train_features, self.train_labels)

     class_probabilities = classifier.predict_proba(self.train_features)
     print(class_probabilities.head())

     class_predictions = classifier.predict(self.train_features)
     print(class_predictions.head())
Ejemplo n.º 6
0
 def test_predict_in_pipeline(self):
     """Validate the structure returned by ``predict_in_pipeline``.

     A single row of the fixture features is passed to the fitted
     classifier. The result is expected to hold a ``'probabilities'``
     mapping with one entry per class and a ``'hierarchical'`` mapping with
     a three-entry ``'top'`` level plus per-branch ``'children'`` mappings.
     Each mapping is checked with ``self.dict_sum_one`` (helper not shown
     here; presumably asserts that the values sum to one).
     """
     model = HierarchicalRandomForest(self.taxonomy_dictionary)
     model.fit(self.features, self.labels)
     first_row = self.features.iloc[0]
     prediction = model.predict_in_pipeline(first_row)
     expected_class_count = len(model.get_list_of_classes())

     # Flat, per-class probabilities.
     flat_probs = prediction['probabilities']
     self.dict_sum_one(flat_probs)
     self.assertEqual(expected_class_count, len(flat_probs))

     # Hierarchical view: top level first, then every child branch.
     hierarchy = prediction['hierarchical']
     self.dict_sum_one(hierarchy['top'])
     self.assertEqual(3, len(hierarchy['top']))
     for branch_probs in hierarchy['children'].values():
         self.dict_sum_one(branch_probs)
Ejemplo n.º 7
0
 def test_predict_proba(self):
     """Fit on the fixture data and check the predicted probabilities.

     The output of ``predict_proba`` is handed to ``self.is_sum_one``
     (helper not shown here; presumably asserts that each row sums to one).
     """
     model = HierarchicalRandomForest(self.taxonomy_dictionary)
     model.fit(self.features, self.labels)
     self.is_sum_one(model.predict_proba(self.features))
Ejemplo n.º 8
0
 def test_save_and_load_hierarchical_rf(self):
     """Check that a saved and reloaded model predicts identically.

     A fitted classifier is saved to ``self.tmp_dir``; a second instance
     loads the model from the same directory, and both must yield exactly
     equal probabilities on the fixture features.
     """
     original = HierarchicalRandomForest(self.taxonomy_dictionary)
     original.fit(self.features, self.labels)
     probs_before = original.predict_proba(self.features)
     original.save_model(self.tmp_dir)

     restored = HierarchicalRandomForest(self.taxonomy_dictionary)
     restored.load_model(self.tmp_dir)
     probs_after = restored.predict_proba(self.features)

     # Element-wise equality reduced over every axis to a single boolean.
     identical = (probs_before == probs_after).all(axis=None)
     self.assertTrue(identical)
    stratify=labels.loc[train_val_oids].classALeRCE.values,
    test_size=0.15 / 0.7,
    random_state=0)

# Persist the training subset to disk.
# needed for color augmentation
training_features = features.loc[train_oids]
training_labels = labels.loc[train_oids]
training_features.to_pickle('training_features.pkl')
training_labels.to_pickle('training_labels.pkl')

# Two-level taxonomy: top-level branch -> list of leaf classes.
taxonomy_dictionary = {
    'Stochastic': ['LPV', 'QSO', 'YSO', 'CV/Nova', 'Blazar', 'AGN'],
    'Periodic': ['RRL', 'EB', 'DSCT', 'Ceph', 'Periodic-Other'],
    'Transient': ['SNIa', 'SNII', 'SNIbc', 'SLSN']
}
classifier = HierarchicalRandomForest(taxonomy_dictionary)

# Fit on the very subsets that were pickled above instead of selecting the
# same rows a second time, so the saved data and the fitted data cannot
# drift apart.
classifier.fit(training_features, training_labels)


def evaluate_classifier(classifier):
    test_predictions = classifier.predict(features.loc[test_oids])

    test_confusion_matrix = confusion_matrix(test_predictions,
                                             labels.loc[test_oids],
                                             classifier.get_list_of_classes())

    print(test_confusion_matrix)

    test_predictions_proba = classifier.predict_proba(features.loc[test_oids])
    kaggle_score_value = kaggle_score(test_predictions_proba,