def _load_taxon_hierarchy(self):
    """Load the taxon hierarchy into ``self.taxon_hr`` if it is not set yet.

    Must be separate from the constructor because
    :meth:`set_photo_count_min` influences the taxon hierarchy.

    The hierarchy is only fetched when ``self.taxon_hr`` is falsy. When it
    is already populated the method returns immediately, so no database
    session is requested for a value that is not going to be replaced.
    """
    # Guard clause: a previously loaded hierarchy is kept as is.
    if self.taxon_hr:
        return

    session, metadata = db.get_session_or_error()
    self.taxon_hr = db.get_taxon_hierarchy(session, metadata)
def test_get_taxon_hierarchy(self):
    """Test the get_taxon_hierarchy() method.

    The hierarchy is requested twice: once with the current value of
    ``conf.photo_count_min`` and once with it set to 4. Both results are
    compared to the expected structure through their string
    representations.

    ``conf.photo_count_min`` is restored to its previous value when the
    test finishes, whether it passes or fails, so the changed setting
    does not leak into code that runs afterwards.
    """
    with db.session_scope(META_FILE) as (session, metadata):
        expected = {
            u'Paphiopedilum': {
                u'Brachypetalum': [u'godefroyae', u'wenshanense']
            },
            u'Selenipedium': {
                None: [u'palmifolium']
            },
            u'Mexipedium': {
                None: [u'xerophyticum']
            },
            u'Cypripedium': {
                u'Trigonopedia': [u'fargesii', u'sichuanense'],
                u'Obtusipetala': [u'flavum'],
                u'Arietinum': [u'plectrochilum']
            },
            u'Phragmipedium': {
                u'Micropetalum': [u'besseae']
            }
        }

        hier = db.get_taxon_hierarchy(session, metadata)
        self.assertEqual(str(hier), str(expected))

        # Setting `photo_count_min` to 4 should only return the species with
        # at least 4 photos. Remember the previous value so that it can be
        # put back afterwards.
        previous_photo_count_min = conf.photo_count_min
        conf.photo_count_min = 4
        try:
            expected = {
                u'Paphiopedilum': {
                    u'Brachypetalum': [u'godefroyae']
                },
                u'Cypripedium': {
                    u'Obtusipetala': [u'flavum']
                },
                u'Phragmipedium': {
                    u'Micropetalum': [u'besseae']
                }
            }

            hier = db.get_taxon_hierarchy(session, metadata)
            self.assertEqual(str(hier), str(expected))
        finally:
            conf.photo_count_min = previous_photo_count_min
def get_taxon_hierarchy(self):
    """Return the taxon hierarchy.

    The metadata database is consulted first. When a
    ``DatabaseSessionError`` is raised while doing so, the taxa from the
    configuration (``config.classification.taxa``) are returned as a
    dictionary instead.
    """
    try:
        db_session, db_metadata = db.get_session_or_error()
        return db.get_taxon_hierarchy(db_session, db_metadata)
    except DatabaseSessionError:
        # No usable database session; fall back to the configuration.
        return self.config.classification.taxa.as_dict()
def test_with_hierarchy(self, test_data_dir, ann_dir, max_error=0.001):
    """Test each ANN in a classification hierarchy and export results.

    For every filter produced from the classification hierarchy, the
    matching test data file is read and each sample is classified with the
    matching neural network. The expected and the recognized classes are
    stored per photo ID and per level name in
    ``self.classifications_expected`` and ``self.classifications``.

    Filters with a single class are not run through a network; both
    dictionaries get ``['']`` for that level instead.

    :param test_data_dir: Directory that is joined with each level's
        ``test_file`` to locate the test data.
    :param ann_dir: Directory that is joined with each level's
        ``ann_file`` to locate the neural network.
    :param max_error: Maximum error passed to the classification of the
        codewords. A level that defines its own ``max_error`` uses that
        value for its own filters only.

    :raises ConfigurationError: If the classification hierarchy cannot be
        read from the configuration.
    :raises ValueError: If a test sample has no label.
    :raises RuntimeError: If no photo ID can be obtained from a label.

    Returns the value of :meth:`get_correct_count`, a 2-tuple
    ``(correct,total)``.
    """
    session, metadata = db.get_session_or_error()

    logging.info("Testing the neural networks hierarchy...")

    self.classifications = {}
    self.classifications_expected = {}

    # Get the taxonomic hierarchy from the database.
    self.taxon_hr = db.get_taxon_hierarchy(session, metadata)

    # Get the classification hierarchy from the configurations.
    try:
        self.class_hr = self.config.classification.hierarchy
    except Exception:
        raise ConfigurationError("classification hierarchy not set")

    # Get the name of each level in the classification hierarchy.
    levels = [l.name for l in self.class_hr]

    # Get the prefix for the classification columns.
    try:
        dependent_prefix = self.config.data.dependent_prefix
    except Exception:
        dependent_prefix = OUTPUT_PREFIX

    # Get the expected and recognized classification for each sample in
    # the test data.
    for filter_ in classification_hierarchy_filters(levels, self.taxon_hr):
        logging.info("Classifying on %s", readable_filter(filter_))

        level_name = filter_.get('class')
        level_n = levels.index(level_name)
        level = self.class_hr[level_n]
        test_file = os.path.join(test_data_dir, level.test_file)
        ann_file = os.path.join(ann_dir, level.ann_file)

        # Set the maximum error for classification. This is kept in a
        # separate variable so that the override of one level does not
        # carry over to later filters whose level has no override.
        try:
            level_max_error = level.max_error
        except Exception:
            level_max_error = max_error

        # Replace any placeholders in the paths.
        where = filter_.get('where', {})
        for key, val in where.items():
            val = val if val is not None else '_'
            test_file = test_file.replace("__%s__" % key, val)
            ann_file = ann_file.replace("__%s__" % key, val)

        # Get the class names for this filter.
        classes = db.get_classes_from_filter(session, metadata, filter_)
        assert len(classes) > 0, \
            "No classes found for filter `%s`" % filter_

        # Get the codeword for each class.
        codewords = get_codewords(classes)

        # The network stays `None` when there is only one class, because
        # nothing has to be classified in that case.
        ann = None
        try:
            # Load the ANN.
            if len(classes) > 1:
                ann = libfann.neural_net()
                ann.create_from_file(str(ann_file))

            # Load the test data.
            test_data = TrainData()
            test_data.read_from_file(test_file, dependent_prefix)

            # Test each sample in the test data.
            for label, input_, output in test_data:
                assert len(codewords) == len(output), \
                    "Codeword size mismatch. Codeword has {0} bits, but the " \
                    "training data has {1} output bits.".\
                    format(len(codewords), len(output))

                # Obtain the photo ID from the label.
                if not label:
                    raise ValueError("Test sample is missing a label with " \
                        "photo ID")

                try:
                    photo_id = self.re_photo_id.search(label).group(1)
                    photo_id = int(photo_id)
                except Exception:
                    raise RuntimeError("Failed to obtain the photo ID from " \
                        "the sample label")

                # Start a fresh record for the photo at the first level.
                # This must happen before the single class shortcut below,
                # which already writes to the record.
                if level_n == 0:
                    self.classifications[photo_id] = {}
                    self.classifications_expected[photo_id] = {}

                # Skip classification if there is only one class for this
                # filter.
                if ann is None:
                    logging.debug("Not enough classes for filter. Skipping " \
                        "testing of %s", ann_file)

                    self.classifications[photo_id][level_name] = ['']
                    self.classifications_expected[photo_id][level_name] = ['']
                    continue

                # Set the expected class.
                class_expected = get_classification(codewords, output,
                    level_max_error)
                class_expected = [class_ for mse, class_ in class_expected]

                assert len(class_expected) == 1, \
                    "Class codewords must have one positive bit, found {0}".\
                    format(len(class_expected))

                # Get the recognized class.
                codeword = ann.run(input_)
                class_ann = get_classification(codewords, codeword,
                    level_max_error)
                class_ann = [class_ for mse, class_ in class_ann]

                # Save the classification at each level.
                self.classifications[photo_id][level_name] = class_ann
                self.classifications_expected[photo_id][level_name] = class_expected
        finally:
            # Release the network of this filter, also when testing failed.
            if ann is not None:
                ann.destroy()

    return self.get_correct_count()
def test_with_hierarchy(self, test_data_dir, ann_dir, max_error=0.001):
    """Test each ANN in a classification hierarchy and export results.

    Walks over the filters derived from the classification hierarchy. For
    each filter the test data file of the level is read, every sample is
    run through the neural network of the level, and the expected and
    recognized classes are saved by photo ID and level name in
    ``self.classifications_expected`` and ``self.classifications``.

    When a filter has just one class, no network file is loaded and
    ``['']`` is saved for the level in both dictionaries.

    :param test_data_dir: Directory containing the test data; joined with
        the ``test_file`` of each level.
    :param ann_dir: Directory containing the neural networks; joined with
        the ``ann_file`` of each level.
    :param max_error: Maximum error used when codewords are translated to
        classes. Levels that define ``max_error`` use their own value, and
        that value applies to the filters of that level only.

    :raises ConfigurationError: If the classification hierarchy cannot be
        read from the configuration.
    :raises ValueError: If a test sample has no label.
    :raises RuntimeError: If no photo ID can be obtained from a label.

    Returns the value of :meth:`get_correct_count`, a 2-tuple
    ``(correct,total)``.
    """
    session, metadata = db.get_session_or_error()

    logging.info("Testing the neural networks hierarchy...")

    self.classifications = {}
    self.classifications_expected = {}

    # Get the taxonomic hierarchy from the database.
    self.taxon_hr = db.get_taxon_hierarchy(session, metadata)

    # Get the classification hierarchy from the configurations.
    try:
        self.class_hr = self.config.classification.hierarchy
    except Exception:
        raise ConfigurationError("classification hierarchy not set")

    # Get the name of each level in the classification hierarchy.
    levels = [l.name for l in self.class_hr]

    # Get the prefix for the classification columns.
    try:
        dependent_prefix = self.config.data.dependent_prefix
    except Exception:
        dependent_prefix = OUTPUT_PREFIX

    # Get the expected and recognized classification for each sample in
    # the test data.
    for filter_ in classification_hierarchy_filters(levels, self.taxon_hr):
        logging.info("Classifying on %s", readable_filter(filter_))

        level_name = filter_.get('class')
        level_n = levels.index(level_name)
        level = self.class_hr[level_n]
        test_file = os.path.join(test_data_dir, level.test_file)
        ann_file = os.path.join(ann_dir, level.ann_file)

        # Set the maximum error for classification. The `max_error`
        # argument itself is left untouched; otherwise the value of one
        # level would silently be reused for every filter that follows.
        try:
            filter_max_error = level.max_error
        except Exception:
            filter_max_error = max_error

        # Replace any placeholders in the paths.
        where = filter_.get('where', {})
        for key, val in where.items():
            val = val if val is not None else '_'
            test_file = test_file.replace("__%s__" % key, val)
            ann_file = ann_file.replace("__%s__" % key, val)

        # Get the class names for this filter.
        classes = db.get_classes_from_filter(session, metadata, filter_)
        assert len(classes) > 0, \
            "No classes found for filter `%s`" % filter_

        # Get the codeword for each class.
        codewords = get_codewords(classes)

        # With a single class there is nothing to classify.
        has_multiple_classes = len(classes) > 1

        # Load the ANN.
        ann = libfann.neural_net()
        try:
            if has_multiple_classes:
                ann.create_from_file(str(ann_file))

            # Load the test data.
            test_data = TrainData()
            test_data.read_from_file(test_file, dependent_prefix)

            # Test each sample in the test data.
            for label, input_, output in test_data:
                assert len(codewords) == len(output), \
                    "Codeword size mismatch. Codeword has {0} bits, but the " \
                    "training data has {1} output bits.".\
                    format(len(codewords), len(output))

                # Obtain the photo ID from the label.
                if not label:
                    raise ValueError("Test sample is missing a label with " \
                        "photo ID")

                try:
                    photo_id = self.re_photo_id.search(label).group(1)
                    photo_id = int(photo_id)
                except Exception:
                    raise RuntimeError("Failed to obtain the photo ID from " \
                        "the sample label")

                # Start a fresh record for the photo at the first level.
                if level_n == 0:
                    self.classifications[photo_id] = {}
                    self.classifications_expected[photo_id] = {}

                # Skip classification if there is only one class for this
                # filter.
                if not has_multiple_classes:
                    logging.debug("Not enough classes for filter. Skipping " \
                        "testing of %s", ann_file)

                    self.classifications[photo_id][level_name] = ['']
                    self.classifications_expected[photo_id][level_name] = ['']
                    continue

                # Set the expected class.
                class_expected = get_classification(codewords, output,
                    filter_max_error)
                class_expected = [class_ for mse, class_ in class_expected]

                assert len(class_expected) == 1, \
                    "Class codewords must have one positive bit, found {0}".\
                    format(len(class_expected))

                # Get the recognized class.
                codeword = ann.run(input_)
                class_ann = get_classification(codewords, codeword,
                    filter_max_error)
                class_ann = [class_ for mse, class_ in class_ann]

                # Save the classification at each level.
                self.classifications[photo_id][level_name] = class_ann
                self.classifications_expected[photo_id][level_name] = class_expected
        finally:
            # Release the network of this filter, also when testing failed.
            ann.destroy()

    return self.get_correct_count()