Example #1
0
    def _load_taxon_hierarchy(self):
        """Populate ``self.taxon_hr`` with the taxon hierarchy if it is unset.

        This is deliberately not done in the constructor, because
        :meth:`set_photo_count_min` influences the taxon hierarchy.

        A database session is always requested first, so any error raised by
        ``db.get_session_or_error()`` propagates even when the hierarchy is
        already loaded.
        """
        session, metadata = db.get_session_or_error()

        # Nothing to do when a (truthy) hierarchy is already present.
        if self.taxon_hr:
            return

        self.taxon_hr = db.get_taxon_hierarchy(session, metadata)
Example #2
0
    def test_get_taxon_hierarchy(self):
        """Test the get_taxon_hierarchy() method.

        The hierarchy is checked twice: once with the current value of
        ``conf.photo_count_min`` and once with it set to 4. The original
        value of ``conf.photo_count_min`` is restored afterwards so this
        test does not change the configuration seen by other tests.
        """
        with db.session_scope(META_FILE) as (session, metadata):
            expected = {
                u'Paphiopedilum': {
                    u'Brachypetalum': [u'godefroyae', u'wenshanense']
                },
                u'Selenipedium': {
                    None: [u'palmifolium']
                },
                u'Mexipedium': {
                    None: [u'xerophyticum']
                },
                u'Cypripedium': {
                    u'Trigonopedia': [u'fargesii', u'sichuanense'],
                    u'Obtusipetala': [u'flavum'],
                    u'Arietinum': [u'plectrochilum']
                },
                u'Phragmipedium': {
                    u'Micropetalum': [u'besseae']
                }
            }

            hier = db.get_taxon_hierarchy(session, metadata)
            self.assertEqual(str(hier), str(expected))

            # Setting `photo_count_min` to 4 should only return the species with
            # at least 4 photos. `conf` is module-level state, so remember the
            # previous value and put it back even if the assertion fails.
            previous_min = conf.photo_count_min
            conf.photo_count_min = 4
            try:
                expected = {
                    u'Paphiopedilum': {
                        u'Brachypetalum': [u'godefroyae']
                    },
                    u'Cypripedium': {
                        u'Obtusipetala': [u'flavum']
                    },
                    u'Phragmipedium': {
                        u'Micropetalum': [u'besseae']
                    }
                }

                hier = db.get_taxon_hierarchy(session, metadata)
                self.assertEqual(str(hier), str(expected))
            finally:
                conf.photo_count_min = previous_min
Example #3
0
    def _load_taxon_hierarchy(self):
        """Fill in ``self.taxon_hr`` from the database when it is still empty.

        Kept apart from the constructor because
        :meth:`set_photo_count_min` influences the taxon hierarchy.

        The database session is obtained unconditionally, so an error from
        ``db.get_session_or_error()`` is raised whether or not the hierarchy
        was loaded before.
        """
        session, metadata = db.get_session_or_error()

        # An existing, non-empty hierarchy is left untouched.
        if self.taxon_hr:
            return

        self.taxon_hr = db.get_taxon_hierarchy(session, metadata)
Example #4
0
    def get_taxon_hierarchy(self):
        """Return the taxon hierarchy.

        The metadata database is the preferred source. When obtaining the
        session or reading the hierarchy raises
        :class:`DatabaseSessionError`, the hierarchy is taken from the
        ``classification.taxa`` section of the configuration instead.
        """
        try:
            session, metadata = db.get_session_or_error()
            return db.get_taxon_hierarchy(session, metadata)
        except DatabaseSessionError:
            # No usable database session; fall back to the configuration.
            return self.config.classification.taxa.as_dict()
Example #5
0
    def get_taxon_hierarchy(self):
        """Return the taxon hierarchy.

        Tries the metadata database first. If either the session lookup or
        the hierarchy query raises :class:`DatabaseSessionError`, the value
        of ``classification.taxa`` from the configuration is returned as a
        dictionary.
        """
        try:
            session, metadata = db.get_session_or_error()
            return db.get_taxon_hierarchy(session, metadata)
        except DatabaseSessionError:
            # Database unavailable: use the hierarchy from the configuration.
            return self.config.classification.taxa.as_dict()
Example #6
0
    def test_with_hierarchy(self, test_data_dir, ann_dir, max_error=0.001):
        """Test each ANN in a classification hierarchy and record the results.

        For every filter produced from the classification hierarchy, the
        test data file and ANN file of the matching level are resolved
        relative to `test_data_dir` and `ann_dir`, and each test sample is
        classified. The recognized and the expected classes are stored per
        photo ID and level name in ``self.classifications`` and
        ``self.classifications_expected``.

        `max_error` is the default maximum error for classification. A level
        that defines its own ``max_error`` overrides it for that level only.

        Raises :class:`ConfigurationError` if the classification hierarchy is
        not set, :class:`ValueError` if a test sample has no label, and
        :class:`RuntimeError` if no photo ID can be obtained from a label.

        Returns the result of :meth:`get_correct_count`, a 2-tuple
        ``(correct,total)``.
        """
        session, metadata = db.get_session_or_error()

        logging.info("Testing the neural networks hierarchy...")

        self.classifications = {}
        self.classifications_expected = {}

        # Get the taxonomic hierarchy from the database.
        self.taxon_hr = db.get_taxon_hierarchy(session, metadata)

        # Get the classification hierarchy from the configurations.
        try:
            self.class_hr = self.config.classification.hierarchy
        except Exception:
            raise ConfigurationError("classification hierarchy not set")

        # Get the name of each level in the classification hierarchy.
        levels = [lvl.name for lvl in self.class_hr]

        # Get the prefix for the classification columns.
        try:
            dependent_prefix = self.config.data.dependent_prefix
        except Exception:
            dependent_prefix = OUTPUT_PREFIX

        # Get the expected and recognized classification for each sample in
        # the test data.
        for filter_ in classification_hierarchy_filters(levels, self.taxon_hr):
            logging.info("Classifying on %s" % readable_filter(filter_))

            level_name = filter_.get('class')
            level_n = levels.index(level_name)
            level = self.class_hr[level_n]
            test_file = os.path.join(test_data_dir, level.test_file)
            ann_file = os.path.join(ann_dir, level.ann_file)

            # Set the maximum error for this level. A separate variable is
            # used so that one level's setting does not carry over to later
            # levels that rely on the `max_error` argument.
            try:
                level_max_error = level.max_error
            except Exception:
                level_max_error = max_error

            # Replace any placeholders in the paths.
            where = filter_.get('where', {})
            for key, val in where.items():
                val = val if val is not None else '_'
                test_file = test_file.replace("__%s__" % key, val)
                ann_file = ann_file.replace("__%s__" % key, val)

            # Get the class names for this filter.
            classes = db.get_classes_from_filter(session, metadata, filter_)
            assert len(classes) > 0, \
                "No classes found for filter `%s`" % filter_

            # Get the codeword for each class.
            codewords = get_codewords(classes)

            # Load the ANN. With a single class there is nothing to classify,
            # so no network is loaded and `ann` stays None.
            ann = None
            if len(classes) > 1:
                ann = libfann.neural_net()
                ann.create_from_file(str(ann_file))

            try:
                # Load the test data.
                test_data = TrainData()
                test_data.read_from_file(test_file, dependent_prefix)

                # Test each sample in the test data.
                for label, input_, output in test_data:
                    assert len(codewords) == len(output), \
                        "Codeword size mismatch. Codeword has {0} bits, but the " \
                        "training data has {1} output bits.".\
                        format(len(codewords), len(output))

                    # Obtain the photo ID from the label.
                    if not label:
                        raise ValueError("Test sample is missing a label with " \
                            "photo ID")

                    try:
                        photo_id = self.re_photo_id.search(label).group(1)
                        photo_id = int(photo_id)
                    except Exception:
                        raise RuntimeError("Failed to obtain the photo ID from " \
                            "the sample label")

                    # Start a fresh record for the photo at the top level.
                    # This must happen before the single-class shortcut
                    # below, which writes into these records.
                    if level_n == 0:
                        self.classifications[photo_id] = {}
                        self.classifications_expected[photo_id] = {}

                    # Skip classification if there is only one class for this
                    # filter.
                    if ann is None:
                        logging.debug("Not enough classes for filter. Skipping " \
                            "testing of %s" % ann_file)

                        self.classifications[photo_id][level_name] = ['']
                        self.classifications_expected[photo_id][level_name] = ['']
                        continue

                    # Set the expected class.
                    class_expected = get_classification(codewords, output,
                        level_max_error)
                    class_expected = [class_ for mse,class_ in class_expected]

                    assert len(class_expected) == 1, \
                        "Class codewords must have one positive bit, found {0}".\
                        format(len(class_expected))

                    # Get the recognized class.
                    codeword = ann.run(input_)
                    class_ann = get_classification(codewords, codeword,
                        level_max_error)
                    class_ann = [class_ for mse,class_ in class_ann]

                    # Save the classification at this level.
                    self.classifications[photo_id][level_name] = class_ann
                    self.classifications_expected[photo_id][level_name] = class_expected
            finally:
                # Release the network only if one was loaded for this filter,
                # also when testing a sample raised an exception.
                if ann is not None:
                    ann.destroy()

        return self.get_correct_count()
Example #7
0
    def test_with_hierarchy(self, test_data_dir, ann_dir, max_error=0.001):
        """Test each ANN in a classification hierarchy and record the results.

        For every filter produced from the classification hierarchy, the
        test data file and ANN file of the matching level are resolved
        relative to `test_data_dir` and `ann_dir`, and each test sample is
        classified. The recognized and the expected classes are stored per
        photo ID and level name in ``self.classifications`` and
        ``self.classifications_expected``.

        `max_error` is the default maximum error for classification. A level
        that defines its own ``max_error`` overrides it for that level only.

        Raises :class:`ConfigurationError` if the classification hierarchy is
        not set, :class:`ValueError` if a test sample has no label, and
        :class:`RuntimeError` if no photo ID can be obtained from a label.

        Returns the result of :meth:`get_correct_count`, a 2-tuple
        ``(correct,total)``.
        """
        session, metadata = db.get_session_or_error()

        logging.info("Testing the neural networks hierarchy...")

        self.classifications = {}
        self.classifications_expected = {}

        # Get the taxonomic hierarchy from the database.
        self.taxon_hr = db.get_taxon_hierarchy(session, metadata)

        # Get the classification hierarchy from the configurations.
        try:
            self.class_hr = self.config.classification.hierarchy
        except Exception:
            raise ConfigurationError("classification hierarchy not set")

        # Get the name of each level in the classification hierarchy.
        levels = [lvl.name for lvl in self.class_hr]

        # Get the prefix for the classification columns.
        try:
            dependent_prefix = self.config.data.dependent_prefix
        except Exception:
            dependent_prefix = OUTPUT_PREFIX

        # Get the expected and recognized classification for each sample in
        # the test data.
        for filter_ in classification_hierarchy_filters(levels, self.taxon_hr):
            logging.info("Classifying on %s" % readable_filter(filter_))

            level_name = filter_.get('class')
            level_n = levels.index(level_name)
            level = self.class_hr[level_n]
            test_file = os.path.join(test_data_dir, level.test_file)
            ann_file = os.path.join(ann_dir, level.ann_file)

            # Set the maximum error for this level. A separate variable is
            # used so that one level's setting does not carry over to later
            # levels that rely on the `max_error` argument.
            try:
                level_max_error = level.max_error
            except Exception:
                level_max_error = max_error

            # Replace any placeholders in the paths.
            where = filter_.get('where', {})
            for key, val in where.items():
                val = val if val is not None else '_'
                test_file = test_file.replace("__%s__" % key, val)
                ann_file = ann_file.replace("__%s__" % key, val)

            # Get the class names for this filter.
            classes = db.get_classes_from_filter(session, metadata, filter_)
            assert len(classes) > 0, \
                "No classes found for filter `%s`" % filter_

            # Get the codeword for each class.
            codewords = get_codewords(classes)

            # Create the ANN object; it is released in the `finally` below
            # even if loading or testing raises an exception.
            ann = libfann.neural_net()
            try:
                # Only load the network from file when there is something to
                # classify.
                if len(classes) > 1:
                    ann.create_from_file(str(ann_file))

                # Load the test data.
                test_data = TrainData()
                test_data.read_from_file(test_file, dependent_prefix)

                # Test each sample in the test data.
                for label, input_, output in test_data:
                    assert len(codewords) == len(output), \
                        "Codeword size mismatch. Codeword has {0} bits, but the " \
                        "training data has {1} output bits.".\
                        format(len(codewords), len(output))

                    # Obtain the photo ID from the label.
                    if not label:
                        raise ValueError("Test sample is missing a label with " \
                            "photo ID")

                    try:
                        photo_id = self.re_photo_id.search(label).group(1)
                        photo_id = int(photo_id)
                    except Exception:
                        raise RuntimeError("Failed to obtain the photo ID from " \
                            "the sample label")

                    # Start a fresh record for the photo at the top level.
                    if level_n == 0:
                        self.classifications[photo_id] = {}
                        self.classifications_expected[photo_id] = {}

                    # Skip classification if there is only one class for this
                    # filter.
                    if not len(classes) > 1:
                        logging.debug("Not enough classes for filter. Skipping " \
                            "testing of %s" % ann_file)

                        self.classifications[photo_id][level_name] = ['']
                        self.classifications_expected[photo_id][level_name] = ['']
                        continue

                    # Set the expected class.
                    class_expected = get_classification(codewords, output,
                        level_max_error)
                    class_expected = [class_ for mse,class_ in class_expected]

                    assert len(class_expected) == 1, \
                        "Class codewords must have one positive bit, found {0}".\
                        format(len(class_expected))

                    # Get the recognized class.
                    codeword = ann.run(input_)
                    class_ann = get_classification(codewords, codeword,
                        level_max_error)
                    class_ann = [class_ for mse,class_ in class_ann]

                    # Save the classification at this level.
                    self.classifications[photo_id][level_name] = class_ann
                    self.classifications_expected[photo_id][level_name] = class_expected
            finally:
                ann.destroy()

        return self.get_correct_count()