Example #1
0
def print_percentages():
    """Print, per page-root taxon, how completely its names are filled in.

    Every taxon is mapped to the id of the closest taxon in its parent chain
    (itself included) whose ``is_page_root`` flag is set. Names are then
    grouped by the page root of their taxon, and for each group the number and
    percentage of names with a non-``None`` value is printed for every tracked
    attribute.
    """
    attributes = ["original_name", "original_citation", "page_described", "authority", "year"]
    page_root_of = {}

    def _resolve_page_root(node):
        # Walk up the parent chain until we reach a page root or a taxon whose
        # page root has already been recorded.
        while True:
            if node.is_page_root:
                return node.id
            if node.id in page_root_of:
                return page_root_of[node.id]
            node = node.parent

    for taxon in Taxon.select():
        page_root_of[taxon.id] = _resolve_page_root(taxon)

    # page root id -> {"total": number of names, <attribute>: number of filled-in names}
    tallies = collections.defaultdict(lambda: collections.defaultdict(int))
    for name in Name.select():
        root_id = page_root_of[name.taxon.id]
        tally = tallies[root_id]
        tally["total"] += 1
        for attribute in attributes:
            if getattr(name, attribute) is None:
                continue
            tally[attribute] += 1

    for root_id, tally in tallies.items():
        root = Taxon.filter(Taxon.id == root_id)[0]
        print("FILE", root)
        # "total" is removed from the tally so only per-attribute counts remain.
        total = tally.pop("total")
        print("Total", total)
        for attribute in attributes:
            filled = tally[attribute]
            percentage = filled * 100.0 / total
            print("%s: %s (%.2f%%)" % (attribute, filled, percentage))
Example #2
0
def dup_taxa():
    """Group every taxon by its valid name.

    A taxon of rank ``SUBGENUS`` is left out when another taxon with the same
    valid name has already been collected.

    Returns a one-element list holding the mapping from valid name to the list
    of taxa collected under that name.
    """
    by_name = collections.defaultdict(list)
    for taxon in Taxon.select():
        repeats_existing_name = (
            taxon.rank == db.constants.SUBGENUS and bool(by_name[taxon.valid_name])
        )
        if not repeats_existing_name:
            by_name[taxon.valid_name].append(taxon)
    return [by_name]
Example #3
0
def find_rank_mismatch():
    """Yield each taxon whose rank implies a different group than its base name has.

    The expected group comes from ``db.helpers.group_of_rank``; a message
    describing the mismatch is printed before the taxon is yielded.
    """
    for taxon in Taxon.select():
        if db.helpers.group_of_rank(taxon.rank) == taxon.base_name.group:
            continue
        rank_label = db.constants.string_of_rank(taxon.rank)
        group_label = db.constants.string_of_group(taxon.base_name.group)
        message = "Group mismatch for %s: rank %s but group %s" % (taxon, rank_label, group_label)
        print(message)
        yield taxon
Example #4
0
 def keys(self):
     """Return the set of names available in this namespace.

     The result is the union of the keys stored in the namespace itself, the
     names exported by the ``builtins`` module, and the encoded valid names
     of all taxa. The taxon names are loaded once and cached on
     ``self._names``.
     """
     # Import the module explicitly: the ``__builtins__`` global is a CPython
     # implementation detail and is a plain dict in modules other than
     # ``__main__``, where dir() would list dict methods instead of builtins.
     import builtins

     keys = set(super(_ShellNamespace, self).keys())
     keys |= set(dir(builtins))
     if not hasattr(self, "_names"):
         self._names = {
             _encode_name(taxon.valid_name)
             for taxon in Taxon.select(Taxon.valid_name)
             if taxon.valid_name is not None
         }
     return keys | self._names
Example #5
0
def name_mismatches(max_count=None, correct=False, correct_undoubted=True):
    """Yield taxa whose stored valid name differs from the computed one.

    Each mismatch is printed and the taxon is yielded. After the consumer
    resumes the generator, the name is recomputed if ``correct`` is true, or if
    ``correct_undoubted`` is true and the taxon is a species-group taxon that
    has a parent of genus rank. Iteration stops once ``max_count`` mismatches
    have been reported (``None`` means no limit).
    """
    reported = 0
    for taxon in Taxon.select():
        expected_name = taxon.compute_valid_name()
        if expected_name is None or taxon.valid_name == expected_name:
            continue

        print("Mismatch for %s: %s (actual) vs. %s (computed)" % (taxon, taxon.valid_name, expected_name))
        yield taxon
        reported += 1

        # For species-group taxa with a known genus parent, the computed valid name is
        # almost always right (the mismatch usually appears after a change in genus
        # classification). Autocorrecting gender endings is not well covered yet.
        undoubted = (
            correct_undoubted
            and taxon.base_name.group == db.constants.GROUP_SPECIES
            and taxon.has_parent_of_rank(db.constants.GENUS)
        )
        if undoubted or correct:
            taxon.recompute_name()

        if max_count is not None and reported == max_count:
            return