def stem_statistics():
    """Print how many genus- and family-group names have key fields filled in.

    For genus-group names, reports the number of names with a stem and the
    number with a gender; for family-group names, reports the number with a
    type. Each line is printed as ``label: filled/total (percentage%)``.

    A group that contains no names is reported as 0.00% instead of raising
    ZeroDivisionError.
    """

    def report(label, filled, total):
        # Guard the division: an empty group has total == 0.
        percentage = filled / total * 100 if total else 0.0
        print("%s: %s/%s (%.02f%%)" % (label, filled, total, percentage))

    # `~(field >> None)` selects rows where the field is set
    # (presumably peewee's IS NULL operator, negated).
    stem = Name.filter(Name.group == db.constants.GROUP_GENUS, ~(Name.stem >> None)).count()
    gender = Name.filter(Name.group == db.constants.GROUP_GENUS, ~(Name.gender >> None)).count()
    total = Name.filter(Name.group == db.constants.GROUP_GENUS).count()
    print("Genus-group names:")
    report("stem", stem, total)
    report("gender", gender, total)

    print("Family-group names:")
    total = Name.filter(Name.group == db.constants.GROUP_FAMILY).count()
    typ = Name.filter(Name.group == db.constants.GROUP_FAMILY, ~(Name.type >> None)).count()
    report("type", typ, total)
def detect_types(max_count=None, verbose=False):
    """Converts verbatim_types into references to the actual names.

    Looks at family- and genus-group names that have a verbatim_type but no
    type yet, calls ``detect_and_set_type`` on each, and prints how many of
    the attempts succeeded.

    max_count -- passed to the query's ``limit()``; None applies no limit.
    verbose -- forwarded to ``detect_and_set_type``.
    """
    relevant_groups = (db.constants.GROUP_FAMILY, db.constants.GROUP_GENUS)
    pending = Name.filter(
        # Must stay `!= None`: the comparison builds a query expression,
        # so `is not None` would not do the same thing.
        Name.verbatim_type != None,
        Name.type >> None,
        Name.group << relevant_groups,
    ).limit(max_count)

    attempted = 0
    succeeded = 0
    for name in pending:
        attempted += 1
        was_set = name.detect_and_set_type(verbatim_type=name.verbatim_type, verbose=verbose)
        if was_set:
            succeeded += 1
    print("Success: %d/%d" % (succeeded, attempted))
def detect_from_root_name(name, root_name):
    """Set the type of `name` from a genus-group name whose stem matches.

    Candidates are genus-group names whose stem equals `root_name` or
    `root_name` + "i", restricted to those whose taxon is a child of
    `name.taxon`. The type is set and saved only when exactly one candidate
    remains.

    Returns True if a type was set, False otherwise.
    """
    in_genus_group = Name.group == db.constants.GROUP_GENUS
    stem_matches = (Name.stem == root_name) | (Name.stem == root_name + "i")
    matches = [
        candidate
        for candidate in Name.filter(in_genus_group, stem_matches)
        if candidate.taxon.is_child_of(name.taxon)
    ]
    # Zero or several candidates are both treated as "not detected".
    if len(matches) != 1:
        return False

    detected = matches[0]
    print("Detected type for name %s: %s" % (name, detected))
    name.type = detected
    name.save()
    return True
def add_page_described():
    """Interactively fill in page_described for names that lack it.

    Goes through names that have an original citation, no page_described and
    a year other than "in press", ordered by original citation. For each one
    the user is prompted; whatever ``getinput.get_line`` returns is stored in
    ``page_described`` and the name is saved.

    While prompting, "o" calls the name's ``open_description()`` and "s" is
    passed to ``get_line`` as the stop input.
    """
    missing_page = Name.filter(
        Name.original_citation != None,
        Name.page_described >> None,
        Name.year != "in press",
    ).order_by(Name.original_citation)

    for name in missing_page:
        prompt = "Name %s is missing page described, but has original citation {%s}" % (
            name.description(),
            name.original_citation,
        )
        entered = getinput.get_line(
            prompt,
            handlers={"o": lambda _: name.open_description()},
            should_stop=lambda line: line == "s",
        )
        name.page_described = entered
        name.save()
def detect_stems():
    """Infer and store stem and gender for genus-group names without a stem.

    Each name's root_name is passed to ``db.detection.detect_stem_and_gender``.
    Names with no inference are skipped. When the inference is not marked
    confident, it is shown to the user and applied only if they confirm.
    """
    without_stem = Name.filter(Name.group == db.constants.GROUP_GENUS, Name.stem >> None)
    for name in without_stem:
        inferred = db.detection.detect_stem_and_gender(name.root_name)
        if inferred is None:
            continue

        if not inferred.confident:
            # Low-confidence guess: ask before writing it to the database.
            print("%s: stem %s, gender %s" % (name.description(), inferred.stem, inferred.gender))
            confirmed = getinput.yes_no("Is this correct? ")
            if not confirmed:
                continue

        print("Inferred stem and gender for %s: %s, %s" % (name, inferred.stem, inferred.gender))
        name.stem = inferred.stem
        name.gender = inferred.gender
        name.save()
def add_types():
    """Interactively add types to genus-group names that lack one.

    Goes through genus-group names that have an original citation, no type
    and a year comparing greater than "1930", ordered by original citation.
    The name's taxon is displayed, the user is prompted for a verbatim type,
    and any non-None answer is passed to ``detect_and_set_type``.

    While prompting, "o" calls the name's ``open_description()`` and "s" is
    passed to ``get_line`` as the stop input.
    """
    untyped = Name.filter(
        Name.original_citation != None,
        Name.type >> None,
        Name.year > "1930",
        Name.group == db.constants.GROUP_GENUS,
    ).order_by(Name.original_citation)

    for name in untyped:
        name.taxon.display(full=True, max_depth=1)
        prompt = "Name %s is missing type, but has original citation {%s}" % (
            name.description(),
            name.original_citation,
        )
        answer = getinput.get_line(
            prompt,
            handlers={"o": lambda _: name.open_description()},
            should_stop=lambda line: line == "s",
        )
        if answer is None:
            continue
        name.detect_and_set_type(answer, verbose=True)
def root_name_mismatch():
    """Yield family-group names whose root_name disagrees with their type's stem.

    Unavailable names, names whose type has no stem, and names that already
    match are skipped. If stripping suffixes from the root_name yields the
    stem (or the stem minus a trailing "i"), the root_name is corrected to
    the stem and saved. Names that still differ afterwards are reported and
    yielded.
    """
    typed_family_names = Name.filter(
        Name.group == db.constants.GROUP_FAMILY, ~(Name.type >> None)
    )
    for name in typed_family_names:
        if name.is_unavailable():
            continue
        expected = name.type.stem
        if expected is None or name.root_name == expected:
            continue

        # any() stops at the first suffix-stripped form that matches,
        # just like breaking out of an explicit loop.
        fixable = any(
            shortened == expected or shortened + "i" == expected
            for shortened in db.helpers.name_with_suffixes_removed(name.root_name)
        )
        if fixable:
            print("Autocorrecting root name: %s -> %s" % (name.root_name, expected))
            name.root_name = expected
            name.save()

        if name.root_name != expected:
            print("Stem mismatch for %s: %s vs. %s" % (name, name.root_name, expected))
            yield name
def detect_types_from_root_names(max_count=None):
    """Detects types for family-group names on the basis of the root_name.

    Family-group names without a type are visited in descending id order
    (limited by `max_count`); unavailable names are skipped. Detection is
    tried with the full root_name first and then with each suffix-stripped
    form, stopping at the first success. A success/attempt tally is printed
    at the end.
    """

    def try_root(name, root_name):
        # Succeeds only when exactly one genus-group name below name.taxon
        # has a stem equal to root_name or root_name + "i".
        stem_matches = (Name.stem == root_name) | (Name.stem == root_name + "i")
        genus_names = Name.filter(Name.group == db.constants.GROUP_GENUS, stem_matches)
        matches = [
            candidate for candidate in genus_names if candidate.taxon.is_child_of(name.taxon)
        ]
        if len(matches) != 1:
            return False
        print("Detected type for name %s: %s" % (name, matches[0]))
        name.type = matches[0]
        name.save()
        return True

    untyped = (
        Name.filter(Name.group == db.constants.GROUP_FAMILY, Name.type >> None)
        .order_by(Name.id.desc())
        .limit(max_count)
    )

    attempted = 0
    succeeded = 0
    for name in untyped:
        if name.is_unavailable():
            continue
        attempted += 1
        # Short-circuits: stripped forms are only tried if the full
        # root_name fails, and only until one of them succeeds.
        detected = try_root(name, name.root_name) or any(
            try_root(name, shortened)
            for shortened in db.helpers.name_with_suffixes_removed(name.root_name)
        )
        if detected:
            succeeded += 1
        else:
            print("Could not detect type for name %s (root_name = %s)" % (name, name.root_name))
    print("Success: %d/%d" % (succeeded, attempted))
def dup_genus():
    """Group genus-group names by "root_name authority, year".

    Returns a one-element list holding a defaultdict that maps each such
    string to the list of names sharing it. No filtering for duplicates
    happens here; presumably the caller picks out keys with several entries.
    """
    by_full_name = collections.defaultdict(list)
    genus_names = Name.filter(Name.group == db.constants.GROUP_GENUS)
    for genus in genus_names:
        key = "%s %s, %s" % (genus.root_name, genus.authority, genus.year)
        by_full_name[key].append(genus)
    return [by_full_name]
def endswith(end):
    """Return a list of genus-group names whose root_name matches "%<end>".

    The pattern is applied with the field's ``%`` operator (presumably a SQL
    LIKE match, so this finds root names ending in `end`).
    """
    pattern = "%%%s" % end  # a literal "%" wildcard followed by `end`
    is_genus = Name.group == db.constants.GROUP_GENUS
    matching = Name.filter(is_genus, Name.root_name % pattern)
    return list(matching)
def n(name):
    """Finds names with the given root name or original name.

    Returns the matches as a list.
    """
    matches_either = (Name.root_name == name) | (Name.original_name == name)
    return list(Name.filter(matches_either))