def print_percentages():
    """Print how completely the names under each page root are filled in.

    Every name is attributed to the nearest taxon, starting at the name's own
    taxon and walking up the parent chain, that has ``is_page_root`` set.  For
    each such page root the total number of names is printed, followed by one
    line per tracked attribute giving the number (and percentage) of names
    whose attribute is not None.
    """
    tracked = ["original_name", "original_citation", "page_described", "authority", "year"]
    # taxon id -> id of the page root that taxon belongs to
    root_of = {}

    def _page_root_id(node):
        # Climb towards the root until we hit a page root or a taxon whose
        # page root has already been recorded.
        while not node.is_page_root:
            if node.id in root_of:
                return root_of[node.id]
            node = node.parent
        return node.id

    for txn in Taxon.select():
        root_of[txn.id] = _page_root_id(txn)

    # page root id -> {"total": n, <attribute>: number of names with it set}
    tallies = collections.defaultdict(lambda: collections.defaultdict(int))
    for nm in Name.select():
        root_id = root_of[nm.taxon.id]
        tally = tallies[root_id]
        tally["total"] += 1
        for attr in tracked:
            if getattr(nm, attr) is not None:
                tally[attr] += 1

    for root_id, tally in tallies.items():
        root = Taxon.filter(Taxon.id == root_id)[0]
        print("FILE", root)
        # "total" is removed from the tally once it has been read
        total = tally.pop("total")
        print("Total", total)
        for attr in tracked:
            filled = tally[attr]
            percentage = filled * 100.0 / total
            print("%s: %s (%.2f%%)" % (attr, filled, percentage))
def dup_taxa():
    """Group all taxa by valid name.

    A taxon of rank SUBGENUS is left out when its valid name already has at
    least one taxon recorded at the moment it is encountered.

    Returns a one-element list whose only item is a defaultdict mapping each
    valid name to the list of taxa collected for it.
    """
    by_name = collections.defaultdict(list)
    for txn in Taxon.select():
        is_subgenus = txn.rank == db.constants.SUBGENUS
        if is_subgenus and by_name[txn.valid_name]:
            continue
        by_name[txn.valid_name].append(txn)
    return [by_name]
def taxon(name):
    """Return the first taxon whose valid name equals *name*.

    Underscores in *name* are replaced by spaces before the lookup.

    Raises LookupError, carrying the normalized name, if nothing matches.
    """
    wanted = name.replace("_", " ")
    try:
        matches = Taxon.filter(Taxon.valid_name == wanted)
        return matches[0]
    except IndexError:
        raise LookupError(wanted)
def find_rank_mismatch():
    """Yield each taxon whose base name's group does not match its rank.

    The expected group is derived from the taxon's rank with
    ``db.helpers.group_of_rank``.  A line describing the mismatch is printed
    before the taxon is yielded.
    """
    for txn in Taxon.select():
        expected = db.helpers.group_of_rank(txn.rank)
        actual = txn.base_name.group
        if expected == actual:
            continue
        rank_label = db.constants.string_of_rank(txn.rank)
        group_label = db.constants.string_of_group(actual)
        print("Group mismatch for %s: rank %s but group %s" % (txn, rank_label, group_label))
        yield txn
def keys(self):
    """Return the set of names this namespace offers.

    The result is the union of the keys reported by the superclass, the names
    of all builtins, and the encoded valid names of all taxa whose valid name
    is not None.  The taxon names are computed on the first call and cached
    on the instance as ``_names``.
    """
    names = set(super(_ShellNamespace, self).keys())
    # __builtins__ is the builtins module when this file runs as __main__ but
    # a plain dict when it is imported; dir() on that dict would list dict
    # methods rather than the builtin names, so handle both forms.
    if isinstance(__builtins__, dict):
        names |= set(__builtins__)
    else:
        names |= set(dir(__builtins__))
    if not hasattr(self, "_names"):
        self._names = set(
            _encode_name(taxon.valid_name)
            for taxon in Taxon.select(Taxon.valid_name)
            if taxon.valid_name is not None
        )
    return names | self._names
def name_mismatches(max_count=None, correct=False, correct_undoubted=True):
    """Yield taxa whose stored valid name differs from the computed one.

    Taxa for which ``compute_valid_name()`` returns None are ignored.  Each
    mismatch is printed and the taxon is yielded; when the generator is
    resumed the name may be recomputed, depending on the flags.

    max_count: stop after this many mismatches (None means no limit).
    correct: recompute the name of every mismatching taxon.
    correct_undoubted: recompute the name of species-group taxa that have a
        parent of rank GENUS, regardless of *correct*.
    """
    found = 0
    for txn in Taxon.select():
        expected = txn.compute_valid_name()
        if expected is None or txn.valid_name == expected:
            continue
        print("Mismatch for %s: %s (actual) vs. %s (computed)" % (txn, txn.valid_name, expected))
        yield txn
        found += 1
        # for species-group taxa with a known genus parent, the computed valid name is almost
        # always right (the mismatch will usually happen after a change in genus classification)
        # one area that isn't well-covered yet is autocorrecting gender endings
        undoubted = (
            correct_undoubted
            and txn.base_name.group == db.constants.GROUP_SPECIES
            and txn.has_parent_of_rank(db.constants.GENUS)
        )
        if undoubted or correct:
            txn.recompute_name()
        if max_count is not None and found == max_count:
            return
def childless_taxa():
    """Return the raw query for taxa with rank > 5 that are nobody's parent."""
    query = (
        "SELECT * FROM taxon WHERE rank > 5 AND id NOT IN "
        "(SELECT parent_id FROM taxon WHERE parent_id IS NOT NULL)"
    )
    return Taxon.raw(query)
def parentless_taxa():
    """Return the query for taxa that have no parent."""
    # "== None" is deliberate: "is None" would be evaluated by Python right
    # away instead of being handed to filter() as a condition.
    has_no_parent = Taxon.parent == None  # noqa: E711
    return Taxon.filter(has_no_parent)
def bad_base_names():
    """Return the raw query for taxa whose base name is missing or dangling.

    A taxon matches when its ``base_name_id`` is NULL or does not occur among
    the ids in the ``name`` table.
    """
    query = (
        "SELECT * FROM taxon WHERE base_name_id IS NULL "
        "OR base_name_id NOT IN (SELECT id FROM name)"
    )
    return Taxon.raw(query)
name_row = export_tools.empty_row() name_row[0] = abbrev_of_status(name.status) name_row[1] = abbrev_of_age(txn.age) name_row[3] = txn.full_name() fill_in_name(name_row, name) sprsh.add_row(name_row, status=name.status) # Add children for child in txn.sorted_children(): export_taxon(child, sprsh, recurse=recurse) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Export the database into ODS files') parser.add_argument('--taxon', '-t', help="Taxon to export") parser.add_argument('--recursive', '-r', action='store_true', help="Perform the whole export in a single process") args = parser.parse_args() if args.recursive: root = Taxon.get(Taxon.rank == ROOT) export_spreadsheet(root, recurse=True) elif args.taxon: print "Exporting", args.taxon root = Taxon.get(Taxon.valid_name == args.taxon) export_spreadsheet(root, recurse=False) else: taxa = Taxon.filter(Taxon.is_page_root == True) for taxon in taxa: cmd = ' '.join(['python', 'export.py', '--taxon', taxon.valid_name]) subprocess.call(cmd, shell=True)
def create_root():
    """Create the root taxon: rank ROOT, valid name 'root', marked as a page root."""
    root_fields = dict(rank=ROOT, valid_name='root', is_page_root=True)
    Taxon.create(**root_fields)
def read_file(filename):
    """Import taxa and names from a CSV file.

    The first cell of the first line may hold the valid name of an existing
    taxon; when it is non-empty, that taxon becomes the parent of the taxa
    read from the file.  Every later row is parsed with ``parse_row``:

    * a row with status STATUS_VALID creates a new Taxon, placed under the
      closest taxon on the stack that has a lower rank value;
    * every row produces a Name attached to the most recent valid taxon,
      unless its root name has the form "see <valid name>", in which case the
      base name of the taxon with that valid name is reused.

    The first taxon created from the file is marked as a page root.  A row
    that raises an exception is reported (traceback plus the row) and
    skipped; processing continues with the next row.

    Returns True if no row raised an exception, False otherwise.

    Raises StopIteration if the file has no lines at all, and IndexError if
    the first line has no cells or the named parent taxon cannot be found.
    """
    with codecs.open(filename, mode='r') as csv_file:
        reader = csv.reader(csv_file)
        # next() works on both Python 2 and Python 3, unlike reader.next()
        first_line = next(reader)

        # name of parent of root taxon should be in cell A1
        root_name = first_line[0]
        if root_name:
            root_parent = Taxon.filter(Taxon.valid_name == root_name)[0]
            # maintain stack of taxa that are parents of the current taxon
            stack = [root_parent]
        else:
            stack = []

        # current valid taxon (for synonyms)
        current_valid = None
        # whether current taxon should be marked as root of a page
        is_page_root = True
        error_occurred = False
        for row in reader:
            try:
                # ignore blank rows; csv yields [] for a completely empty line
                if not row or (row[3] == '' and row[0] == ''):
                    continue
                data = parse_row(row)

                if data['status'] == STATUS_VALID:
                    # get stuff off the stack
                    rank = data['rank']
                    # TODO: make this somehow unranked-clade-aware
                    while stack and rank >= stack[-1].rank:
                        stack.pop()
                    # create new Taxon
                    current_valid = Taxon.create(valid_name=data['valid_name'], age=data['age'],
                                                 rank=data['rank'], is_page_root=is_page_root,
                                                 comments=data['comments_taxon'], data=data['data_taxon'])
                    # NOTE(review): no explicit save() follows this assignment;
                    # confirm the model persists attribute changes on its own.
                    if stack:
                        current_valid.parent = stack[-1]
                    # only the first taxon of the file is a page root
                    is_page_root = False
                    stack.append(current_valid)

                # create new Name
                data['taxon'] = current_valid
                assert current_valid.valid_name == data['valid_name'], \
                    "Valid name %s does not match expected %s" % (data['valid_name'], current_valid.valid_name)

                data['data'] = helpers.fix_data(data['data'])

                # Detect whether a name object is already present (Principle of Coordination)
                nm = None
                if data['root_name'].startswith('see '):
                    seen = data['root_name'][4:]
                    nm = Taxon.get(Taxon.valid_name == seen).base_name

                # create a new Name if none was found
                if nm is None:
                    nm = Name.create(**data)

                # set base_name field
                # NOTE(review): as above, no explicit save() is visible here.
                if data['status'] == STATUS_VALID:
                    current_valid.base_name = nm
            except Exception:
                traceback.print_exc()
                print('Error parsing row: %s' % row)
                error_occurred = True
                # ignore error and happily go on with the next
    return not error_occurred