def print_percentages():
    """Print how completely the names under each page root are filled in.

    Every taxon is first mapped to the id of the nearest taxon on its parent
    chain (itself included) whose ``is_page_root`` flag is set. Names are then
    tallied per page root. For each root this prints the root taxon, the total
    number of names, and for every tracked attribute the count and percentage
    of names whose value for that attribute is not None.
    """
    attributes = ["original_name", "original_citation", "page_described", "authority", "year"]
    page_root_of = {}

    def _find_parent(taxon):
        # Climb the parent chain until hitting a page root or a taxon whose
        # page root has already been resolved.
        current = taxon
        while not current.is_page_root:
            if current.id in page_root_of:
                return page_root_of[current.id]
            current = current.parent
        return current.id

    for taxon in Taxon.select():
        page_root_of[taxon.id] = _find_parent(taxon)

    # page root id -> {"total": n, attribute: number of names with it set}
    tallies = collections.defaultdict(lambda: collections.defaultdict(int))
    for name in Name.select():
        root_id = page_root_of[name.taxon.id]
        tally = tallies[root_id]
        tally["total"] += 1
        for attribute in attributes:
            if getattr(name, attribute) is None:
                continue
            tally[attribute] += 1

    for root_id, tally in tallies.items():
        root = Taxon.filter(Taxon.id == root_id)[0]
        print("FILE", root)
        # Remove the total from the tally so only attribute counts remain.
        total = tally.pop("total")
        print("Total", total)
        for attribute in attributes:
            filled = tally[attribute]
            percentage = filled * 100.0 / total
            print("%s: %s (%.2f%%)" % (attribute, filled, percentage))
def taxon(name):
    """Look up the taxon whose valid name equals *name*.

    Underscores in *name* are treated as spaces before the lookup. The first
    match returned by ``Taxon.filter`` is used.

    Raises:
        LookupError: if no taxon matches; the error carries the normalized name.
    """
    wanted = name.replace("_", " ")
    try:
        match = Taxon.filter(Taxon.valid_name == wanted)[0]
    except IndexError:
        # An empty result set surfaces as IndexError on the [0] access.
        raise LookupError(wanted)
    else:
        return match
def parentless_taxa():
    """Return the result of filtering ``Taxon`` on ``parent == None``."""
    # Keep ``== None`` rather than ``is None``: the comparison result is handed
    # to Taxon.filter as the filter expression (presumably an ORM-overloaded
    # operator; confirm), whereas ``is None`` would only produce a plain bool.
    has_no_parent = Taxon.parent == None  # noqa: E711
    return Taxon.filter(has_no_parent)
name_row = export_tools.empty_row() name_row[0] = abbrev_of_status(name.status) name_row[1] = abbrev_of_age(txn.age) name_row[3] = txn.full_name() fill_in_name(name_row, name) sprsh.add_row(name_row, status=name.status) # Add children for child in txn.sorted_children(): export_taxon(child, sprsh, recurse=recurse) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Export the database into ODS files') parser.add_argument('--taxon', '-t', help="Taxon to export") parser.add_argument('--recursive', '-r', action='store_true', help="Perform the whole export in a single process") args = parser.parse_args() if args.recursive: root = Taxon.get(Taxon.rank == ROOT) export_spreadsheet(root, recurse=True) elif args.taxon: print "Exporting", args.taxon root = Taxon.get(Taxon.valid_name == args.taxon) export_spreadsheet(root, recurse=False) else: taxa = Taxon.filter(Taxon.is_page_root == True) for taxon in taxa: cmd = ' '.join(['python', 'export.py', '--taxon', taxon.valid_name]) subprocess.call(cmd, shell=True)
def read_file(filename):
    """Import taxa and names from a CSV file.

    Cell A1 of the file holds the valid name of the taxon that is the parent
    of the file's root taxon; if it is empty, the root taxon gets no parent.
    Every following non-blank row is passed to ``parse_row``. Rows with valid
    status create a new ``Taxon`` (the first one is flagged as page root) and
    every row yields a ``Name`` attached to the current valid taxon, unless
    its root name has the form ``see <valid name>``, in which case the base
    name of that other taxon is reused.

    A row that raises is reported (traceback plus the row) and skipped; the
    import continues with the next row.

    Args:
        filename: path of the CSV file to read.

    Returns:
        True if every row was processed without an exception, else False.

    Raises:
        StopIteration: if the file contains no rows at all.
        IndexError: if the first row is empty or the taxon named in A1 is
            not found.
    """
    with codecs.open(filename, mode='r') as csv_file:
        reader = csv.reader(csv_file)
        # Builtin next() works on both Python 2.6+ and Python 3 iterators,
        # unlike the Python-2-only reader.next() method.
        first_line = next(reader)

        # name of parent of root taxon should be in cell A1
        root_name = first_line[0]
        if root_name:
            root_parent = Taxon.filter(Taxon.valid_name == root_name)[0]
            # maintain stack of taxa that are parents of the current taxon
            stack = [root_parent]
        else:
            stack = []

        # current valid taxon (for synonyms)
        current_valid = None
        # whether current taxon should be marked as root of a page
        is_page_root = True
        error_occurred = False

        for row in reader:
            try:
                # ignore blank rows
                if row[3] == '' and row[0] == '':
                    continue
                data = parse_row(row)

                if data['status'] == STATUS_VALID:
                    # get stuff off the stack: drop every entry whose rank is
                    # not strictly above the rank of the new taxon
                    rank = data['rank']
                    # TODO: make this somehow unranked-clade-aware
                    while stack and rank >= stack[-1].rank:
                        stack.pop()

                    # create new Taxon
                    current_valid = Taxon.create(
                        valid_name=data['valid_name'],
                        age=data['age'],
                        rank=data['rank'],
                        is_page_root=is_page_root,
                        comments=data['comments_taxon'],
                        data=data['data_taxon'],
                    )
                    # NOTE(review): parent (and base_name below) are assigned
                    # after create() with no explicit save in this function;
                    # confirm the model persists attribute assignments.
                    if stack:
                        current_valid.parent = stack[-1]
                    # only the first taxon created from the file is a page root
                    if is_page_root:
                        is_page_root = False
                    stack.append(current_valid)

                # create new Name
                data['taxon'] = current_valid
                assert current_valid.valid_name == data['valid_name'], \
                    "Valid name %s does not match expected %s" % (data['valid_name'], current_valid.valid_name)

                data['data'] = helpers.fix_data(data['data'])

                # Detect whether a name object is already present (Principle of Coordination)
                nm = None
                if data['root_name'][0:4] == 'see ':
                    seen = data['root_name'][4:]
                    nm = Taxon.get(Taxon.valid_name == seen).base_name

                # create a new Name if none was found
                if nm is None:
                    nm = Name.create(**data)

                # set base_name field
                if data['status'] == STATUS_VALID:
                    current_valid.base_name = nm
            except Exception:
                # Deliberate best-effort: report the failing row, remember
                # that something went wrong, and go on with the next row.
                traceback.print_exc()
                print('Error parsing row: %s' % row)
                error_occurred = True

    return not error_occurred