Ejemplo n.º 1
0
def print_percentages():
    """Print, per page-root taxon, how completely its names are filled in.

    Every taxon is first mapped to the id of the nearest taxon on its parent
    chain (itself included) that is a page root. Names are then tallied under
    the page root of their taxon. For each page root the total number of names
    is printed, followed by the count and percentage of names that have a
    non-None value for each tracked attribute.
    """
    tracked_attributes = ["original_name", "original_citation", "page_described", "authority", "year"]
    page_root_of = {}

    def _page_root_id(node):
        # Climb the parent chain until reaching a page root or an ancestor
        # whose page root has already been recorded.
        while not node.is_page_root:
            if node.id in page_root_of:
                return page_root_of[node.id]
            node = node.parent
        return node.id

    for taxon in Taxon.select():
        page_root_of[taxon.id] = _page_root_id(taxon)

    tallies = collections.defaultdict(lambda: collections.defaultdict(int))
    for name in Name.select():
        tally = tallies[page_root_of[name.taxon.id]]
        tally["total"] += 1
        for attribute in tracked_attributes:
            if getattr(name, attribute) is not None:
                tally[attribute] += 1

    for root_id, tally in tallies.items():
        page_root = Taxon.filter(Taxon.id == root_id)[0]
        print("FILE", page_root)
        # Take "total" out so that only per-attribute counts remain in the tally.
        total = tally.pop("total")
        print("Total", total)
        for attribute in tracked_attributes:
            filled = tally[attribute]
            print("%s: %s (%.2f%%)" % (attribute, filled, filled * 100.0 / total))
Ejemplo n.º 2
0
def dup_taxa():
    """Group every taxon by its valid name.

    A taxon of rank SUBGENUS is left out when its name already has at least
    one taxon recorded (presumably so a subgenus sharing its genus's name is
    not counted as a duplicate -- confirm with the caller).

    Returns a one-element list containing the mapping from valid name to the
    list of taxa with that name.
    """
    by_name = collections.defaultdict(list)
    for txn in Taxon.select():
        same_name = by_name[txn.valid_name]
        if same_name and txn.rank == db.constants.SUBGENUS:
            continue
        same_name.append(txn)
    return [by_name]
Ejemplo n.º 3
0
def taxon(name):
    """Return the first taxon whose valid name equals *name*.

    Underscores in *name* are replaced by spaces before the lookup.

    Raises:
        LookupError: no taxon has that valid name; the exception carries the
            name that was searched for (after underscore replacement).
    """
    wanted = name.replace("_", " ")
    try:
        match = Taxon.filter(Taxon.valid_name == wanted)[0]
    except IndexError:
        raise LookupError(wanted)
    else:
        return match
Ejemplo n.º 4
0
def find_rank_mismatch():
    """Yield each taxon whose base name's group differs from the group of its rank.

    For every taxon, the group expected for its rank (via
    ``db.helpers.group_of_rank``) is compared with the group of its base name.
    Each mismatch is printed and the taxon is then yielded.
    """
    for taxon in Taxon.select():
        rank_group = db.helpers.group_of_rank(taxon.rank)
        name_group = taxon.base_name.group
        if rank_group == name_group:
            continue
        message = "Group mismatch for %s: rank %s but group %s" % (
            taxon,
            db.constants.string_of_rank(taxon.rank),
            db.constants.string_of_group(name_group),
        )
        print(message)
        yield taxon
Ejemplo n.º 5
0
 def keys(self):
     """Return the set of all names this namespace reports as available.

     The result is the union of:
       * the keys of the parent class's mapping,
       * the names exported by the ``builtins`` module, and
       * the valid name of every taxon (skipping taxa without one), each
         passed through ``_encode_name``.

     The taxon names are queried once and cached on ``self._names``; later
     calls reuse the cached set.
     """
     # Imported here so this method does not depend on a file-level import.
     import builtins

     names = set(super(_ShellNamespace, self).keys())
     # Use the builtins module rather than the `__builtins__` global: the
     # latter is a CPython implementation detail that is the module only in
     # `__main__` and a plain dict in imported modules, where dir() would
     # list dict methods instead of builtin names.
     names.update(dir(builtins))
     if not hasattr(self, "_names"):
         self._names = {
             _encode_name(taxon.valid_name)
             for taxon in Taxon.select(Taxon.valid_name)
             if taxon.valid_name is not None
         }
     return names | self._names
Ejemplo n.º 6
0
def name_mismatches(max_count=None, correct=False, correct_undoubted=True):
    """Yield taxa whose stored valid name differs from the computed one.

    Each mismatch is printed and the taxon is yielded. Once the consumer
    resumes the generator, the taxon's name is recomputed if either
    correction rule applies.

    Args:
        max_count: stop after this many mismatches; None means no limit.
        correct: recompute the name of every mismatching taxon.
        correct_undoubted: recompute the name of mismatching species-group
            taxa that have a parent of genus rank, even if *correct* is False.
    """
    seen = 0
    for taxon in Taxon.select():
        expected = taxon.compute_valid_name()
        if expected is None or taxon.valid_name == expected:
            continue

        print("Mismatch for %s: %s (actual) vs. %s (computed)" % (taxon, taxon.valid_name, expected))
        yield taxon
        seen += 1

        # For species-group taxa with a known genus parent, the computed valid
        # name is almost always right (the mismatch will usually happen after
        # a change in genus classification). One area that isn't well-covered
        # yet is autocorrecting gender endings.
        undoubted = (
            correct_undoubted
            and taxon.base_name.group == db.constants.GROUP_SPECIES
            and taxon.has_parent_of_rank(db.constants.GENUS)
        )
        if undoubted or correct:
            taxon.recompute_name()

        if max_count is not None and seen == max_count:
            return
Ejemplo n.º 7
0
def childless_taxa():
    """Return the taxa with rank greater than 5 that no taxon lists as its parent.

    The lookup is done with a raw SQL query against the ``taxon`` table.
    NOTE(review): the meaning of rank 5 is not visible here -- confirm
    against the rank constants.
    """
    query = "SELECT * FROM taxon WHERE rank > 5 AND id NOT IN (SELECT parent_id FROM taxon WHERE parent_id IS NOT NULL)"
    return Taxon.raw(query)
Ejemplo n.º 8
0
def parentless_taxa():
    """Return the taxa that have no parent."""
    # `== None` (rather than `is None`) is intentional: the comparison result
    # is handed to filter() as the query condition.
    without_parent = Taxon.parent == None  # noqa: E711
    return Taxon.filter(without_parent)
Ejemplo n.º 9
0
def bad_base_names():
    """Return the taxa whose base name is missing or refers to no existing name.

    Uses a raw SQL query: a taxon matches when its ``base_name_id`` is NULL or
    is not among the ids in the ``name`` table.
    """
    query = "SELECT * FROM taxon WHERE base_name_id IS NULL OR base_name_id NOT IN (SELECT id FROM name)"
    return Taxon.raw(query)
Ejemplo n.º 10
0
		name_row = export_tools.empty_row()
		name_row[0] = abbrev_of_status(name.status)
		name_row[1] = abbrev_of_age(txn.age)
		name_row[3] = txn.full_name()
		fill_in_name(name_row, name)
		sprsh.add_row(name_row, status=name.status)

	# Add children
	for child in txn.sorted_children():
		export_taxon(child, sprsh, recurse=recurse)

if __name__ == '__main__':
	# Command-line entry point. Three modes:
	#   --recursive   export everything below the root taxon in this process
	#   --taxon NAME  export only the taxon with that valid name
	#   (neither)     start one child process per page-root taxon
	parser = argparse.ArgumentParser(description='Export the database into ODS files')

	parser.add_argument('--taxon', '-t', help="Taxon to export")
	parser.add_argument('--recursive', '-r', action='store_true', help="Perform the whole export in a single process")
	args = parser.parse_args()

	if args.recursive:
		root = Taxon.get(Taxon.rank == ROOT)
		export_spreadsheet(root, recurse=True)
	elif args.taxon:
		# %-formatting prints the same text whether print is the Python 2
		# statement or the Python 3 function.
		print("Exporting %s" % args.taxon)
		root = Taxon.get(Taxon.valid_name == args.taxon)
		export_spreadsheet(root, recurse=False)
	else:
		# `== True` is intentional: the comparison is passed to filter() as
		# the query condition.
		taxa = Taxon.filter(Taxon.is_page_root == True)  # noqa: E712
		for taxon in taxa:
			# Pass the arguments as a list and without a shell, so a name
			# containing spaces or shell metacharacters reaches --taxon
			# intact instead of being split or interpreted by the shell.
			subprocess.call(['python', 'export.py', '--taxon', taxon.valid_name])
Ejemplo n.º 11
0
def create_root():
	"""Create the root taxon: rank ROOT, valid name 'root', marked as a page root."""
	root_fields = {'rank': ROOT, 'valid_name': 'root', 'is_page_root': True}
	Taxon.create(**root_fields)
Ejemplo n.º 12
0
def read_file(filename):
	"""Import taxa and names from the CSV file *filename*.

	Cell A1 may hold the valid name of an existing taxon; if it does, that
	taxon becomes the initial parent of the imported taxa. Every following
	non-blank row is parsed with parse_row(). A row with valid status creates
	a new Taxon (only the first one created is marked as a page root) whose
	parent is the closest preceding taxon of higher rank still on the stack.
	Every row then yields a Name for the current valid taxon: either a newly
	created one, or, when the row's root name has the form "see <taxon>", the
	existing base name of that taxon.

	A row that raises is reported (traceback plus the row) and skipped; the
	import continues with the next row.

	Returns:
		True if every row was processed without error, False otherwise.

	Raises:
		StopIteration: the file has no first line.
		IndexError: the first line is empty, or cell A1 names a taxon that
			does not exist.
	"""
	with codecs.open(filename, mode='r') as csv_file:
		reader = csv.reader(csv_file)
		# The next() builtin works on both Python 2 and Python 3 iterators.
		first_line = next(reader)

		# name of parent of root taxon should be in cell A1
		root_name = first_line[0]
		if root_name:
			root_parent = Taxon.filter(Taxon.valid_name == root_name)[0]

			# maintain stack of taxa that are parents of the current taxon
			stack = [root_parent]
		else:
			stack = []

		# current valid taxon (for synonyms)
		current_valid = None
		# whether current taxon should be marked as root of a page
		is_page_root = True
		error_occurred = False
		for row in reader:
			try:
				# ignore blank rows; csv.reader yields [] for an empty line,
				# which must be skipped before indexing into the row
				if not row or (row[3] == '' and row[0] == ''):
					continue
				data = parse_row(row)

				if data['status'] == STATUS_VALID:
					# get stuff off the stack
					rank = data['rank']
					# TODO: make this somehow unranked-clade-aware
					while stack and rank >= stack[-1].rank:
						stack.pop()
					# create new Taxon
					current_valid = Taxon.create(valid_name=data['valid_name'], age=data['age'],
						rank=data['rank'], is_page_root=is_page_root,
						comments=data['comments_taxon'], data=data['data_taxon'])
					if stack:
						current_valid.parent = stack[-1]
						# NOTE(review): assuming Taxon is a plain peewee model,
						# an attribute assigned after create() is not written
						# to the database until save() is called. If Taxon
						# already saves on assignment this is redundant but
						# harmless -- confirm.
						current_valid.save()
					# only the first valid taxon in the file is a page root
					is_page_root = False
					stack.append(current_valid)

				# Validate explicitly rather than with assert, which is
				# stripped when Python runs with -O.
				if current_valid is None:
					raise ValueError(
						"Name row for %s appears before any valid taxon" % data['valid_name'])
				if current_valid.valid_name != data['valid_name']:
					raise ValueError(
						"Valid name %s does not match expected %s" % (data['valid_name'], current_valid.valid_name))

				# create new Name
				data['taxon'] = current_valid
				data['data'] = helpers.fix_data(data['data'])

				# Detect whether a name object is already present (Principle of Coordination)
				nm = None
				if data['root_name'].startswith('see '):
					seen = data['root_name'][4:]
					nm = Taxon.get(Taxon.valid_name == seen).base_name

				# create a new Name if none was found
				if nm is None:
					nm = Name.create(**data)

				# set base_name field
				if data['status'] == STATUS_VALID:
					current_valid.base_name = nm
					# Persist the base name (see the note on save() above).
					current_valid.save()

			except Exception:
				# Deliberate best effort: report the failing row and carry on
				# with the next one; the failure is reflected in the result.
				traceback.print_exc()
				print('Error parsing row: %s' % row)
				error_occurred = True
	return not error_occurred