예제 #1
0
def stem_statistics():
    """Print how many genus- and family-group names have key fields filled in.

    For genus-group names, reports how many have a non-null ``stem`` and how
    many have a non-null ``gender``; for family-group names, how many have a
    non-null ``type``. Each line shows the count, the group total, and the
    percentage.
    """

    def _report(label, count, total):
        # Guard against an empty group (total == 0), which would otherwise
        # raise ZeroDivisionError. Multiplying by 100.0 first keeps the
        # result a float even under Python 2 integer division.
        percentage = count * 100.0 / total if total else 0.0
        print("%s: %s/%s (%.02f%%)" % (label, count, total, percentage))

    stem = Name.filter(Name.group == db.constants.GROUP_GENUS, ~(Name.stem >> None)).count()
    gender = Name.filter(Name.group == db.constants.GROUP_GENUS, ~(Name.gender >> None)).count()
    total = Name.filter(Name.group == db.constants.GROUP_GENUS).count()
    print("Genus-group names:")
    _report("stem", stem, total)
    _report("gender", gender, total)

    print("Family-group names:")
    total = Name.filter(Name.group == db.constants.GROUP_FAMILY).count()
    typ = Name.filter(Name.group == db.constants.GROUP_FAMILY, ~(Name.type >> None)).count()
    _report("type", typ, total)
예제 #2
0
def print_percentages():
    """Print, per page-root taxon, how many names have each tracked field set.

    Every taxon is first mapped to the id of its nearest ancestor (or itself)
    whose ``is_page_root`` is true. Names are then tallied under that id, and
    for each one the total and the count/percentage of non-None values per
    tracked attribute are printed.
    """
    tracked_fields = ["original_name", "original_citation", "page_described", "authority", "year"]
    page_root_by_taxon = {}

    def _page_root_id(taxon):
        # Walk up the parent chain until hitting a page root or a taxon
        # whose page root has already been resolved.
        node = taxon
        while True:
            if node.is_page_root:
                return node.id
            if node.id in page_root_by_taxon:
                return page_root_by_taxon[node.id]
            node = node.parent

    for taxon in Taxon.select():
        page_root_by_taxon[taxon.id] = _page_root_id(taxon)

    tallies = collections.defaultdict(lambda: collections.defaultdict(int))
    for name in Name.select():
        tally = tallies[page_root_by_taxon[name.taxon.id]]
        tally["total"] += 1
        for field in tracked_fields:
            if getattr(name, field) is not None:
                tally[field] += 1

    for root_id, tally in tallies.items():
        print("FILE", Taxon.filter(Taxon.id == root_id)[0])
        # "total" is removed from the tally so only per-field counts remain
        total = tally.pop("total")
        print("Total", total)
        for field in tracked_fields:
            filled = tally[field]
            print("%s: %s (%.2f%%)" % (field, filled, filled * 100.0 / total))
예제 #3
0
def detect_types(max_count=None, verbose=False):
    """Try to resolve each name's verbatim_type into an actual type reference.

    Considers family- and genus-group names that have a verbatim_type but no
    type yet, limited to ``max_count`` rows, and prints how many of the
    attempts succeeded. ``verbose`` is forwarded to ``detect_and_set_type``.
    """
    eligible_groups = (db.constants.GROUP_FAMILY, db.constants.GROUP_GENUS)
    # `!= None` is deliberate: the operator builds a query expression rather
    # than being evaluated in Python (presumably IS NOT NULL -- confirm).
    pending = Name.filter(
        Name.verbatim_type != None, Name.type >> None, Name.group << eligible_groups
    ).limit(max_count)

    attempted = 0
    succeeded = 0
    for attempted, name in enumerate(pending, 1):
        if name.detect_and_set_type(verbatim_type=name.verbatim_type, verbose=verbose):
            succeeded += 1
    print("Success: %d/%d" % (succeeded, attempted))
예제 #4
0
def add_page_described():
    """Interactively ask for the page described of names that lack one.

    Visits names that have an original citation, no page_described, and a
    year other than "in press", ordered by original citation. Whatever
    ``getinput.get_line`` returns is stored as page_described and saved.
    """

    def _is_stop(line):
        return line == "s"

    missing_page = Name.filter(
        Name.original_citation != None, Name.page_described >> None, Name.year != "in press"
    ).order_by(Name.original_citation)

    for name in missing_page:
        prompt = "Name %s is missing page described, but has original citation {%s}" % (
            name.description(),
            name.original_citation,
        )
        # "o" opens the description of the name currently being edited
        handlers = {"o": lambda _: name.open_description()}
        name.page_described = getinput.get_line(prompt, handlers=handlers, should_stop=_is_stop)
        name.save()
예제 #5
0
 def detect_from_root_name(name, root_name):
     """Set name.type to the unique matching genus-group name, if there is one.

     A candidate is a genus-group name whose stem equals ``root_name`` or
     ``root_name + "i"`` and whose taxon is a child of ``name.taxon``.
     Returns True (after saving) when exactly one candidate exists, False
     otherwise.
     """
     stem_matches = (Name.stem == root_name) | (Name.stem == root_name + "i")
     genus_names = Name.filter(Name.group == db.constants.GROUP_GENUS, stem_matches)
     descendants = [genus for genus in genus_names if genus.taxon.is_child_of(name.taxon)]
     if len(descendants) != 1:
         return False
     detected = descendants[0]
     print("Detected type for name %s: %s" % (name, detected))
     name.type = detected
     name.save()
     return True
예제 #6
0
def detect_stems():
    """Infer and store stem and gender for genus-group names without a stem.

    Names for which ``detect_stem_and_gender`` returns None are skipped. When
    the inference is not marked confident, the user is asked to confirm it
    before anything is saved.
    """
    unstemmed = Name.filter(Name.group == db.constants.GROUP_GENUS, Name.stem >> None)
    for name in unstemmed:
        guess = db.detection.detect_stem_and_gender(name.root_name)
        if guess is None:
            continue

        if guess.confident:
            accepted = True
        else:
            # low-confidence inference: show it and let the user decide
            print("%s: stem %s, gender %s" % (name.description(), guess.stem, guess.gender))
            accepted = getinput.yes_no("Is this correct? ")
        if not accepted:
            continue

        print("Inferred stem and gender for %s: %s, %s" % (name, guess.stem, guess.gender))
        name.stem = guess.stem
        name.gender = guess.gender
        name.save()
예제 #7
0
def add_types():
    """Interactively ask for the type of genus-group names that lack one.

    Visits genus-group names with an original citation, no type, and a year
    comparing greater than "1930", ordered by original citation. The taxon is
    displayed, the user is prompted, and a non-None answer is passed to
    ``detect_and_set_type``.
    """

    def _is_stop(line):
        return line == "s"

    untyped = Name.filter(
        Name.original_citation != None, Name.type >> None, Name.year > "1930", Name.group == db.constants.GROUP_GENUS
    ).order_by(Name.original_citation)

    for name in untyped:
        name.taxon.display(full=True, max_depth=1)
        prompt = "Name %s is missing type, but has original citation {%s}" % (
            name.description(),
            name.original_citation,
        )
        # "o" opens the description of the name currently being edited
        handlers = {"o": lambda _: name.open_description()}
        answer = getinput.get_line(prompt, handlers=handlers, should_stop=_is_stop)
        if answer is None:
            continue
        name.detect_and_set_type(answer, verbose=True)
예제 #8
0
def add_original_names():
    """Interactively fill in original names for names that have a citation.

    For every name with a truthy original_citation and a falsy original_name,
    prompts for the original name; if page_described is also falsy, prompts
    for that as well. The name is then saved.
    """
    for name in Name.select():
        # only names that have a citation but still lack an original name
        if not name.original_citation or name.original_name:
            continue

        prompt = u"Name {} is missing an original name, but has original citation {{{}}}:{}".format(
            name.description(), name.original_citation, name.page_described
        )
        name.original_name = getinput.get_line(prompt, handlers={"o": lambda _: name.open_description()})

        if not name.page_described:
            page_handlers = {"o": lambda _: name.open_description()}
            name.page_described = getinput.get_line(
                "Enter page described", handlers=page_handlers, should_stop=lambda line: line == "s"
            )
        name.save()
예제 #9
0
def check_refs():
	"""Validate citations on all names and promote valid verbatim citations.

	A name with an original_citation gets a warning printed when
	``cite_exists`` rejects it. Otherwise, a verbatim_citation that
	``may_be_citation`` accepts is moved into original_citation when it
	exists, or reported when ``must_be_citation`` says it should have.
	"""
	for name in Name.select():
		if name.original_citation:
			# an original citation is present: only verify that it resolves
			if cite_exists(name.original_citation):
				continue
			print("Name:", name.description())
			print("Warning: invalid original citation:", name.original_citation)
			continue

		verbatim = name.verbatim_citation
		if not (verbatim and may_be_citation(verbatim)):
			continue

		if cite_exists(verbatim):
			# promote the verbatim citation to a real one
			name.original_citation = verbatim
			name.verbatim_citation = None
			name.save()
		elif must_be_citation(verbatim):
			print("Name:", name.description())
			print("Warning: invalid citation:", verbatim)
예제 #10
0
def root_name_mismatch():
    """Yield family-group names whose root_name disagrees with their type's stem.

    Unavailable names, names whose type has no stem, and names that already
    match are skipped. If a suffix-stripped form of the root name equals the
    stem (optionally after appending "i"), the root name is corrected and
    saved instead of being reported.
    """
    typed_family_names = Name.filter(Name.group == db.constants.GROUP_FAMILY, ~(Name.type >> None))
    for name in typed_family_names:
        if name.is_unavailable():
            continue
        stem = name.type.stem
        if stem is None or name.root_name == stem:
            continue

        # any() stops at the first stripped form that matches the stem
        correctable = any(
            candidate == stem or candidate + "i" == stem
            for candidate in db.helpers.name_with_suffixes_removed(name.root_name)
        )
        if correctable:
            print("Autocorrecting root name: %s -> %s" % (name.root_name, stem))
            name.root_name = stem
            name.save()

        if name.root_name != stem:
            print("Stem mismatch for %s: %s vs. %s" % (name, name.root_name, stem))
            yield name
예제 #11
0
def detect_types_from_root_names(max_count=None):
    """Detect types for family-group names from their root_name.

    Goes through family-group names without a type (highest id first, limited
    to ``max_count``), skipping unavailable ones. The root name itself is
    tried first, then each suffix-stripped variant, until one yields a unique
    genus-group match. Prints a success tally at the end.
    """

    def _try_root(name, root_name):
        # Succeeds only when exactly one genus-group name under name.taxon
        # has a stem equal to root_name or root_name + "i".
        stem_matches = (Name.stem == root_name) | (Name.stem == root_name + "i")
        genus_names = Name.filter(Name.group == db.constants.GROUP_GENUS, stem_matches)
        descendants = [genus for genus in genus_names if genus.taxon.is_child_of(name.taxon)]
        if len(descendants) != 1:
            return False
        detected = descendants[0]
        print("Detected type for name %s: %s" % (name, detected))
        name.type = detected
        name.save()
        return True

    def _roots_to_try(root_name):
        # Lazy on purpose: the stripped variants are only computed if the
        # unmodified root name did not produce a match.
        yield root_name
        for stripped in db.helpers.name_with_suffixes_removed(root_name):
            yield stripped

    attempted = 0
    succeeded = 0
    untyped = (
        Name.filter(Name.group == db.constants.GROUP_FAMILY, Name.type >> None)
        .order_by(Name.id.desc())
        .limit(max_count)
    )
    for name in untyped:
        if name.is_unavailable():
            continue
        attempted += 1
        if any(_try_root(name, root) for root in _roots_to_try(name.root_name)):
            succeeded += 1
        else:
            print("Could not detect type for name %s (root_name = %s)" % (name, name.root_name))
    print("Success: %d/%d" % (succeeded, attempted))
예제 #12
0
def dup_names():
    """Group names by (original_name, year).

    Names missing either value are left out. Returns a one-element list
    holding the defaultdict that maps each pair to its list of names.
    """
    by_name_and_year = collections.defaultdict(list)
    for name in Name.select():
        if name.original_name is None or name.year is None:
            continue
        key = (name.original_name, name.year)
        by_name_and_year[key].append(name)
    return [by_name_and_year]
예제 #13
0
def dup_genus():
    """Group genus-group names by "root_name authority, year".

    Returns a one-element list holding the defaultdict that maps each
    formatted label to the list of names sharing it.
    """
    by_label = collections.defaultdict(list)
    genus_names = Name.filter(Name.group == db.constants.GROUP_GENUS)
    for name in genus_names:
        label = "%s %s, %s" % (name.root_name, name.authority, name.year)
        by_label[label].append(name)
    return [by_label]
예제 #14
0
def endswith(end):
    """Return the genus-group names whose root_name matches the pattern "%<end>"."""
    # leading % is the wildcard; presumably an SQL LIKE match -- confirm
    pattern = "%%%s" % end
    is_genus = Name.group == db.constants.GROUP_GENUS
    matches = Name.filter(is_genus, Name.root_name % pattern)
    return list(matches)
예제 #15
0
def n(name):
    """Return a list of names whose root name or original name equals ``name``."""
    matches_root = Name.root_name == name
    matches_original = Name.original_name == name
    return list(Name.filter(matches_root | matches_original))
예제 #16
0
def bad_taxa():
    """Return a raw query for names whose taxon_id is NULL or absent from the taxon table."""
    orphan_sql = "SELECT * FROM name WHERE taxon_id IS NULL or taxon_id NOT IN (SELECT id FROM taxon)"
    return Name.raw(orphan_sql)
예제 #17
0
def read_file(filename):
	"""Read a CSV file of taxa and names and create Taxon and Name records.

	The first row holds, in its first cell, the valid name of the parent of
	the root taxon (or an empty string for no parent). Every following row is
	parsed with ``parse_row``; rows with status STATUS_VALID create a new
	Taxon, and every row creates (or reuses) a Name attached to the most
	recent valid taxon.

	Errors in individual rows are printed with a traceback and do not stop
	processing.

	Returns:
		True if every row was processed without an exception, False otherwise.

	Raises:
		StopIteration: if the file has no first row (raised by ``next``).
	"""
	with codecs.open(filename, mode='r') as file:
		reader = csv.reader(file)
		# The builtin next() works on both Python 2 and Python 3 iterators;
		# the reader.next() method exists only on Python 2.
		first_line = next(reader)

		# name of parent of root taxon should be in cell A1
		root_name = first_line[0]
		if root_name:
			root_parent = Taxon.filter(Taxon.valid_name == root_name)[0]

			# maintain stack of taxa that are parents of the current taxon
			stack = [root_parent]
		else:
			stack = []

		# current valid taxon (for synonyms)
		current_valid = None
		# whether current taxon should be marked as root of a page;
		# only the first valid taxon in the file gets this flag
		is_page_root = True
		error_occurred = False
		for row in reader:
			try:
				# ignore blank rows
				if row[3] == '' and row[0] == '':
					continue
				data = parse_row(row)

				if data['status'] == STATUS_VALID:
					# pop taxa of equal or higher rank value off the stack so
					# that the top of the stack is this taxon's parent
					rank = data['rank']
					# TODO: make this somehow unranked-clade-aware
					while len(stack) > 0 and rank >= stack[-1].rank:
						stack.pop()
					# create new Taxon
					current_valid = Taxon.create(valid_name=data['valid_name'], age=data['age'],
						rank=data['rank'], is_page_root=is_page_root,
						comments=data['comments_taxon'], data=data['data_taxon'])
					# NOTE(review): parent is assigned after create() and no
					# save() is visible here -- confirm it is persisted.
					if len(stack) > 0:
						current_valid.parent = stack[-1]
					if is_page_root:
						is_page_root = False
					stack.append(current_valid)
				# create new Name
				data['taxon'] = current_valid
				# a non-valid row before any valid taxon leaves current_valid
				# as None; the resulting error is caught and reported below
				assert current_valid.valid_name == data['valid_name'], \
					"Valid name %s does not match expected %s" % (data['valid_name'], current_valid.valid_name)

				data['data'] = helpers.fix_data(data['data'])

				# Detect whether a name object is already present (Principle of Coordination)
				nm = None
				if data['root_name'][0:4] == 'see ':
					# "see X" refers to the base name of the taxon named X
					seen = data['root_name'][4:]
					nm = Taxon.get(Taxon.valid_name == seen).base_name

				# create a new Name if none was found
				if nm is None:
					nm = Name.create(**data)

				# set base_name field
				# NOTE(review): no save() is visible after this assignment -- confirm.
				if data['status'] == STATUS_VALID:
					current_valid.base_name = nm

			except Exception:
				traceback.print_exc()
				print('Error parsing row: %s' % row)
				error_occurred = True
				# ignore error and happily go on with the next
	return not error_occurred