Example #1
0
def read_file(filename):
	"""Import taxa and names from a CSV file, one data row at a time.

	The first row is a header; its first cell (A1) holds the valid name of
	the Taxon that is the parent of the root taxon in this file, or is empty
	if there is no such parent. Every following row is run through
	parse_row() and turned into Taxon and/or Name records.

	A failure while handling a data row is printed with
	traceback.print_exc() and the import continues with the next row.

	Args:
		filename: path of the CSV file to read.

	Returns:
		True if no data row raised an exception, False otherwise.

	Raises:
		StopIteration: if the file has no rows at all.
		IndexError: if the first row is empty, or if A1 names a parent for
			which Taxon.filter() returns no result.
	"""
	with codecs.open(filename, mode='r') as file:
		reader = csv.reader(file)
		# next(reader) instead of reader.next(): the builtin works on both
		# Python 2.6+ and Python 3, the method only exists on Python 2.
		first_line = next(reader)

		# maintain stack of taxa that are parents of the current taxon
		stack = []
		# name of parent of root taxon should be in cell A1
		root_name = first_line[0]
		if root_name != '':
			root_parent = Taxon.filter(Taxon.valid_name == root_name)[0]
			stack.append(root_parent)

		# current valid taxon (for synonyms)
		current_valid = None
		# whether current taxon should be marked as root of a page; only the
		# first valid taxon created from this file keeps the flag
		is_page_root = True
		error_occurred = False
		for row in reader:
			try:
				# ignore blank rows (a row shorter than 15 cells raises
				# IndexError here and is reported as an error)
				if row[14] == '' and row[8] == '':
					continue
				data = parse_row(row)
				# deal with "nHT", which is a pain. Some of these may need to be manually
				# readded to the DB
				if data['kind'] == 'nHT':
					if data['valid_name'] in ignored_nHT:
						continue
					else:
						raise Exception("Unrecognized nHT: " + str(data))
				# nsgen is i.s., just ignore
				if data['kind'] == 'nsgen':
					continue

				if data['status'] == STATUS_VALID:
					# get stuff off the stack
					rank = data['rank']
					if rank == ROOT:
						current_valid = Taxon.create(
							valid_name=data['valid_name'], age=data['age'],
							rank=data['rank'], is_page_root=True
						)
					else:
						# pop every taxon whose rank is not above the new one, so
						# that stack[-1] ends up being the new taxon's parent
						# TODO: make this somehow unranked-clade-aware
						while rank >= stack[-1].rank:
							stack.pop()
						# create new Taxon
						current_valid = Taxon.create(
							valid_name=data['valid_name'], age=data['age'],
							rank=data['rank'], parent=stack[-1], is_page_root=is_page_root
						)
					if is_page_root:
						is_page_root = False
					stack.append(current_valid)
				# create new Name; non-valid rows attach to the most recent valid taxon
				data['taxon'] = current_valid
				if data['status'] == STATUS_DUBIOUS:
					# current system is inadequate for properly marking si species
					# assume there's only genera and species
					if ' ' in data['valid_name']:
						data['group'] = GROUP_SPECIES
					else:
						data['group'] = GROUP_GENUS
					# si species don't have a meaningful "valid name", but preserve what's there now
					data['data']['si_valid_name'] = data['valid_name']
				else:
					# this will be wrong in the few cases where a high-ranked name is listed
					# as a synonym of a family-group name. Don't see a way to correct that
					# programmatically.
					data['group'] = helpers.group_of_rank(current_valid.rank)
					if data['status'] == STATUS_SYNONYM:
						# for synHT rows the first space-separated word of the
						# valid name is dropped before comparing
						if data['kind'] == 'synHT':
							valid_name = data['valid_name'].split(' ', 1)[1]
						else:
							valid_name = data['valid_name']
						if valid_name != current_valid.valid_name:
							raise Exception("Valid name of synonym does not match: " + data['valid_name'] + " and " + current_valid.valid_name)
				# shorten root name for family-group names
				if data['group'] == GROUP_FAMILY:
					data['root_name'] = helpers.strip_rank(data['root_name'], current_valid.rank)
				# 'kind' is only needed for the checks above and is not passed on
				del data['kind']
				data['data'] = json.dumps(remove_null(data['data']))

				# Detect whether a name object is already present (Principle of Coordination)
				nm = None
				if data['status'] == STATUS_VALID:
					root_name = data['root_name']
					if current_valid.rank == FAMILY:
						nm = detect_super(root_name + 'oidea', SUPERFAMILY)
					elif current_valid.rank == SUBFAMILY:
						nm = detect_super(root_name + 'idae', FAMILY)
					elif current_valid.rank == TRIBE:
						nm = detect_super(root_name + 'inae', SUBFAMILY)
					elif current_valid.rank == SUBTRIBE:
						nm = detect_super(root_name + 'ini', TRIBE)
					elif current_valid.rank == SUBGENUS:
						nm = detect_super(root_name, GENUS)
					elif current_valid.rank == SPECIES:
						spg_name = helpers.spg_of_species(current_valid.valid_name)
						nm = detect_super(spg_name, SPECIES_GROUP)
					elif current_valid.rank == SUBSPECIES and helpers.is_nominate_subspecies(current_valid.valid_name):
						sp_name = helpers.species_of_subspecies(current_valid.valid_name)
						nm = detect_super(sp_name, SPECIES)
					if nm is not None:
						# reuse the existing name: merge this row's data into it
						del data['taxon']
						nm.add_additional_data(data)
						# nm's Taxon should be the lowest-ranking one
						nm.taxon = current_valid

				# create a new Name if none was found
				if nm is None:
					nm = Name.create(**data)

				# set base_name field
				if data['status'] == STATUS_VALID:
					current_valid.base_name = nm
				if 'additional_synonyms' in data:
					group = helpers.group_of_rank(current_valid.rank)
					for synonym in data['additional_synonyms']:
						Name.create(taxon=current_valid, root_name=synonym, group=group, status=STATUS_SYNONYM)

			except Exception:
				traceback.print_exc()
				error_occurred = True
				# ignore error and happily go on with the next
	return not error_occurred
Example #2
0
def create_root():
	"""Create the Taxon with rank ROOT, named 'root', flagged as a page root."""
	root_fields = {
		'rank': ROOT,
		'valid_name': 'root',
		'is_page_root': True,
	}
	Taxon.create(**root_fields)