def __getitem__(self, name):
    """Return the plant stored for *name*, creating it on first access.

    The lookup key is ``plant_key(name)``.  When that key is missing, a new
    ``Plant(name, key)`` is stored under it, appended to the matching list in
    ``self.species``, and returned.
    """
    full_key = plant_key(name)
    try:
        return dict.__getitem__(self, full_key)
    except LookupError:
        record = Plant(name, full_key)
        self[full_key] = record

        # Species key: the leading element of each of the first two key
        # parts, which leaves every descriptor out.
        species_key = tuple(part[0] for part in full_key[:2])
        # A trailing part that is only a descriptor has no name; cut it off.
        # Eg: (name) (spp agg) => (name)
        if not species_key[-1]:
            species_key = species_key[:-1]

        siblings = self.species.setdefault(species_key, [])
        siblings.append(record)
        return record
def add_plant(self, plant):
    """Merge the mapping *plant* into its row of ``self.list``.

    The row is found through ``self.items`` using ``plant_key(plant["name"])``
    and is created with ``self.list.add()`` when the key is new.  Only fields
    that are still unset in the row are filled from *plant*; a missing
    ``origin`` becomes ``"?"`` and any other missing field becomes ``""``.
    Every 200th record triggers ``self.print_count()``.
    """
    columns = (
        "origin", "name", "auth", "common", "famnum", "family", "fam_com",
        "divnum", "group", "note", "specnum",
    )

    key = plant_key(plant["name"])
    try:
        row = self.items[key]
    except LookupError:
        row = self.items[key] = self.list.add()

    stored = self.list.item(row, option="values")
    record = tuple_record(stored, columns, ("origin",))
    # "?" is the placeholder written for an unknown origin, so treat it as
    # unset and let this plant supply a real value.
    if getattr(record, "origin", "?") == "?":
        record.origin = None

    for column in columns:
        if getattr(record, column, None) is not None:
            continue  # already populated; keep the existing value
        value = plant.get(column)
        if value is None:
            value = "?" if column == "origin" else ""
        setattr(record, column, value)

    merged = tuple(getattr(record, column) for column in columns)
    self.list.item(row, values=merged)

    self.records += 1
    if self.records % 200 == 0:
        self.print_count()
def _taxon_key(name):
    """Return the list of leading elements of ``db.plant_key(name)``.

    A trailing empty element (a descriptor without a name) is dropped.
    """
    key = [part[0] for part in db.plant_key(name)]
    if not key[-1]:
        key.pop()
    return key


def _read_selection(selection, tree):
    """Add each plant listed in the file *selection* to *tree*, in place.

    Each line holds one plant name.  Words found in ``db.abbr`` (with any
    trailing "." removed) are skipped.  The first remaining word is expected
    to be in title case and is lower-cased.  A word ending in "." is expanded
    from the word at the same position in the previous line's key.  Problems
    are reported on stderr; nothing is raised for them.
    """
    prev = None
    with open(selection, "rt") as reader:
        for plant in reader:
            plant = plant.rstrip(" \r\n")
            key = list()
            for word in plant.split(" "):
                abbr = word
                if abbr.endswith("."):
                    abbr = abbr[:-1]
                if abbr in db.abbr:
                    continue

                if not key:
                    # First significant word
                    if word.istitle():
                        word = word.lower()
                    else:
                        msg = "Genus {!r} is not in title case"
                        print(msg.format(word), file=stderr)

                if word.endswith("."):
                    # Abbreviated word: expand it from the previous entry
                    # when that entry shares the key built so far.
                    if prev is None:
                        msg = "No previous entry to expand {!r} from"
                        print(msg.format(plant), file=stderr)
                    elif (len(prev) > len(key)
                            and prev[:len(key)] == key
                            and prev[len(key)].startswith(word[:-1])):
                        word = prev[len(key)]
                    else:
                        print("Abbreviated {!r} does not match "
                            "previous entry".format(plant), file=stderr)
                key.append(word)
            prev = key

            [children, remainder] = lookup_tree(tree, key)
            if remainder:
                if children or children is tree:
                    add_tree(children, remainder)
                else:
                    msg = "Supertaxon of {} already listed".format(plant)
                    print(msg, file=stderr)
            elif children:
                # The popped entries are removed from the tree as they are
                # reported.
                while children:
                    [subname, _] = children.popitem()
                    msg = "{} subtaxon {} already listed"
                    print(msg.format(plant, subname), file=stderr)
            else:
                msg = "{} equivalent already listed".format(plant)
                print(msg, file=stderr)


def _prune_matched_path(tree, key):
    """Remove from *tree* the non-branching path leading to *key*'s entry.

    The walk follows *key* down the nested dicts and remembers the deepest
    node that has more than one child.  If the walk ends on an empty node,
    the child of that remembered node is removed.
    """
    if not key:
        return
    # Start every walk with the root as the branch point.  Otherwise a path
    # with no branching node would leave the branch point unset (or left
    # over from an earlier call) when the deletion below runs.
    branch_node = tree
    branch_name = key[0]
    node = tree
    for subkey in key:
        if len(node) > 1:
            branch_node = node
            branch_name = subkey
        try:
            node = node[subkey]
        except LookupError:
            break
    if not node:
        # The entry may already have been pruned for an earlier record.
        branch_node.pop(branch_name, None)


def main(freqs, selection=None, *, synonyms=None):
    """Write a CSV table of relative plant frequencies per EVC to stdout.

    Parameters:
        freqs: passed to ``FreqExcelReader``; each record supplies "EVC",
            "EVC_DESC", "NAME" and "Frequency".
        selection: optional name of a text file listing one plant per line.
            When given, only matching records are tabulated and entries
            that matched no record are reported on stderr.
        synonyms: passed unchanged to ``parse_synonyms`` with the tree.

    The output has one row per EVC giving its maximum frequency, then a
    header row of EVCs, then one row per plant with each frequency divided
    by the EVC's maximum and formatted to two decimals (blank where the
    plant has no record).  Progress and diagnostics go to stderr.
    """
    deadline = monotonic() + 1
    midline = False

    tree = dict()  # {subname: ..., ...}
    if selection:
        _read_selection(selection, tree)
    # NOTE(review): called whether or not a selection was given; confirm
    # that parse_synonyms accepts synonyms=None.
    parse_synonyms(synonyms, tree)

    selected = set()
    evcs = list()  # [(evc, desc, {name: freq for each plant}) for each EVC]
    max_freqs = list()  # [max(freq) for each EVC]
    with closing(FreqExcelReader(freqs)) as records:
        total = format(len(records))
        last_evc = None
        for [i, plant] in enumerate(records):
            if stderr:
                # Rate-limited progress counter, rewritten in place
                now = monotonic()
                if now >= deadline:
                    if midline:
                        stderr.write("\r")
                    msg = "Record {:{}}/{}".format(i + 1, len(total), total)
                    stderr.write(msg)
                    stderr.flush()
                    midline = True
                    deadline = now + 0.1

            if plant["EVC"] != last_evc:
                # A change of EVC value starts a new group of records
                last_evc = plant["EVC"]
                last_desc = plant["EVC_DESC"]
                plant_freqs = dict()
                evcs.append((last_evc, last_desc, plant_freqs))
                max_freqs.append(plant["Frequency"])
            else:
                max_freqs[-1] = max(max_freqs[-1], plant["Frequency"])
                if plant["EVC_DESC"] != last_desc:
                    msg = "EVC {} EVC_DESC inconsistent between {!r} and " \
                        "{!r}".format(last_evc, last_desc, plant["EVC_DESC"])
                    print(msg, file=stderr)
                    last_desc = plant["EVC_DESC"]

            name = plant["NAME"]
            if selection:
                [children, remainder] = lookup_tree(tree, _taxon_key(name))
                if remainder and children:
                    # Presumably the name diverges from every listed entry
                    # at this node -- verify against lookup_tree.
                    continue
            selected.add(name)
            if name in plant_freqs:
                msg = "Duplicate record for {NAME} in {EVC}"
                print(msg.format_map(plant), file=stderr)
            plant_freqs[name] = plant_freqs.get(name, 0) + plant["Frequency"]

    if stderr and midline:
        # ANSI "erase to start of line", then return the cursor to column 0
        stderr.write("\x1B[1K\r")
        stderr.flush()

    # Wrap stdout's buffer with newline="" so the csv writer controls the
    # line endings itself.
    out = TextIOWrapper(stdout.buffer, stdout.encoding, stdout.errors,
        newline="", line_buffering=stdout.line_buffering)
    try:
        writer = csv.writer(out)
        writer.writerow(("EVC", "EVC_DESC", "max(Frequency)"))
        for [[evc, desc, _], max_freq] in zip(evcs, max_freqs):
            writer.writerow((evc, desc, max_freq))
        writer.writerow(("NAME",) + tuple(evc for [evc, _, _] in evcs))
        for plant in sorted(selected, key=db.plant_key):
            row = [plant]
            for [[_, _, evc_freqs], max_freq] in zip(evcs, max_freqs):
                freq = evc_freqs.get(plant)
                if freq is None:
                    row.append(None)
                else:
                    row.append(format(freq / max_freq, ".2f"))
            writer.writerow(row)

            if selection:
                # Prune any non-branching paths leading to this entry
                _prune_matched_path(tree, _taxon_key(plant))
    finally:
        # Detach rather than close, so that stdout's buffer stays open
        out.detach()

    if selection:
        # Whatever is still in the tree was never pruned above
        for path in walk_tree(tree):
            msg = "No records matching {}"
            print(msg.format(" ".join(path).capitalize()), file=stderr)