Ejemplo n.º 1
0
 def __getitem__(self, name):
     """Return the plant stored for *name*, creating it on first access.

     The lookup key is ``plant_key(name)``. When no entry exists yet, a
     new ``Plant(name, key)`` is stored under that key and also appended
     to the ``self.species`` list for its species key, then returned.
     """
     full_key = plant_key(name)
     try:
         return dict.__getitem__(self, full_key)
     except LookupError:
         new_plant = Plant(name, full_key)
         self[full_key] = new_plant

         # Species key: the name component of the first two key parts;
         # every descriptor is ignored.
         names = [part[0] for part in full_key[:2]]

         # A trailing part that is a descriptor without a name contributes
         # an empty name, so drop it. Eg: (name) (spp agg) => (name)
         if not names[-1]:
             names.pop()
         species_key = tuple(names)

         members = self.species.setdefault(species_key, [])
         members.append(new_plant)
         return new_plant
Ejemplo n.º 2
0
    def add_plant(self, plant):
        """Merge one plant record into the displayed list.

        The row is looked up in ``self.items`` by ``plant_key(plant["name"])``
        and created with ``self.list.add()`` when missing. Fields that have
        no value in the row yet are filled from *plant*; fields *plant* does
        not supply default to ``""`` (``"?"`` for ``origin``). Every 200th
        call also invokes ``self.print_count()``.
        """
        fields = (
            "origin",
            "name",
            "auth",
            "common",
            "famnum",
            "family",
            "fam_com",
            "divnum",
            "group",
            "note",
            "specnum",
        )
        key = plant_key(plant["name"])
        try:
            row = self.items[key]
        except LookupError:
            row = self.list.add()
            self.items[key] = row

        # NOTE(review): tuple_record is defined elsewhere; it presumably
        # exposes the row values as attributes named after `fields`.
        record = tuple_record(
            self.list.item(row, option="values"), fields, ("origin",))

        # A "?" origin is only a placeholder, so treat it as not yet set.
        if getattr(record, "origin", "?") == "?":
            record.origin = None

        for field in fields:
            if getattr(record, field, None) is not None:
                continue  # Keep the value that is already present
            value = plant.get(field)
            if value is None:
                value = "?" if field == "origin" else ""
            setattr(record, field, value)

        merged = tuple(getattr(record, field) for field in fields)
        self.list.item(row, values=merged)

        self.records += 1
        if self.records % 200 == 0:
            self.print_count()
Ejemplo n.º 3
0
def _prune_path(tree, key):
    """Remove the non-branching tail of the path *key* from *tree*.

    Walks *tree* (nested mappings) along the successive items of *key* for
    as long as they are present. If the walk ends on an empty node, the
    entry at the deepest point where the path had siblings is deleted, or
    the first entry of the path when it never had siblings. Nothing is
    removed when the walk ends on a node that still has children, or when
    not even the first item of *key* is present.
    """
    node = tree
    branch_node = None
    branch_name = None
    for subkey in key:
        try:
            child = node[subkey]
        except LookupError:
            break
        # Only remember cut points that were actually walked through, so
        # the final deletion can never refer to a missing or stale entry.
        if branch_node is None or len(node) > 1:
            branch_node = node
            branch_name = subkey
        node = child
    if branch_node is not None and not node:
        del branch_node[branch_name]


def main(freqs, selection=None, *, synonyms=None):
    """Write a CSV table of relative plant frequencies per EVC to stdout.

    Parameters:
        freqs: Passed to ``FreqExcelReader`` to obtain the frequency
            records. Each record is indexed by "EVC", "EVC_DESC", "NAME"
            and "Frequency".
        selection: Optional path of a text file listing one plant name per
            line. Words that are in ``db.abbr`` are skipped, and a word
            ending in "." is expanded from the corresponding word of the
            previous line. When given, the listed names are loaded into a
            tree that is used to filter the records.
        synonyms: Passed to ``parse_synonyms`` together with the tree.

    Output on stdout is CSV: a header row and one row per EVC giving its
    description and maximum frequency, then a "NAME" header row and one row
    per selected plant holding ``frequency / max(Frequency)`` for each EVC
    (two decimals, empty where the plant has no record in that EVC).

    Diagnostics and a progress counter are written to stderr. The counter
    first appears after one second and is then refreshed every 0.1 s.
    Selection entries that no written plant pruned from the tree are
    reported at the end as "No records matching ...".
    """
    deadline = monotonic() + 1
    midline = False

    tree = dict()  # {subname: ..., ...}
    if selection:
        prev = None
        with open(selection, "rt") as reader:
            for plant in reader:
                plant = plant.rstrip(" \r\n")
                key = list()
                for word in plant.split(" "):
                    # Skip words that are known abbreviations, with or
                    # without a trailing full stop
                    abbr = word
                    if abbr.endswith("."):
                        abbr = abbr[:-1]
                    if abbr in db.abbr:
                        continue

                    if not key:
                        # First word of the key: the genus
                        if word.istitle():
                            word = word.lower()
                        else:
                            msg = "Genus {!r} is not in title case"
                            print(msg.format(word), file=stderr)
                    if word.endswith("."):
                        # Abbreviated word: expand it from the word at the
                        # same position in the previous entry
                        if prev is None:
                            msg = "No previous entry to expand {!r} from"
                            print(msg.format(plant), file=stderr)
                        elif len(prev) > len(key) \
                                and prev[:len(key)] == key \
                                and prev[len(key)].startswith(word[:-1]):
                            word = prev[len(key)]
                        else:
                            print("Abbreviated {!r} does not match " \
                                "previous entry".format(plant), file=stderr)
                    key.append(word)
                prev = key

                # NOTE(review): lookup_tree is defined elsewhere; it
                # presumably returns the deepest node matched and the
                # unmatched rest of the key.
                [children, remainder] = lookup_tree(tree, key)
                if remainder:
                    if children or children is tree:
                        add_tree(children, remainder)
                    else:
                        msg = "Supertaxon of {} already listed".format(plant)
                        print(msg, file=stderr)
                else:
                    if children:
                        while children:
                            [subname, _] = children.popitem()
                            msg = "{} subtaxon {} already listed"
                            print(msg.format(plant, subname), file=stderr)
                    else:
                        msg = "{} equivalent already listed".format(plant)
                        print(msg, file=stderr)

    parse_synonyms(synonyms, tree)
    selected = set()
    evcs = list()  # [(evc, desc, {name: freq for each plant}) for each EVC]
    max_freqs = list()  # [max(freq) for each EVC]
    with closing(FreqExcelReader(freqs)) as records:
        total = format(len(records))
        last_evc = None
        for [i, plant] in enumerate(records):
            if stderr:
                now = monotonic()
                if now >= deadline:
                    if midline:
                        stderr.write("\r")
                    msg = "Record {:{}}/{}".format(i + 1, len(total), total)
                    stderr.write(msg)
                    stderr.flush()
                    midline = True
                    deadline = now + 0.1

            # A change of EVC between consecutive records starts a new group
            if plant["EVC"] != last_evc:
                last_evc = plant["EVC"]
                last_desc = plant["EVC_DESC"]
                plant_freqs = dict()
                evcs.append((last_evc, last_desc, plant_freqs))
                max_freqs.append(plant["Frequency"])
            else:
                max_freqs[-1] = max(max_freqs[-1], plant["Frequency"])
                if plant["EVC_DESC"] != last_desc:
                    msg = "EVC {} EVC_DESC inconsistent between {!r} and " \
                        "{!r}".format(last_evc, last_desc, plant["EVC_DESC"])
                    print(msg, file=stderr)
                    last_desc = plant["EVC_DESC"]
            name = plant["NAME"]
            if selection:
                key = list(n[0] for n in db.plant_key(name))
                if not key[-1]:
                    key.pop()
                [children, remainder] = lookup_tree(tree, key)
                if remainder and children:
                    continue
            selected.add(name)
            if name in plant_freqs:
                msg = "Duplicate record for {NAME} in {EVC}"
                print(msg.format_map(plant), file=stderr)
            plant_freqs[name] = plant_freqs.get(name, 0) + plant["Frequency"]

    if stderr and midline:
        # Erase the progress counter left on the current line
        stderr.write("\x1B[1K\r")
        stderr.flush()

    # Wrap stdout's binary buffer so that newline translation is disabled
    # (newline="") for the CSV writer; detached again once writing is done.
    out = TextIOWrapper(stdout.buffer, stdout.encoding, stdout.errors,
        newline="", line_buffering=stdout.line_buffering)
    try:
        writer = csv.writer(out)
        writer.writerow(("EVC", "EVC_DESC", "max(Frequency)"))
        for [[evc, desc, _], max_freq] in zip(evcs, max_freqs):
            writer.writerow((evc, desc, max_freq))

        writer.writerow(("NAME",) + tuple(evc for [evc, _, _] in evcs))
        for plant in sorted(selected, key=db.plant_key):
            row = [plant]
            for [[_, _, evc_freqs], max_freq] in zip(evcs, max_freqs):
                freq = evc_freqs.get(plant)
                if freq is None:
                    row.append(None)
                    continue
                row.append(format(freq / max_freq, ".2f"))
            writer.writerow(row)

            if selection:
                # Prune any non-branching paths leading to this entry
                key = list(n[0] for n in db.plant_key(plant))
                if not key[-1]:
                    key.pop()
                _prune_path(tree, key)
    finally:
        out.detach()

    if selection:
        # Whatever is left in the tree was not pruned by any written plant
        for path in walk_tree(tree):
            msg = "No records matching {}"
            print(msg.format(" ".join(path).capitalize()), file=stderr)