Example #1
0
 def __init__(self, s, p):
     """Initialise the instance from section *s* of *p*.

     :param s: section name; also stored as ``self.name``.
     :param p: object providing ``get(section, option)`` and \
     ``getint(section, option)`` (presumably a config parser).

     The ``triggers`` option is read as text with one trigger per line; each
     line becomes a ``Trigger`` of type ``'hhtype'`` labelled with ``self.id``.
     """
     self.name = s
     self.id = p.get(s, 'id')
     self.rank = p.getint(s, 'rank')
     self.abbv = p.get(s, 'abbv')
     self.bibabbv = p.get(s, 'bibabbv')

     # Strip surrounding whitespace first so that an empty option value
     # yields no lines (and therefore no triggers) at all.
     trigger_lines = p.get(s, 'triggers').strip().splitlines()
     triggers = []
     for line in trigger_lines:
         triggers.append(Trigger('hhtype', self.id, line))
     self.triggers = triggers
Example #2
0
    def test_Trigger(self):
        """Check Trigger comparison, evaluation, formatting and sortability."""
        from pyglottolog.util import Trigger

        negated = Trigger('hhtype', 'grammar', 'phonologie AND NOT morphologie')
        single = Trigger('hhtype', 'phonology', 'phonologie')
        literal = Trigger('hhtype', 'grammar', 'grammar')

        # Different clauses compare unequal; a trigger equals itself.
        assert negated != literal and negated == negated

        allkeys = range(5)
        keys_by_word = {
            'grammar': [1, 2],
            'phonologie': [2, 3],
            'morphologie': [3, 4],
        }

        # Calling a trigger yields the set of keys whose words satisfy it.
        expectations = (
            (negated, {2}),
            (single, {2, 3}),
            (literal, {1, 2}),
        )
        for trigger, expected_keys in expectations:
            self.assertEqual(trigger(allkeys, keys_by_word), expected_keys)

        self.assertIn(
            'not morphologie and phonologie', Trigger.format('a', negated))

        # Triggers must be orderable, and format() output contains the label
        # that was passed in.
        for trigger in sorted([negated, single, literal]):
            formatted = Trigger.format(trigger.type, trigger)
            self.assertIn(trigger.type, formatted)
Example #3
0
def load_triggers(tree=TREE):
    """Collect the ``inlg`` and ``lgcode`` triggers of all languoids in *tree*.

    :param tree: passed on to ``walk_tree``; defaults to ``TREE``.
    :return: dict with the keys ``'inlg'`` and ``'lgcode'``, each mapping to a \
    list of ``Trigger`` objects in the order the languoids were walked.
    """
    res = {'inlg': [], 'lgcode': []}
    for lang in walk_tree(tree):
        for type_, collected in res.items():
            # Languoids without this option in their [triggers] section
            # contribute nothing for this trigger type.
            if not lang.cfg.has_option('triggers', type_):
                continue
            # Label each trigger with the languoid name and its hid,
            # falling back to the id when hid is empty.
            label = '%s [%s]' % (lang.name, lang.hid or lang.id)
            for text in lang.cfg.getlist('triggers', type_):
                collected.append(Trigger(type_, label, text))
    return res
Example #4
0
def markall(e, trigs, verbose=True, rank=None):
    """Assign trigger-derived field values to bibitems based on title words.

    :param e: mapping of bibitem key to a ``(type, fields)`` pair; updated in \
    place for every bibitem matched by at least one trigger.
    :param trigs: sized, re-iterable collection of triggers; each has ``field`` \
    and ``cls`` attributes and is callable as ``trigger(keys, keys_by_word)``.
    :param verbose: when true, print summary counts before returning.
    :param rank: optional callable mapping a label to a comparable rank. If \
    given, an existing field value is only replaced by a strictly higher-ranked \
    one.
    :return: the same mapping *e* that was passed in.
    """
    # Fields that any of the triggers may assign.
    target_fields = {trig.field for trig in trigs}

    # Bibitems missing at least one of those fields are the candidates.
    candidates = {}
    for key, (entrytype, fields) in e.items():
        if not all(name in fields for name in target_fields):
            candidates[key] = (entrytype, fields)
    candidate_keys = set(candidates)

    # Index: title word -> keys of the candidate bibitems whose title has it.
    keys_by_word = defaultdict(set)
    for key, (_, fields) in candidates.items():
        for word in wrds(fields.get('title', '')):
            keys_by_word[word].add(key)

    # bibitem key -> trigger class -> list of triggers that matched.
    matches = defaultdict(lambda: defaultdict(list))
    for _, group in Trigger.group(trigs):
        # Only the first trigger of each group is evaluated; presumably all
        # triggers in a group share the same clauses.
        for key in group[0](candidate_keys, keys_by_word):
            by_cls = matches[key]
            for trig in group:
                by_cls[trig.cls].append(trig)

    for key, by_cls in matches.items():
        entrytype, fields = e[key]
        updated = dict(fields)
        # Ascending by number of matching triggers, so the class with the
        # most matches is applied last and wins.
        ordered = sorted(by_cls.items(), key=lambda item: len(item[1]))
        for (field, type_), matched in ordered:
            if rank and field in updated:
                # Keep the current value unless the new type ranks strictly
                # higher; any ' (comp...' suffix is ignored when ranking.
                current = updated[field].split(' (comp')[0]
                if rank(current) >= rank(type_):
                    continue
            updated[field] = Trigger.format(type_, matched)
        e[key] = (entrytype, updated)

    if verbose:
        summary = (
            ("trigs", len(trigs)),
            ("label classes", len(target_fields)),
            ("unlabeled refs", len(candidates)),
            ("updates", len(matches)),
        )
        for caption, count in summary:
            print(caption, count)
    return e
Example #5
0
def markall(e, trigs, verbose=True, rank=None):
    """Label bibitems by matching triggers against the words of their titles.

    Only bibitems that lack at least one of the fields the triggers relate to
    are considered. For each such bibitem matched by a trigger, the triggered
    field is set to ``Trigger.format(type, matching_triggers)``.

    :param e: dict mapping bibitem keys to ``(type, fields)`` pairs; entries \
    for matched bibitems are replaced with a pair holding an updated copy of \
    the fields.
    :param trigs: the triggers to apply (iterated more than once, and \
    ``len()`` is taken when *verbose* is true).
    :param verbose: print four summary counts when true.
    :param rank: optional callable used to compare an existing field value \
    with a candidate type; the existing value is kept when it ranks at least \
    as high.
    :return: *e* itself.
    """
    # The set of fields the triggers relate to.
    wanted = set()
    for trigger in trigs:
        wanted.add(trigger.field)

    # Every bibitem lacking any of the potentially triggered fields.
    unlabeled = {
        key: (typ, fields)
        for key, (typ, fields) in e.items()
        if any(field not in fields for field in wanted)
    }
    unlabeled_keys = set(unlabeled)

    # Map each title word to the set of bibitem keys with that word.
    word_index = defaultdict(set)
    for key, (typ, fields) in unlabeled.items():
        title = fields.get('title', '')
        for word in wrds(title):
            word_index[word].add(key)

    # Collect, per bibitem key and trigger class, the triggers that fired.
    fired = defaultdict(lambda: defaultdict(list))
    for _clauses, same_clause_triggers in Trigger.group(trigs):
        representative = same_clause_triggers[0]
        for key in representative(unlabeled_keys, word_index):
            for trigger in same_clause_triggers:
                fired[key][trigger.cls].append(trigger)

    def match_count(item):
        # item is a (trigger class, matching triggers) pair.
        return len(item[1])

    for key, triggers_by_cls in fired.items():
        typ, old_fields = e[key]
        new_fields = dict(old_fields)
        # Sorting by match count makes the trigger class with the biggest
        # number of matching triggers the last one handled.
        for cls, matching in sorted(triggers_by_cls.items(), key=match_count):
            field, type_ = cls
            # With a rank function, only overwrite an existing value if
            # something better comes along.
            keep_existing = (
                rank
                and field in new_fields
                and rank(new_fields[field].split(' (comp')[0]) >= rank(type_)
            )
            if keep_existing:
                continue
            new_fields[field] = Trigger.format(type_, matching)
        e[key] = (typ, new_fields)

    if verbose:
        print("trigs", len(trigs))
        print("label classes", len(wanted))
        print("unlabeled refs", len(unlabeled))
        print("updates", len(fired))
    return e