Esempio n. 1
0
def run_collapse_differences(fs, get=getdst):
    """Count substitution rules between ``fs[0]`` and every other item of ``fs``.

    ``fs[0]`` is the baseline; each remaining item is compared against it
    with ``compare``. From the flattened comparison only rules whose type is
    ``lev.SUB`` and whose ``dst`` differs from ``src`` are kept, and ``get``
    is applied to each kept rule.

    Args:
        fs: indexable, sliceable sequence of items accepted by ``compare``.
            Not modified by this function.
        get: callable applied to each kept rule (defaults to ``getdst``).

    Returns:
        The result of ``dct.zip`` over the count of all extracted values
        followed by one count per compared item, with ``default=0``.

    Raises:
        IndexError: if ``fs`` is empty.
    """
    base = fs[0]
    # Slice rather than `del fs[0]` so the caller's sequence is left intact
    # (and immutable sequences such as tuples are accepted).
    rest = fs[1:]
    subs = [
        [
            get(rule)
            for rule in concat(compare(base, f))
            if rule.type == lev.SUB and rule.dst != rule.src
        ]
        for f in rest
    ]
    return dct.zip(dct.count(concat(subs)), default=0, *map(dct.count, subs))
Esempio n. 2
0
def sharedtopwords(talbanken, regions):
    """Print vocabulary and token overlap statistics for two word sources.

    Args:
        talbanken: string scanned line by line. Empty lines and lines that
            start with a space are skipped; ``fst`` of the whitespace-split
            line supplies the word for every other line.
        regions: iterable of strings; each line of each string is one word.

    Prints eight numbers, one per line:
        1. vocabulary size of ``talbanken``
        2. vocabulary size of ``regions``
        3. size of the shared vocabulary
        4. number of ``regions`` word types missing from ``talbanken``
        5. size of the combined vocabulary
        6. ``talbanken`` tokens whose type is in the shared vocabulary
        7. ``regions`` tokens whose type is in the shared vocabulary
        8. tokens from both sources whose type is not shared
    """
    # NOTE(review): the original read an undefined name `s` and never used
    # the `talbanken` parameter; assumed the parameter was intended -- confirm.
    talwords = [
        fst(line.split())
        for line in talbanken.splitlines()
        # Guard against blank lines, which would raise on `line[0]`.
        if line and not line.startswith(' ')
    ]
    talvocab = set(talwords)
    talcount = dct.count(talwords)
    swewords = [w for r in regions for w in r.splitlines()]
    swevocab = set(swewords)
    swecount = dct.count(swewords)
    # NOTE(review): `sharedVocab` was referenced but never defined; assumed
    # to be the intersection of both vocabularies -- confirm.
    shared_vocab = swevocab & talvocab
    # These sums run over the shared vocabulary, so they are shared-token
    # counts (the original locals were named "unshared").
    shared_tokens_tal = sum(talcount[w] for w in shared_vocab)
    shared_tokens_swe = sum(swecount[w] for w in shared_vocab)
    print(len(talvocab))
    print(len(swevocab))
    print(len(shared_vocab))
    print(len(swevocab - talvocab))
    # Same value as the three disjoint parts summed: |T - S| + |S - T| + |S & T|.
    print(len(talvocab | swevocab))
    print(shared_tokens_tal)
    print(shared_tokens_swe)
    print(len(talwords) + len(swewords) - shared_tokens_tal - shared_tokens_swe)
Esempio n. 3
0
def bracketpaths(paths):
    """Add brackets to disambiguate paths (and remove Eq wrapper).

    Every element of every path is unwrapped with ``Eq.get``. At most one
    bracket is inserted per path, at an index supplied by ``edge``:

    * if ``edge(path, firsts, hapax)`` is not -1, a ``"["`` is inserted
      right after that index;
    * otherwise, if ``edge(path, lasts, hapax)`` is not -1, a ``"]"`` is
      inserted right before that index;
    * otherwise the path is only unwrapped.

    Args:
        paths: sequence of paths, each a sliceable sequence of ``Eq``
            elements (it must support ``reversed``).

    Returns:
        A list with one list per input path, in the same order.
    """
    spans = dct.count(concat(paths))
    # Nodes that occur exactly once across all paths.
    hapax = {node for (node, n) in spans.items() if n == 1}
    # For each node, the last element of whatever `findif` returns when
    # searching the paths front-to-back / back-to-front.
    firsts = {node: findif(elem(node), paths)[-1] for node in spans}
    lasts = {node: findif(elem(node), reversed(paths))[-1] for node in spans}

    def unwrap(nodes):
        # A real list (not `map`) so the pieces can be joined with `+` on
        # Python 3 as well as Python 2.
        return [Eq.get(node) for node in nodes]

    @typecheck([Eq], [str])
    def bracket(path):
        first = edge(path, firsts, hapax)
        last = edge(path, lasts, hapax)
        if first != -1:
            return unwrap(path[:first + 1]) + ["["] + unwrap(path[first + 1:])
        elif last != -1:
            return unwrap(path[:last]) + ["]"] + unwrap(path[last:])
        else:
            return unwrap(path)

    # Materialise the result so callers get a list on every Python version.
    return [bracket(path) for path in paths]
Esempio n. 4
0
def testPaths(self):
    """Check ``path.paths`` against the expected bracketed path strings.

    Three checks run in order: the trees under ``acltree["A"]``, the trees
    under ``palmtree["A"]``, and a small sentence parsed with
    ``iceread.sentences`` whose paths are compared as counts.
    """
    # Case 1: ACL trees.
    acl_actual = map(path.paths, acltree["A"])
    acl_expected = [
        ["A-[-B-p", "A-]-B-q", "A-B-[-r", "]-A-B-s"],
        ["A-[-B-p", "A-B-q", "A-B-r", "]-A-B-s"],
    ]
    test(acl_actual, acl_expected)

    # Case 2: palm tree sentence.
    palm_actual = map(path.paths, palmtree["A"])
    palm_expected = [[
        'S-[-Ns-the',
        'S-Ns-closest',
        'S-Ns-thing',
        'S-Ns-P-[-to',
        'S-Ns-P-Ns-[-a',
        'S-]-Ns-P-Ns-home',
        'S-Vsb-was',
        'S-N-[-a',
        'S-N-string',
        'S-N-hammock',
        'S-N-S+-[-and',
        'S-N-S+-+,',
        'S-N-S+-Fa-[-Rq-when',
        'S-N-S+-Fa-Ni-it',
        'S-N-S+-]-Fa-Vd-rained',
        'S-N-S+-+,',
        'S-N-S+-Np-[-some',
        'S-N-S+-Np-palm',
        'S-N-S+-]-Np-fronds',
        'S-N-S+-Vd-draped',
        'S-N-S+-P-[-over',
        ']-S-N-S+-P-sticks',
    ]]
    test(palm_actual, palm_expected)

    # Case 3: a sentence given inline, one node per line with indentation
    # marking depth; compared as a count of paths.
    inline_expected = {
        'hi-[-child-grandchild0': 1,
        ']-hi-child3': 1,
        'hi-child2': 1,
        'hi-]-child-grandchild1': 1,
    }
    inline_source = '''[<sent> <#1:1:A>]
hi
 child
  grandchild0
  grandchild1
 child2
 child3'''
    inline_tree = iceread.sentences(inline_source.split('\n'))['A'][0]
    test(inline_expected, dct.count(path.paths(inline_tree)))
Esempio n. 5
0
def count(filepattern, params):
    """Count the (src, dst) pairs found across all parameterised files.

    For each entry of ``params`` the pattern is filled in with
    ``filepattern % tuple(param)``, passed through ``clean``, and handed to
    ``significants`` with the fixed argument 1000. Every (src, dst) pair
    yielded is collected, and the whole list is counted with ``dct.count``.
    """
    pairs = []
    for param in params:
        cleaned = clean(filepattern % tuple(param))
        for src, dst in significants(cleaned, 1000):
            pairs.append((src, dst))
    return dct.count(pairs)
Esempio n. 6
0
def countpaths(a, b):
    """[Path]*[Path]->{Path:(float,float)}

    Count the items of ``a`` and of ``b`` separately with ``dct.count`` and
    combine the two counts with ``dct_zip``, using 0 as the default.
    """
    # TODO: I should change default=0 to some smoothed value
    counts_a = dct.count(a)
    counts_b = dct.count(b)
    return dct_zip(counts_a, counts_b, default=0)
Esempio n. 7
0
def tinify(regions):
    """Re-encode the contents of ``regions`` through a code built from counts.

    The values of ``regions`` are flattened with ``mapn(concat, ...)`` and
    counted; the (item, count) pairs are sorted by ``snd`` and the ``fst`` of
    each pair is passed to ``encode``. The resulting ``code`` is then applied
    through ``code.__getitem__`` to ``regions`` via ``dct.map``.
    """
    tallies = dct.count(mapn(concat, regions.values()))
    ranked = sorted(tallies.items(), key=snd)
    code = encode(map(fst, ranked))
    # Presumably a curried two-level map that looks every innermost item up
    # in `code` -- confirm against the definition of `cur`.
    recode = cur(map, cur(map, code.__getitem__))
    return dct.map(recode, regions)