Ejemplo n.º 1
0
def sentences(lines):
    """Split *lines* into ``<sent>`` chunks and collapse them with ``dct.collapse``.

    Chunks come from ``splitby(elem('<sent>'), lines, first=True)``; falsy
    chunks are filtered out.  Each remaining chunk is handed to
    ``dct.collapse`` together with two functions built with ``pipe``:

    * ``car`` then ``speaker_code``
    * ``cdr``, then keeping only ``useful`` entries, then ``parseloop``,
      then ``car``

    NOTE(review): presumably the first function yields the grouping key and
    the second the grouped value -- confirm against ``dct.collapse``.
    """
    def parseloop(lines, n=0):
        # Formerly annotated as: typecheck([str], [(str, [object])], n=int)
        # Split on lines whose indent equals the current depth ``n``; the
        # first line of each group is cleaned, the rest is parsed one level
        # deeper (or becomes [] when there is nothing left).
        at_depth = lambda line: n == indent(line)
        tree = []
        for group in splitby(at_depth, lines, True):
            head = clean(group[0])
            if group[1:]:
                children = parseloop(group[1:], n=n + 1)
            else:
                children = []
            tree.append((head, children))
        return tree

    chunks = filter(None, splitby(elem('<sent>'), lines, first=True))
    by_speaker = pipe(car, speaker_code)
    parsed_body = pipe(cdr, cur(filter, useful), parseloop, car)
    return dct.collapse(chunks, by_speaker, parsed_body)
Ejemplo n.º 2
0
def run_compare_sgb_and_siy_to_base(fs):
    """[str] -> {utf-8-char:set<lev.Rule>}
    ((sgb <=> base) & (siy <=> base)) - (map (<=> base) rest)

    ``fs[0]`` is the base, ``fs[2]`` is sgb and ``fs[8]`` is siy; every other
    entry of *fs* is part of "rest".  The classified base/sgb and base/siy
    comparisons are combined with ``and_``; then, per key, the classified
    comparison of each remaining entry against the base is folded in with
    ``sub``.

    *fs* is not modified.  Indexing ``fs[8]`` fails (IndexError for a list)
    when *fs* has fewer than nine entries.
    """
    sgb = fs[2]
    siy = fs[8]
    base = fs[0]
    # Build the remainder as a new list rather than `del`-ing entries out of
    # the caller's list, so the argument survives the call intact.
    remaining = [f for i, f in enumerate(fs) if i not in (0, 2, 8)]
    outsiders = dct.zipwith(and_, classify(compare(base, sgb)), classify(compare(base, siy)), default=set())
    others = map(compose(classify, cur(compare, base)), remaining)
    kws = {"default": set()}
    return dct.zipwith((lambda v, *rest: reduce(sub, rest, v)), outsiders, *others, **kws)
Ejemplo n.º 3
0
def compare(a,b):
    """Compare an observed ``r`` value against 1000 per-sample totals.

    ``r_total`` is ``r`` over the values of ``normalise(concat(a), concat(b))``
    (passed through ``debug``).  For every value of ``normaliseall(a, b)`` --
    each indexed from 0 to 999 and yielding a pair -- the pair's absolute
    deviations from its own mean are added to the running total for that
    index.  The result is ``countif(cur(le, r_total), totals)``.

    NOTE(review): presumably this counts the totals that are >= r_total --
    confirm against ``cur``/``countif``.
    """
    n_samples = 1000
    r_total = debug(r(normalise(concat(a),concat(b)).itervalues()))
    # The loop below is the expanded form of the former one-liner
    #   countif(cur(le,r_total), map(r,unzip(normaliseall(a,b).values())))
    totals = [0] * n_samples
    normeds = normaliseall(a,b)
    for pairs in normeds.itervalues():
        for i in xrange(n_samples):
            # Distinct names: do not rebind the parameters a and b.
            x, y = pairs[i]
            # NOTE(review): under Python 2 this is floor division when both
            # values are ints -- confirm the normalised values are floats.
            mean_xy = (x + y) / 2
            totals[i] += abs(x - mean_xy) + abs(y - mean_xy)
    return countif(cur(le,r_total), totals)
Ejemplo n.º 4
0
def run_compare_all_to_sgbsiy(fs):
    """[str] -> {utf-8-char:set<lev.Rule>}
    (siy<=>sgb) - (map (base<=>) rest)

    ``fs[0]`` is the base, ``fs[2]`` is sgb and ``fs[8]`` is siy; every other
    entry of *fs* is part of "rest".  The classified sgb/siy comparison is
    taken as the starting point; then, per key, the classified comparison of
    each remaining entry against the base is folded in with ``sub``.

    *fs* is not modified.  Indexing ``fs[8]`` fails (IndexError for a list)
    when *fs* has fewer than nine entries.
    """
    sgb = fs[2]
    siy = fs[8]
    base = fs[0]
    # Build the remainder as a new list rather than `del`-ing entries out of
    # the caller's list, so the argument survives the call intact.
    remaining = [f for i, f in enumerate(fs) if i not in (0, 2, 8)]
    diff = classify(compare(sgb, siy))
    others = map(compose(classify, cur(compare, base)), remaining)
    kws = {"default": set()}
    return dct.zipwith((lambda v, *rest: reduce(sub, rest, v)), diff, *others, **kws)
Ejemplo n.º 5
0
def run_compare_to_base(fs):
    """[str] -> [{utf-8-char:set<lev.Rule>}]

    Map every entry of *fs* (``fs[0]`` included) through ``compare`` with
    ``fs[0]`` pre-bound, followed by ``classify``.
    """
    base = fs[0]
    compare_then_classify = pipe(cur(compare, base), classify)
    return map(compare_then_classify, fs)
Ejemplo n.º 6
0

def find_collapsed(f, collapsed):
    """{char:[int]} -> [(char,int)] (sorted)

    Apply *f* to *collapsed* through ``dct.map`` and return the resulting
    items ordered by ``snd`` of each item, largest first.
    """
    ranked = list(dct.map(f, collapsed).items())
    ranked.sort(key=snd, reverse=True)
    return ranked


def diff(freqs):
    """Return avg of entries 2 and 8 minus avg of the other non-base entries.

    The first term averages ``freqs[2]`` and ``freqs[8]``; the second
    averages ``lst_except(freqs, 0, 2, 8)``.

    NOTE(review): presumably ``lst_except`` drops indices 0, 2 and 8 --
    confirm against its definition.
    """
    return avg([freqs[2], freqs[8]]) - avg(lst_except(freqs, 0, 2, 8))


def variance(freqs):
    """Return the summed squared deviation of ``cdr(freqs)`` from its average,
    divided by that average.

    NOTE(review): the divisor is the average, not the number of entries, so
    this is not the textbook variance and fails when the average is zero --
    confirm that this is intended.
    """
    mean = avg(cdr(freqs))
    squared_deviations = [(mean - count) ** 2 for count in cdr(freqs)]
    return sum(squared_deviations) / mean


# find_collapsed with its scoring function bound in advance through cur:
# one variant scores with diff, the other with variance.
# NOTE(review): presumably each then takes only the `collapsed` mapping --
# confirm against cur.
find_difference = cur(find_collapsed, diff)
find_variance = cur(find_collapsed, variance)


def _html_text(value):
    """Format *value* with ``%s`` and escape ``&``, ``<`` and ``>`` for HTML."""
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2
    # quote=False: only &, < and > are replaced, identically on both versions.
    return escape("%s" % (value,), False)


def to_html_group_differences(f, name, differences):
    """Write *differences* to the file-like object *f* as an HTML table.

    Output is an ``<h1>`` heading holding *name*, followed by a table with
    one row per entry of *differences*.  Each entry is a pair
    ``(change, score)``:

    * when ``change`` is a tuple, the row shows ``change[1] &rarr; change[0]``;
    * otherwise the row shows ``change`` itself.

    The first cell of every row is the entry's zero-based position.  The
    heading text, the change cells and the score are HTML-escaped so that
    characters such as ``<`` or ``&`` cannot break the markup.

    Only ``f.write`` is used, so any object with a ``write`` method works.
    """
    f.write("<h1>%s</h1>\n" % _html_text(name))
    # The header ends with a single space instead of a newline: that is what
    # the former `print >> f, ...,` (trailing comma) form produced.
    f.write("<table border=1 cellspacing=0 bordercolor='black'><tr><td></td><th>Char</th><th>Variance</th> ")
    for rank, (change, score) in enumerate(differences):
        if isinstance(change, tuple):
            template = "<tr><td>%s</td><td>%s &rarr; %s</td><td>%s</td></tr>\n"
            cells = rank, _html_text(change[1]), _html_text(change[0]), _html_text(score)
        else:
            template = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>\n"
            cells = rank, _html_text(change), _html_text(score)
        f.write(template % cells)
    f.write("</table>\n")
Ejemplo n.º 7
0
def readcorpus(extractor, speakers, delimiter='\t'):
    """Read *speakers* with ``iceread.read`` and map *extractor* over the result.

    ``dct.map`` is given ``cur(map, extractor)`` as the function to apply to
    what ``iceread.read(speakers, 12, delimiter)`` returns.

    NOTE(review): the meaning of the literal 12 is not visible here --
    confirm against ``iceread.read``.
    """
    extract_each = cur(map, extractor)
    corpus = iceread.read(speakers, 12, delimiter)
    return dct.map(extract_each, corpus)
Ejemplo n.º 8
0
def tinify(regions):
    """Re-encode every item found in *regions* through a code built by ``encode``.

    The items of ``dct.count(mapn(concat, regions.values()))`` are sorted by
    ``snd``; ``fst`` of each sorted item is passed to ``encode``.  The
    returned code's ``__getitem__`` is then mapped two levels deep over
    *regions* through ``dct.map``.

    NOTE(review): presumably rarer items come first in the sort order --
    confirm against ``dct.count`` and ``encode``.
    """
    counts = dct.count(mapn(concat, regions.values()))
    ordered = sorted(counts.items(), key=snd)
    code = encode(map(fst, ordered))
    lookup = code.__getitem__
    recode_nested = cur(map, cur(map, lookup))
    return dct.map(recode_nested, regions)
Ejemplo n.º 9
0
def groupedRegions(path, regions):
    """path*{region:[site]} -> {region:[filename]}

    Each value of *regions* is passed through ``cur(groupedSites)(path)``,
    then ``dict.values``, then ``concat``; the mapping is applied with
    ``dct.map``.
    """
    sites_to_files = pipe(cur(groupedSites)(path), dict.values, concat)
    return dct.map(sites_to_files, regions)