def group_words(csv):
    "[[str]]-> {str:{str:{str:[float]}}} ie {Word:{Segment:{Feature:[Value]}}}"
    # Column titles appear to pack word+segment+feature into one string:
    # a lowercase word prefix, then a segment name ending at its first digit,
    # then the feature name — presumed from the parsing below; TODO confirm
    # against the actual CSV header.
    # Keep everything up to and including the FIRST digit: '[0-9]' matches a
    # single digit, so .end() is the index just past it.
    segment_name = lambda s: s[:re.search('[0-9]', s).end()]
    # From a row: take the title cell (car), strip the leading lowercase word
    # (project-curried dropwhile), then trim to the segment name.
    segment = fnc.pipe(car, dropwhile(str.islower), segment_name)
    # The remainder of the title after the first digit is the feature name.
    feature = lambda s: s[re.search('[0-9]', s).end():]
    # Partially-applied dct.map_items; `makesegment` is defined elsewhere in
    # this file — NOTE(review): its semantics are not visible here.
    fillsegments = curried(dct.map_items)(makesegment)
    # (title, data-cells) -> (feature-name, float values).
    # NOTE(review): map() is lazy on Py3 — confirm consumers force the values.
    features = carcdr(lambda title, data: (feature(title), map(float, data)))
    # Group one word's rows by segment name; each segment maps feature -> values.
    phones = lambda l: dct.map(dict, dct.collapse(l, segment, features))
    # Skip the header row (cdr), then bucket rows by their lowercase word
    # prefix (project-curried takewhile applied to the title cell).
    words = dct.collapse(cdr(csv), fnc.pipe(car, takewhile(str.islower)), fnc.ident)
    return dct.map(fnc.pipe(phones, fillsegments), words)
def groupedSites(path, sites):
    "path*[site] -> {site:[filename]}"
    # Bucket the directory's visible entries under the first site name that
    # each filename starts with (findif returns None when nothing matches).
    which_site = lambda fname: findif(fname.startswith, sites)
    listing = filter(visible, os.listdir(path))
    corpora = dct.collapse(listing, keymap=which_site)
    # Filenames matching no known site all collapse under the None key;
    # discard that bucket silently.
    corpora.pop(None, None)
    return corpora
def sentences(lines):
    # Parse a flat transcript into {speaker_code: sentence-tree}.
    #@typecheck([str], [(str, [object])], n=int)
    def parseloop(lines, n=0):
        # Split the current span into chunks that start wherever a line sits
        # at indent level n, then turn each chunk into (cleaned head line,
        # children parsed at depth n+1) — an empty tail yields no children.
        # NOTE(review): the comprehension's `lines` deliberately shadows the
        # parameter: each chunk produced by splitby is processed in turn.
        return [(clean(lines[0]), parseloop(lines[1:], n=n+1) if lines[1:] else [])
                for lines in splitby(lambda line: n==indent(line), lines, True)]
    # Split the input at '<sent>' marker lines (splitby/elem are project
    # helpers; exact flag semantics not visible here — TODO confirm), drop
    # empty groups, then key each group by its speaker code (from the first
    # line) and parse the remaining `useful` lines into a tree, taking the
    # first parsed element.
    return dct.collapse(filter(None, splitby(elem('<sent>'), lines, first=True)),
                        pipe(car, speaker_code),
                        pipe(cdr, cur(filter, useful), parseloop, car))
def classify(row):
    "[[lev.Rule]] -> {utf-8-char:set<lev.Rule>}"
    # Flatten the nested rule rows, drop self-substitution rules (those for
    # which `self_sub` holds), bucket what remains by each rule's source
    # character, and dedupe every bucket into a set.
    useful_rules = filter(negate(self_sub), concat(row))
    by_source = dct.collapse(useful_rules, keymap=lambda rule: rule.src)
    return dct.map(set, by_source)