Example #1
0
def group_words(csv):
    """[[str]]-> {str:{str:{str:[float]}}} ie {Word:{Segment:{Feature:[Value]}}}

    The rows of `csv` left after `cdr` are grouped with `dct.collapse`; each
    group is regrouped by segment label, turned into a dict of features, and
    passed through `makesegment` via `dct.map_items`.

    Every column title is expected to contain a digit: `re.search` returns
    None otherwise and the `.end()` call raises AttributeError.

    NOTE(review): `car`, `cdr`, `carcdr`, `takewhile`, `dropwhile`, `curried`,
    `fnc` and `dct` are project helpers not visible here; presumably
    car/cdr are first/rest and takewhile/dropwhile are curried -- confirm.
    """
    def first_digit_end(title):
        # Index just past the first digit found in the title.
        return re.search('[0-9]', title).end()

    def segment_name(title):
        # Everything up to and including the first digit.
        return title[:first_digit_end(title)]

    segment = fnc.pipe(car, dropwhile(str.islower), segment_name)

    def feature(title):
        # Everything after the first digit.
        return title[first_digit_end(title):]

    fillsegments = curried(dct.map_items)(makesegment)
    features = carcdr(lambda title, data: (feature(title), map(float, data)))

    def phones(rows):
        return dct.map(dict, dct.collapse(rows, segment, features))

    rows_by_word = dct.collapse(cdr(csv),
                                fnc.pipe(car, takewhile(str.islower)),
                                fnc.ident)
    return dct.map(fnc.pipe(phones, fillsegments), rows_by_word)
Example #2
0
def corpus(speakers, datadir='/Volumes/Data/Corpora/en/ice-gb/ice-gb-2/data/'):
    """Look up the parsed sentences for every (fname, speaker) pair in `speakers`.

    speakers -- mapping whose values are sequences of (fname, speaker) pairs;
                it is traversed with `dct.map`.
    datadir  -- string prefix (trailing separator included) prepended to
                `fname.lower() + '.cor'` to build each file path.

    Warning! The default `datadir` is a hard-coded path specific to jones;
    pass `datadir` explicitly on any other machine.

    Each file is opened, handed to `sentences`, indexed by `speaker`, and
    closed again.  IOError (OSError on Python 3) from `open` propagates.

    NOTE(review): `sentences`, `mapn` and `dct` are defined elsewhere; this
    assumes `sentences(f)[speaker]` is fully read before the file closes.
    """
    #@typecheck((str,str), [(str, [object])])
    def per_speaker(pair):
        # Unpack in the body: tuple parameters are a SyntaxError on Python 3.
        fname, speaker = pair
        # Close the handle once parsed instead of leaking one per file.
        with open(datadir + fname.lower() + '.cor') as corfile:
            return sentences(corfile)[speaker]
    return dct.map(lambda files: mapn(per_speaker, files), speakers)
Example #3
0
def group_regions(regions, words):
    """{str:[int]}*{str:{str:{str:[float]}}} ->
         {str:{str:{str:{str:[float]}}}}
    that is, {Region:{Word:{Segment:(Type,{Feature:[Value]})}}}

    For each region, every integer in its list is reduced by 2 and the
    result is given to `lst_extract`; the function that returns is applied
    two `dct.map` levels below each entry of `words`.

    NOTE(review): `lst_extract`, `curried` and `dct` are project helpers not
    visible here; presumably `lst_extract` selects list positions -- confirm.
    """
    map_values = curried(dct.map)

    def for_region(numbers):
        shifted = [n - 2 for n in numbers]
        per_word = map_values(map_values(lst_extract(shifted)))
        return dct.map(per_word, words)

    return dct.map(for_region, regions)
Example #4
0
def makesegment(type, d):
    """Return the default feature lists for a segment type, overridden by `d`.

    type -- segment label; `chop(type)` must be one of the keys of the
            defaults table below ('C', 'V', 'R', 'MULT', 'VC'), otherwise
            KeyError is raised.
    d    -- non-empty mapping of feature name to list of values; the length
            of one of its lists sets the length of every default list.
            An empty `d` raises StopIteration.

    Each default value is repeated to that length, then the entries of `d`
    replace any default with the same feature name.

    NOTE(review): `chop` and `dct.map` are defined elsewhere; the result is
    whatever `dct.map` returns (it must support `.update`).
    """
    # C's numbers:
    # GL=PV: {0,.5,1}, H/HW/W: {0,1}, V=C=PL=IR=VO={0,1}, L={0,1,2}
    # I think H/HW/W should be collapsed at read time. L(6), PV(5) and C(4) not
    # also not IR,VO,PL(2) but I wish we had more of them.

    # next(iter(...)) replaces the Python-2-only d.itervalues().next() and
    # behaves the same on Python 2.6+ and Python 3.
    size = len(next(iter(d.values())))
    features = dict(C=dict(GL=0.0, V=0.0, H=0.0, PV=0.0, L=0.0),#H=HW=W total(6)
                    V=dict(B=1.0, H=1.0, L=1.0, R=1.0), #Got rid of '' and "RH"
                    R=dict(MN=1.5, PL=1.0),
                    # mult's range is 0.0 - 2.0 but its meaning varies?
                    MULT=dict(MULT=1.0),
                    VC=dict()) # VC is erroneous data eh.
    #TODO:Collapse H/HW/W
    #TODO:Decide if V's L and C's L are different and if so make them different
    filled = dct.map(lambda default: [default] * size, features[chop(type)])
    filled.update(d)
    return filled
Example #5
0
 def outermost(range):
     # Apply `sub2` to every item of `range` (which shadows the builtin here),
     # build the extractor, and apply it two dct.map levels below each entry
     # of `words`.  `sub2`, `dctmapper`, `lst_extract` and `words` come from
     # the enclosing scope, which is not visible in this fragment.
     shifted = [sub2(n) for n in range]
     extract = lst_extract(shifted)
     per_word = dctmapper(dctmapper(extract))
     return dct.map(per_word, words)
Example #6
0
def classify(row):
    """[[lev.Rule]] -> {utf-8-char:set<lev.Rule>}

    Flatten `row` with `concat`, keep the rules that pass
    `negate(self_sub)`, group them by their `src` attribute with
    `dct.collapse`, and turn each group into a set.

    NOTE(review): `concat`, `negate`, `self_sub` and `dct` are defined
    elsewhere; presumably `self_sub` marks rules that substitute a
    character for itself -- confirm.
    """
    def source_of(rule):
        return rule.src

    kept = filter(negate(self_sub), concat(row))
    by_source = dct.collapse(kept, keymap=source_of)  # collapse_envs,
    return dct.map(set, by_source)
Example #7
0
def find_collapsed(f, collapsed):
    """{char:[int]} -> [(char,int)] (sorted)

    Run `f` over `collapsed` with `dct.map`, then return the resulting
    items ordered by the `snd` key, largest first.

    NOTE(review): `dct.map` and `snd` are defined elsewhere; presumably
    `snd` picks the second element of each pair -- confirm.
    """
    scored = dct.map(f, collapsed)
    return sorted(scored.items(), key=snd, reverse=True)
Example #8
0
def readcorpus(extractor, speakers, delimiter='\t'):
    """Read the corpus for `speakers` and run `extractor` over what was read.

    `iceread.read(speakers, 12, delimiter)` supplies the data; `dct.map`
    then applies `cur(map, extractor)` to it.

    NOTE(review): `cur`, `dct` and `iceread` are defined elsewhere, and the
    meaning of the literal 12 is not visible here -- confirm with
    `iceread.read`.
    """
    extract_each = cur(map, extractor)
    raw = iceread.read(speakers, 12, delimiter)
    return dct.map(extract_each, raw)
Example #9
0
def tinify(regions):
    """Re-encode every item in `regions` through a code table built from counts.

    The values of `regions` are flattened with `mapn(concat, ...)` and
    counted with `dct.count`; the counted items, sorted ascending by `snd`,
    have their `fst` parts passed to `encode`.  Each item two `map` levels
    inside `regions` is then replaced by `code[item]`.

    NOTE(review): `mapn`, `concat`, `encode`, `cur`, `fst`, `snd` and `dct`
    are defined elsewhere; the shape of `encode`'s result is assumed to
    support `__getitem__` only because the code uses it that way.
    """
    counts = dct.count(mapn(concat, regions.values()))
    by_count = sorted(counts.items(), key=snd)
    code = encode(map(fst, by_count))
    recode = cur(map, cur(map, code.__getitem__))
    return dct.map(recode, regions)
Example #10
0
def corpusSize(path, regions):
    """path*{region:[filename]}

    For each region, apply `pipe(read(path), len)` to each of its files via
    `lap` and sum the results; return the (region, total) items sorted
    ascending by `snd`.

    NOTE(review): `read`, `pipe`, `lap`, `snd` and `dct` are defined
    elsewhere; presumably `lap` is a list-returning map -- confirm.
    """
    def file_sizes(files):
        # Built per region, as before, so read(path) is called at the same points.
        return lap(pipe(read(path), len), files)

    numbers = dct.map(file_sizes, regions)
    totals = dct.map(sum, numbers)
    return sorted(totals.items(), key=snd)
Example #11
0
def groupedRegions(path, regions):
    """path*{region:[site]} -> {region:[filename]}

    Each value of `regions` goes through `cur(groupedSites)(path)`, then
    `dict.values`, then `concat`.

    NOTE(review): `groupedSites`, `cur`, `pipe`, `concat` and `dct` are
    defined elsewhere; `groupedSites` is assumed to return a dict only
    because `dict.values` is applied to its result.
    """
    sites_to_files = pipe(cur(groupedSites)(path), dict.values, concat)
    return dct.map(sites_to_files, regions)