Beispiel #1
0
def bracketpaths(paths):
    """Add brackets to disambiguate paths (and remove the Eq wrapper).

    Every path is unwrapped element-by-element with ``Eq.get``.  A "["
    marker is inserted just after the position reported by ``edge`` for
    the ``firsts`` table; failing that, a "]" marker is inserted just
    before the position reported for the ``lasts`` table; otherwise the
    path is only unwrapped.

    paths -- sequence of paths; it is passed to ``reversed`` and each
             path is sliced, so both levels must be sequences.
             Presumably each path is a list of ``Eq`` items, per the
             ``typecheck`` declaration -- confirm against its definition.

    Returns a list with one unwrapped (and possibly bracketed) list per
    input path, in the same order.
    """
    # Occurrence table over the nodes of all paths together; nodes whose
    # value is exactly 1 are the "hapax" set.
    # (assumes dct.count returns a node -> count mapping -- TODO confirm)
    spans = dct.count(concat(paths))
    hapax = set(node for (node, n) in spans.items() if n == 1)
    # For each node: the last element of whatever findif selects when
    # scanning the paths forwards (firsts) and backwards (lasts).
    firsts = dict((node, findif(elem(node), paths)[-1]) for node in spans)
    lasts = dict((node, findif(elem(node), reversed(paths))[-1])
                 for node in spans)

    def unwrap(nodes):
        # A real list (not map()) so that "+" concatenation below works
        # on Python 3 as well as Python 2.
        return [Eq.get(node) for node in nodes]

    @typecheck([Eq], [str])
    def bracket(path):
        first = edge(path, firsts, hapax)
        last = edge(path, lasts, hapax)
        # -1 is treated as "no such position".
        # NOTE(review): when both first and last are found, only the "["
        # branch runs -- confirm that this is intended.
        if first != -1:
            return unwrap(path[:first + 1]) + ["["] + unwrap(path[first + 1:])
        elif last != -1:
            return unwrap(path[:last]) + ["]"] + unwrap(path[last:])
        else:
            return unwrap(path)

    # Built eagerly so callers receive a list on every Python version.
    return [bracket(path) for path in paths]
Beispiel #2
0
def groupedSites(path, sites):
    """path*[site] -> {site:[filename]}

    Lists the directory `path`, keeps the entries accepted by `visible`,
    and groups them with `dct.collapse`, keyed by what `findif` selects
    from `sites` for the entry's `startswith` test.
    """
    def site_of(filename):
        # presumably the first site that is a prefix of the filename, or
        # None when nothing matches -- verify against findif
        return findif(filename.startswith, sites)

    entries = os.listdir(path)
    shown = filter(visible, entries)
    grouped = dct.collapse(shown, keymap=site_of)
    # Entries grouped under the None key are dropped without any report.
    try:
        del grouped[None]
    except KeyError:
        pass
    return grouped