def bracketpaths(paths):
    """Add brackets to disambiguate paths (and remove the Eq wrapper).

    Every path is unwrapped element by element with ``Eq.get`` and receives
    at most one bracket marker:

    * ``"["`` is inserted right after the index that ``edge`` reports for
      the path against ``firsts``;
    * otherwise ``"]"`` is inserted right before the index that ``edge``
      reports for the path against ``lasts``;
    * if ``edge`` returns -1 in both cases the path is only unwrapped.

    paths -- a sequence of paths (it is passed to ``reversed``), each path
             being a sequence of Eq-wrapped nodes.

    Returns a list containing one list per input path, in input order.
    """
    # Occurrence count of every node over all paths taken together.
    spans = dct.count(concat(paths))
    # Nodes that occur exactly once overall.
    hapax = set(node for (node, n) in spans.items() if n == 1)
    # NOTE(review): findif/elem are defined elsewhere. Presumably findif
    # returns the first path satisfying elem(node), so firsts[node] is the
    # final element of the first such path and lasts[node] the final element
    # of the last such path -- confirm against those helpers.
    firsts = dict((node, findif(elem(node), paths)[-1]) for node in spans)
    lasts = dict((node, findif(elem(node), reversed(paths))[-1])
                 for node in spans)

    @typecheck([Eq], [str])
    def bracket(path):
        first = edge(path, firsts, hapax)
        last = edge(path, lasts, hapax)
        # Unwrap once into a real list. Concatenating map() results with a
        # list only works where map() returns a list (Python 2); on Python 3
        # it raises TypeError.
        plain = [Eq.get(item) for item in path]
        if first != -1:
            return plain[:first + 1] + ["["] + plain[first + 1:]
        if last != -1:
            return plain[:last] + ["]"] + plain[last:]
        return plain

    # Build a concrete list so callers can index or re-iterate the result on
    # any Python version.
    return [bracket(path) for path in paths]
def groupedSites(path, sites):
    """path*[site] -> {site:[filename]}

    Lists the directory `path`, keeps the entries accepted by `visible`, and
    groups them with `dct.collapse`, keyed by the result of
    `findif(filename.startswith, sites)`.  Any group stored under the key
    None is removed before the mapping is returned.
    """
    def site_of(filename):
        # Presumably the first site that `filename` starts with, or None
        # when no site matches -- confirm against findif.
        return findif(filename.startswith, sites)

    entries = filter(visible, os.listdir(path))
    grouped = dct.collapse(entries, keymap=site_of)

    if None not in grouped:
        return grouped

    # Entries grouped under None are dropped without being reported.
    del grouped[None]
    return grouped