def pruneRelativeEntropy(filename, outfile):
    pt = PhraseTable(fileLines(filename))
    mapFn = lambda line: computeRelEnt(pt, line)
    with timer('pruning') as tim:
        with openMaybeGz(outfile, 'w') as o:
            count = 0
            chunksize = 100
            for line in threaded_map(mapFn, fileChunks(filename, chunksize), threadCount = 6, maxInputQ = 1024):
                o.write(line)
                count += chunksize
                if 0 == count % 500:
                    (elapsed, remaining, totalTime) = tim.predict(count, pt.count)
                    print "{0:.3f} elapsed; {1:.3f} remaining; {2:.3f} total; count = {3}  \r".format(elapsed, remaining, totalTime, count), ; stdout.flush()
def fileChunks(filename, chunkSize):
    """Yield the contents of `filename` as strings of `chunkSize` lines each.

    The file is opened with openMaybeGz in 'r' mode and read line by line.
    Lines are concatenated unchanged (line endings included), so joining all
    yielded chunks reproduces the sequence of lines read.

    Parameters:
        filename:  path handed to openMaybeGz.
        chunkSize: number of lines per yielded chunk.

    Yields:
        One string per `chunkSize` consecutive lines. If lines are left over
        at end of file, they are yielded as a final, shorter chunk. An empty
        file yields nothing.

    Note: if `chunkSize` is never matched by the line count (e.g. 0 or a
    negative value), all lines end up in a single chunk yielded at the end.
    """
    with openMaybeGz(filename, 'r') as fh:
        # Collect lines in a list and join once per chunk instead of growing
        # a string with repeated +=.
        pending = []
        for line in fh:
            pending.append(line)
            if len(pending) == chunkSize:
                yield ''.join(pending)
                pending = []
        # Flush the trailing partial chunk, if any.
        if pending:
            yield ''.join(pending)