Example #1
0
def sort(path, threshold, tmpdir=None):
    """Yield ``(timestamp, data)`` pairs read from *path* in sorted order.

    A file whose size is at most *threshold* bytes is loaded and sorted in
    memory.  A larger file is cut into chunks by ``_slice_tsfile``; every
    chunk is sorted, written to its own temporary file, and the chunk files
    are then merged with ``merge``.

    This is a generator, so no work happens until iteration starts.  The
    temporary files are closed and deleted when the merge finishes, when an
    error occurs, or when the generator is closed before being exhausted.

    Args:
        path: File to sort.
        threshold: Size in bytes (compared against ``st_size``) above which
            temporary chunk files are used; also handed to ``_slice_tsfile``.
        tmpdir: Directory for the temporary files, forwarded to ``mkstemp``
            as ``dir``.

    Yields:
        ``(msec_to_timestamp(int(ts)), data)`` for each ``(ts, data)`` record.
    """
    # Small input: sort everything in memory.
    if os.stat(path).st_size <= threshold:
        for ts, data in sorted(_read_raw_file(path)):
            yield msec_to_timestamp(int(ts)), data
        return

    tmpfiles = []
    try:
        # Split and sort: one sorted temporary file per chunk.
        for chunk in _slice_tsfile(_read_raw_file(path), threshold):
            chunk.sort()

            raw_fd, tmp_path = mkstemp(prefix=SORT_FILE_PREFIX, dir=tmpdir)
            tmp = os.fdopen(raw_fd, 'w+t')
            # Register before writing so a failed write is still cleaned up.
            tmpfiles.append((tmp, tmp_path))
            tmp.writelines('%s %s\n' % line for line in chunk)

        # Merge the sorted chunk files.
        # NOTE(review): the files are not rewound here, so `_read_raw_fd`
        # presumably seeks back to the start itself - confirm.
        readers = [_read_raw_fd(tmp) for tmp, _ in tmpfiles]
        for ts, data in merge(*readers):
            yield msec_to_timestamp(int(ts)), data
    finally:
        # Runs on normal completion, on errors, and on generator close, so
        # temporary files never outlive the iteration.
        for tmp, tmp_path in tmpfiles:
            try:
                tmp.close()
            finally:
                os.unlink(tmp_path)
Example #2
0
def _read_file(path):
    """Yield ``(timestamp, data)`` pairs parsed from the lines of *path*.

    The file is opened with ``GzipFile``, then ``BZ2File``, then plain
    ``open``; the first opener that reads without an ``IOError`` wins.
    Each line is stripped and split on the first space into ``ts`` and
    ``data``.

    Args:
        path: File to read.

    Yields:
        ``(msec_to_timestamp(int(ts)), data)`` for each line.

    Raises:
        IOError: If reading fails after records were already yielded, or if
            the last opener fails as well.  Falling back at that point would
            either re-emit records from the start of the file or hide the
            failure behind an empty result.
    """
    openers = (GzipFile, BZ2File, open)
    last_index = len(openers) - 1

    for index, file_cls in enumerate(openers):
        fd = file_cls(path)
        emitted = False
        try:
            for line in fd:
                ts, data = line.strip().split(' ', 1)
                record = msec_to_timestamp(int(ts)), data
                emitted = True
                yield record
            break
        except IOError:
            # An IOError before any record is the "wrong format" signal, so
            # the next opener is tried.  Anything else is a real read error.
            if emitted or index == last_index:
                raise
        finally:
            fd.close()