Ejemplo n.º 1
0
def disk_sort(input, filename, sort_keys, binaries=(), sort_buffer_size='10%',
              desc=False):
    """Sort the keys of ``input`` on disk and yield them back in sorted order.

    Every key from ``input`` is serialized to one line of ``filename``
    (each field followed by a ``\\xff`` separator), the file is handed to
    ``unix_sort`` and the sorted lines are then read back with
    ``sort_reader`` and decoded.

    This is a generator: nothing is written or sorted until the first item
    is requested.

    Args:
        input: iterable of ``(key, value)`` pairs. ``key`` must be a
            sequence of fields; the values are ignored.
        filename: path of the scratch file the keys are written to.
        sort_keys: indices of the key fields to sort on.
        binaries: indices of key fields that are base64-encoded instead of
            JSON-encoded (unless the field was detected as numeric).
        sort_buffer_size: passed through to ``unix_sort``.
        desc: passed through to ``unix_sort``.

    Yields:
        ``(fields, ())`` tuples, where ``fields`` is the list of decoded key
        fields of one record.

    Raises:
        ValueError: if a key is a plain string rather than a sequence of
            fields.
    """
    import ujson
    from disco.comm import open_local
    from disco.fileutils import AtomicFile
    import base64

    out_fd = AtomicFile(filename)
    key_types = None
    MPT = ()

    def is_b64(index):
        # Single source of truth for "this field is stored as base64", so
        # the writer and the reader below can never disagree.
        return index in binaries and key_types[index] != 'n'

    for key, _ in input:
        if isinstance(key, (str, unicode)):
            raise ValueError("Keys must be sequences", key)

        # The first key decides, per field, whether that field is treated as
        # numeric ('n') or not ('') for the rest of the run.
        if key_types is None:
            key_types = []
            for kt in key:
                try:
                    float(kt)
                except (TypeError, ValueError, OverflowError):
                    key_types.append('')
                else:
                    key_types.append('n')

        # Serialize each field as a NULL marker, base64 or JSON, followed by
        # the field separator; one record per line.
        for i, each_key in enumerate(key):
            if each_key is None:
                ukey = b'\x00'
            elif is_b64(i):
                ukey = base64.b64encode(each_key)
            else:
                ukey = ujson.dumps(each_key)
            out_fd.write(ukey)
            out_fd.write(b'\xff')
        out_fd.write('\n')
    out_fd.flush()
    out_fd.close()

    if key_types is None:
        # No records were read: there is nothing to sort, and key_types
        # cannot be indexed by sort_keys.
        return

    unix_sort(filename,
              [(sk, key_types[sk]) for sk in sort_keys],
              sort_buffer_size=sort_buffer_size,
              desc=desc)
    fd = open_local(filename)
    for k in sort_reader(fd, fd.url):
        rval = []
        for i, key in enumerate(k):
            if key == b'\x00':
                rkey = None
            elif is_b64(i):
                # Mirror the encoder: numeric fields were written as JSON
                # even when listed in ``binaries``.
                rkey = base64.b64decode(key)
            else:
                rkey = ujson.loads(key)
            rval.append(rkey)
        yield rval, MPT