예제 #1
0
def mergesort(filename, output=None, key=None, maxitems=1e6, progress=True):
    """Sort a file of line-delimited JSON items with an on-disk merge sort.

    :param filename: Either a filename as a ``str`` or a ``py._path.local.LocalPath`` instance.
    :type filename:  ``str`` or ``py._path.local.LocalPath``

    :param output: An optional output filename as a ``str`` or a ``py._path.local.LocalPath`` instance.
                   When omitted, the input file is overwritten with the sorted data.
    :type output:  ``str`` or ``py._path.local.LocalPath`` or ``None``

    :param key: An optional key to sort the data on.
    :type key:  ``function`` or ``None``

    :param maxitems: Maximum number of items to hold in memory at a time.
    :type maxitems:  ``int``

    :param progress: Whether or not to display a progress bar
    :type progress: ``bool``

    This uses ``py._path.local.LocalPath.make_numbered_dir`` to create temporary scratch space to work
    with when splitting the input file into sorted chunks. Each chunk is sorted in memory and written
    to its own ``<n>.json`` file (one ``dumps``-serialised item per line); the chunk files are then
    streamed back through the ``~merge`` function, which is almost identical to ``~heapq.merge`` but
    adds in the support of an optional key function.
    """

    p = filename if isinstance(filename, LocalPath) else LocalPath(filename)
    if output is None:
        output = p
    elif not isinstance(output, LocalPath):
        # A plain ``str`` has no ``open`` method; wrap it the same way as ``filename``.
        output = LocalPath(output)
    key = key if key is not None else lambda x: x

    scratch = LocalPath.make_numbered_dir(prefix="mergesort-")

    # Count the lines with a context manager so the handle is closed promptly.
    with p.open("r") as f:
        nlines = sum(1 for _ in f)

    # Compute a reasonable chunksize < maxitems by repeatedly halving the line
    # count. Floor division keeps it an integer; the lower bound of 1 guards
    # against a zero chunk size (and the division by zero that would follow)
    # for inputs of zero or one line.
    chunksize = nlines // 2
    while chunksize > 1 and chunksize >= maxitems:
        chunksize //= 2
    chunksize = max(chunksize, 1)

    # Ceiling division: a trailing partial chunk still counts as one step.
    nchunks = -(-nlines // chunksize)

    # Split the file up into n sorted files
    if progress:
        bar = ProgressBar("Split/Sorting Data", max=nchunks)
    for i, items in enumerate(ichunks(chunksize, jsonstream(p))):
        with scratch.ensure("{0:d}.json".format(i)).open("w") as f:
            f.write("\n".join(map(dumps, sorted(items, key=key))))
        if progress:
            bar.next()
    if progress:
        bar.finish()

    # Merge the sorted chunk files back into a single sorted output file.
    # NOTE(review): the chunks are sorted with ``key`` but ``key`` is not
    # forwarded to ``merge`` here -- confirm against ``merge``'s signature
    # whether it should be passed for non-identity keys.
    q = scratch.listdir("*.json")
    with output.open("w") as f:
        if progress:
            bar = ProgressBar("Merge/Sorting Data", max=nlines)
        for item in merge(*imap(jsonstream, q)):
            f.write("{0:s}\n".format(dumps(item)))
            if progress:
                bar.next()
        if progress:
            bar.finish()
예제 #2
0
def walk_down(root, skip=constantly(False), include_self=True):
    """Generate ``root`` and all nodes beneath it, depth-first, pre-order.

    :arg skip: A predicate describing nodes whose children should not be
        visited. A node for which it returns true is still yielded itself;
        only its descendants are left out.
    :arg include_self: A flag for including ``root`` itself in the output.

    The AST we get from Reflect.parse is not a uniform tree: it seems to
    have already been turned into more specialized objects, so which fields
    hold children depends on the node type. Children are therefore found by
    flattening all of a node's values and keeping those that ``is_node``
    accepts.

    """
    if include_self:
        yield root
    children = ifilter(is_node, iflatten(root.itervalues()))
    for node in children:
        if not skip(node):
            # Python 2 has no "yield from", so re-yield the subtree by hand.
            # The recursive call yields ``node`` first, then its descendants.
            for descendant in walk_down(node, skip=skip):
                yield descendant
        else:
            # Pruned: report the node itself but do not look inside it.
            yield node
예제 #3
0
def walk_down(root, skip=constantly(False), include_self=True):
    """Generate ``root`` and all nodes beneath it, depth-first, pre-order.

    :arg skip: A predicate describing nodes whose children should not be
        visited. A node for which it returns true is still yielded itself;
        only its descendants are left out.
    :arg include_self: A flag for including ``root`` itself in the output.

    The AST we get from Reflect.parse is not a uniform tree: it seems to
    have already been turned into more specialized objects, so which fields
    hold children depends on the node type. Children are therefore found by
    flattening all of a node's values and keeping those that ``is_node``
    accepts.

    """
    if include_self:
        yield root
    # Explicit stack in place of recursion: each entry is the partially
    # consumed child iterator of one node on the path currently being walked.
    pending = [ifilter(is_node, iflatten(root.itervalues()))]
    while pending:
        for child in pending[-1]:
            # Consult the predicate before handing the child to the caller.
            descend = not skip(child)
            yield child
            if descend:
                # Go one level deeper; the parent's iterator stays on the
                # stack and is resumed once this subtree is exhausted.
                pending.append(ifilter(is_node, iflatten(child.itervalues())))
                break
        else:
            # No children left at this level: return to the parent level.
            pending.pop()
예제 #4
0
 def search(self, query, treat_as_regex=True):
     """Return ``Task`` objects built from the raw tasks that match ``query``.

     :param query: The text to look for.
     :param treat_as_regex: When true, the raw tasks are filtered through
         ``select(query, ...)`` (presumably a regex match -- confirm against
         ``select``). Otherwise a raw task matches when ``query in task``.
     """
     raw_tasks = self.iterate(raw=True)
     if treat_as_regex:
         matches = select(query, raw_tasks)
     else:
         matches = ifilter(lambda raw: query in raw, raw_tasks)
     return map(Task, matches)
예제 #5
0
 def delete(self, task):
     """Rewrite the stored tasks without the one identified by ``task``.

     ``task`` is resolved through ``self.task``; every item of ``self`` that
     compares unequal to the resolved task is passed on, lazily, to
     ``self._atomic_write``.
     """
     doomed = self.task(task)

     def is_other(candidate):
         # Same operand order as ``operator.ne(doomed, candidate)``.
         return doomed != candidate

     self._atomic_write(ifilter(is_other, self))
예제 #6
0
 def get(self, task_uuid):
     """Return the first item of ``self`` equal to the task for ``task_uuid``.

     ``task_uuid`` is resolved through ``self.task``; the result is whatever
     ``first`` returns for the lazily filtered matches.
     """
     wanted = self.task(task_uuid)

     def is_wanted(candidate):
         # Same operand order as ``operator.eq(wanted, candidate)``.
         return wanted == candidate

     matches = ifilter(is_wanted, self)
     return first(matches)
예제 #7
0
파일: condense.py 프로젝트: mcphail/dxr
def process_function(props):
    """Replace ``props['type']`` with a signature computed from the args.

    ``props['args']`` has its first and last characters dropped (presumably
    the enclosing parentheses -- confirm against the producer of ``props``)
    and is split on commas. Each piece is left-stripped and empty pieces are
    discarded. The resulting tuple and the old ``props['type']`` are passed
    to ``c_type_sig``. ``props`` is modified in place and returned.
    """
    # Compute FuncSig based on args:
    inner = props['args'][1:-1]
    stripped = (str.lstrip(piece) for piece in inner.split(","))
    input_args = tuple(arg for arg in stripped if arg)
    props['type'] = c_type_sig(input_args, props['type'])
    return props
예제 #8
0
파일: condense.py 프로젝트: gartung/dxr
def process_function(props):
    """Replace ``props['type']`` with a signature computed from the args.

    ``props['args']`` has its first and last characters dropped (presumably
    the enclosing parentheses -- confirm against the producer of ``props``)
    and is split on commas. Each piece is left-stripped and empty pieces are
    discarded. The resulting tuple and the old ``props['type']`` are passed
    to ``c_type_sig``. ``props`` is modified in place and returned.
    """
    # Compute FuncSig based on args:
    kept = []
    for chunk in props['args'][1:-1].split(","):
        trimmed = str.lstrip(chunk)
        if trimmed:
            kept.append(trimmed)
    props['type'] = c_type_sig(tuple(kept), props['type'])
    return props