Example #1 (0)
File: sorts.py — Project: talwai/petl
def itermergesort(sources, key, header, missing, reverse):
    """Merge rows from pre-sorted `sources` into a single sorted stream.

    First yields a header tuple, then data rows merged via
    `shortlistmergesorted`.

    Parameters:
    - sources: iterable of row containers; the first row of each is its header
    - key: field selection passed to `asindices`, or None for whole-row order
    - header: explicit output header, or None to union the source headers
    - missing: fill value for fields absent from a source (or short rows)
    - reverse: merge in descending order when True
    """

    # first need to standardise headers of all input tables
    # borrow this from itercat - TODO remove code smells

    its = [iter(t) for t in sources]
    # next(it) instead of it.next(): equivalent on Python 2.6+ and also
    # works on Python 3, where .next() was removed
    source_flds_lists = [next(it) for it in its]

    if header is None:
        # determine output fields by gathering all fields found in the
        # sources, preserving first-seen order
        outflds = list()
        for flds in source_flds_lists:
            for f in flds:
                if f not in outflds:
                    # add any new fields as we find them
                    outflds.append(f)
    else:
        # predetermined output fields
        outflds = header
    yield tuple(outflds)

    def _standardisedata(it, flds, outflds):
        # Map each output field to its position in this source's rows,
        # computed once per source instead of calling flds.index(f) for
        # every field of every row; None marks a field this source lacks.
        positions = [flds.index(f) if f in flds else None for f in outflds]
        # now construct and yield the data rows
        for row in it:
            try:
                # fast path: direct positional lookup
                yield tuple(row[i] if i is not None else missing
                            for i in positions)
            except IndexError:
                # handle short rows: pad with `missing`, copy what exists
                outrow = [missing] * len(outflds)
                for i, f in enumerate(flds):
                    try:
                        outrow[outflds.index(f)] = row[i]
                    except IndexError:
                        pass  # be relaxed about short rows
                yield tuple(outrow)

    # wrap all iterators to standardise fields
    sits = [
        _standardisedata(it, flds, outflds)
        for flds, it in zip(source_flds_lists, its)
    ]

    # now determine key function
    getkey = None
    if key is not None:
        # convert field selection into field indices
        indices = asindices(outflds, key)
        # now use field indices to construct a _getkey function
        # N.B., this will probably raise an exception on short rows
        getkey = operator.itemgetter(*indices)

    # OK, do the merge sort
    for row in shortlistmergesorted(getkey, reverse, *sits):
        yield row
Example #2 (0)
def _mergesorted(key=None, reverse=False, *iterables):
    """Merge the pre-sorted `iterables` into one sorted iterator.

    Ascending merges go through `heapqmergesorted` (heapq-based);
    `heapq.merge` cannot sort descending, so reverse merges use
    `shortlistmergesorted` instead. Casual profiling suggested the two
    are comparable in speed, though more careful measurement may be
    worthwhile.
    """
    return (shortlistmergesorted(key, True, *iterables)
            if reverse
            else heapqmergesorted(key, *iterables))
Example #3 (0)
 def close(self):
     # Flush everything still held by this connection downstream, in
     # sorted order, then close. Rows buffered in self.cache are sorted
     # in memory; if earlier rows were spilled to self.chunkfiles
     # (presumably each chunk already sorted — TODO confirm against the
     # spill path), the chunks and the cache are merge-sorted together.
     # sort anything remaining in the cache
     self.cache.sort(key=self.getkey, reverse=self.reverse)
     if self.chunkfiles:
         chunkiters = [iterchunk(f) for f in self.chunkfiles]
         chunkiters.append(self.cache) # make sure any left in cache are included
         for row in shortlistmergesorted(self.getkey, self.reverse, *chunkiters):
             self.broadcast(row)
     else:
         # no spill files: the cache alone holds all remaining rows
         for row in self.cache:
             self.broadcast(row)
     super(SortConnection, self).close()
Example #4 (0)
File: push.py — Project: talwai/petl
 def close(self):
     # Push any rows this connection still holds downstream, in sorted
     # order, before closing. In-memory rows live in self.cache; rows
     # spilled earlier live in self.chunkfiles.
     self.cache.sort(key=self.getkey, reverse=self.reverse)
     if not self.chunkfiles:
         # everything fit in memory: emit the sorted cache directly
         for record in self.cache:
             self.broadcast(record)
     else:
         # merge the spilled chunks together with the in-memory remainder
         streams = [iterchunk(fn) for fn in self.chunkfiles]
         streams.append(self.cache)  # don't lose what's still cached
         for record in shortlistmergesorted(self.getkey, self.reverse, *streams):
             self.broadcast(record)
     super(SortConnection, self).close()
Example #5 (0)
def itermergesort(sources, key, header, missing, reverse):
    """Merge rows from pre-sorted `sources` into one sorted stream.

    Yields a header tuple first (either `header`, or the union of the
    source headers in first-seen order), then the merged data rows.
    Fields a source lacks are filled with `missing`; `key` selects the
    sort fields via `asindices`, and `reverse` flips the order.
    """

    # standardise the headers of all input tables first
    # (borrowed from itercat - TODO remove code smells)
    its = [iter(src) for src in sources]
    source_flds_lists = [it.next() for it in its]

    if header is not None:
        # caller dictated the output fields
        outflds = header
    else:
        # gather every field seen across the sources, first-seen order
        outflds = list()
        for flds in source_flds_lists:
            for f in flds:
                if f not in outflds:
                    outflds.append(f)
    yield tuple(outflds)

    def _standardisedata(it, flds, outflds):
        # yield each source row reshaped to the output field order
        for row in it:
            try:
                # quickest path: index straight into the row
                yield tuple(row[flds.index(f)] if f in flds else missing
                            for f in outflds)
            except IndexError:
                # short row: pad with `missing`, copy what is present
                padded = [missing] * len(outflds)
                for i, f in enumerate(flds):
                    try:
                        padded[outflds.index(f)] = row[i]
                    except IndexError:
                        pass  # be relaxed about short rows
                yield tuple(padded)

    # wrap every iterator so all sources present identical fields
    sits = [_standardisedata(it, flds, outflds)
            for flds, it in zip(source_flds_lists, its)]

    # build the key function, if any
    getkey = None
    if key is not None:
        # translate the field selection into indices, then into a getter
        # N.B., this will probably raise an exception on short rows
        indices = asindices(outflds, key)
        getkey = operator.itemgetter(*indices)

    # finally, perform the merge sort
    for row in shortlistmergesorted(getkey, reverse, *sits):
        yield row