예제 #1
0
def test_sort_5():
    """A compound sort key may be given as field names, indices, or a mix."""

    rows = (('foo', 'bar'),
            (2.3, 2),
            (1.2, 9),
            (2.3, 6),
            (3.2, 1),
            (1.2, 10))

    expected = (('foo', 'bar'),
                (1.2, 9),
                (1.2, 10),
                (2.3, 2),
                (2.3, 6),
                (3.2, 1))

    # Fields can be named or given by zero-based position; every spelling of
    # the (foo, bar) key below must produce the same ordering.
    for compound_key in (('foo', 'bar'), (0, 1), ('foo', 1), (0, 'bar')):
        ieq(expected, sort(rows, key=compound_key))
예제 #2
0
 def __init__(self, left, right, lkey, rkey, presorted=False, missing=None,
              buffersize=None, tempdir=None, cache=True, lprefix=None,
              rprefix=None):
     """Store both inputs, sorting each by its own key unless presorted.

     When `presorted` is true the tables are kept exactly as given and
     `buffersize`, `tempdir` and `cache` are not used; otherwise they are
     forwarded to ``sort``.
     """
     if not presorted:
         sort_options = dict(buffersize=buffersize, tempdir=tempdir,
                             cache=cache)
         left = sort(left, lkey, **sort_options)
         right = sort(right, rkey, **sort_options)
     self.left, self.right = left, right
     self.lkey, self.rkey = lkey, rkey
     self.missing = missing
     self.lprefix, self.rprefix = lprefix, rprefix
예제 #3
0
def test_sort_2():
    """An explicit (foo, bar) key and the default key give the same order."""

    rows = (('foo', 'bar'),
            ('C', '2'),
            ('A', '9'),
            ('A', '6'),
            ('F', '1'),
            ('D', '10'))

    expected = (('foo', 'bar'),
                ('A', '6'),
                ('A', '9'),
                ('C', '2'),
                ('D', '10'),
                ('F', '1'))

    # explicit compound key
    ieq(expected, sort(rows, key=('foo', 'bar')))

    # no key: default is lexical sort
    ieq(expected, sort(rows))
예제 #4
0
def test_mergesort_3():
    """Reverse mergesort on 'foo' equals a reverse sort of the concatenation."""

    left = (('foo', 'bar'),
            ('A', 9),
            ('C', 2),
            ('D', 10),
            ('A', 6),
            ('F', 1))

    right = (('foo', 'baz'),
             ('B', 3),
             ('D', 10),
             ('A', 10),
             ('F', 4))

    # reference result: concatenate then sort (mergesort should be the more
    # efficient route, esp. when presorted)
    expected = sort(cat(left, right), key='foo', reverse=True)

    # unsorted inputs; compared twice, as in the rest of this suite
    merged = mergesort(left, right, key='foo', reverse=True)
    for _ in range(2):
        ieq(expected, merged)

    # inputs sorted up front and flagged as presorted
    left_sorted = sort(left, key='foo', reverse=True)
    right_sorted = sort(right, key='foo', reverse=True)
    merged = mergesort(left_sorted, right_sorted, key='foo', reverse=True,
                       presorted=True)
    for _ in range(2):
        ieq(expected, merged)
예제 #5
0
    def test_fromhdf5sorted():
        """Round-trip rows through an indexed HDF5 table and read them back
        sorted by 'foo' via ``fromhdf5sorted``."""

        # Keep this name bound for the whole test: the temporary file is
        # removed when the NamedTemporaryFile object is closed or collected.
        tmpfile = NamedTemporaryFile()

        # set up a new hdf5 table to work with
        h5file = tables.open_file(tmpfile.name, mode='w', title='Test file')
        try:
            h5file.create_group('/', 'testgroup', 'Test Group')
            h5table = h5file.create_table('/testgroup', 'testtable', FooBar,
                                          'Test Table')

            # load some data into the table
            table1 = (('foo', 'bar'),
                      (3, b'asdfgh'),
                      (2, b'qwerty'),
                      (1, b'zxcvbn'))
            for row in table1[1:]:
                # `field` (not `f`) so the temporary file handle is not
                # shadowed by the loop variable
                for i, field in enumerate(table1[0]):
                    h5table.row[field] = row[i]
                h5table.row.append()
            h5table.cols.foo.create_csindex()
            h5file.flush()

            # verify we can get the data back out (compared twice, as in the
            # original test)
            table2 = fromhdf5sorted(h5table, sortby='foo')
            ieq(sort(table1, 'foo'), table2)
            ieq(sort(table1, 'foo'), table2)
        finally:
            # clean up even when an assertion above fails
            h5file.close()
예제 #6
0
파일: setops.py 프로젝트: pombredanne/petl
 def __init__(self, a, b, presorted=False, buffersize=None, tempdir=None,
              cache=True):
     """Keep tables `a` and `b`, sorting both first unless `presorted`.

     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` and are
     unused when the inputs are presorted.
     """
     if not presorted:
         sort_options = dict(buffersize=buffersize, tempdir=tempdir,
                             cache=cache)
         a = sort(a, **sort_options)
         b = sort(b, **sort_options)
     self.a, self.b = a, b
예제 #7
0
    def test_fromhdf5sorted():
        """Round-trip rows through an indexed HDF5 table and read them back
        sorted by 'foo' via ``fromhdf5sorted``."""

        # Keep this name bound for the whole test: the temporary file is
        # removed when the NamedTemporaryFile object is closed or collected.
        tmpfile = NamedTemporaryFile()

        # set up a new hdf5 table to work with
        h5file = tables.open_file(tmpfile.name, mode='w', title='Test file')
        try:
            h5file.create_group('/', 'testgroup', 'Test Group')
            h5table = h5file.create_table('/testgroup', 'testtable', FooBar,
                                          'Test Table')

            # load some data into the table
            table1 = (('foo', 'bar'),
                      (3, b'asdfgh'),
                      (2, b'qwerty'),
                      (1, b'zxcvbn'))
            for row in table1[1:]:
                # `field` (not `f`) so the temporary file handle is not
                # shadowed by the loop variable
                for i, field in enumerate(table1[0]):
                    h5table.row[field] = row[i]
                h5table.row.append()
            h5table.cols.foo.create_csindex()
            h5file.flush()

            # verify we can get the data back out (compared twice, as in the
            # original test)
            table2 = fromhdf5sorted(h5table, sortby='foo')
            ieq(sort(table1, 'foo'), table2)
            ieq(sort(table1, 'foo'), table2)
        finally:
            # clean up even when an assertion above fails
            h5file.close()
예제 #8
0
 def __init__(self, left, right, lkey, rkey, presorted=False,
              buffersize=None, tempdir=None, cache=True):
     """Store both inputs, sorting each by its own key unless presorted.

     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` and are
     unused when `presorted` is true.
     """
     if not presorted:
         sort_options = dict(buffersize=buffersize, tempdir=tempdir,
                             cache=cache)
         left = sort(left, lkey, **sort_options)
         right = sort(right, rkey, **sort_options)
     self.left, self.right = left, right
     self.lkey, self.rkey = lkey, rkey
예제 #9
0
def test_sort_none():
    """None is expected to sort ahead of numbers and of datetimes."""

    # None mixed with integers
    rows = (('foo', 'bar'),
            ('C', 2),
            ('A', 9),
            ('A', None),
            ('F', 1),
            ('D', 10))
    expected = (('foo', 'bar'),
                ('A', None),
                ('F', 1),
                ('C', 2),
                ('A', 9),
                ('D', 10))
    ieq(expected, sort(rows, 'bar'))

    # None mixed with datetimes; `at` derives each value from one shared
    # "now" by replacing the given field
    at = datetime.now().replace

    rows = (('foo', 'bar'),
            ('C', at(hour=5)),
            ('A', at(hour=1)),
            ('A', None),
            ('F', at(hour=9)),
            ('D', at(hour=17)))
    expected = (('foo', 'bar'),
                ('A', None),
                ('A', at(hour=1)),
                ('C', at(hour=5)),
                ('F', at(hour=9)),
                ('D', at(hour=17)))
    ieq(expected, sort(rows, 'bar'))
예제 #10
0
def diff(a, b, presorted=False, buffersize=None, tempdir=None, cache=True):
    """
    Find the difference between rows in two tables. Returns a pair of tables.
    E.g.::

        >>> import petl as etl
        >>> a = [['foo', 'bar', 'baz'],
        ...      ['A', 1, True],
        ...      ['C', 7, False],
        ...      ['B', 2, False],
        ...      ['C', 9, True]]
        >>> b = [['x', 'y', 'z'],
        ...      ['B', 2, False],
        ...      ['A', 9, False],
        ...      ['B', 3, True],
        ...      ['C', 9, True]]
        >>> added, subtracted = etl.diff(a, b)
        >>> # rows in b not in a
        ... added
        +-----+---+-------+
        | x   | y | z     |
        +=====+===+=======+
        | 'A' | 9 | False |
        +-----+---+-------+
        | 'B' | 3 | True  |
        +-----+---+-------+

        >>> # rows in a not in b
        ... subtracted
        +-----+-----+-------+
        | foo | bar | baz   |
        +=====+=====+=======+
        | 'A' |   1 | True  |
        +-----+-----+-------+
        | 'C' |   7 | False |
        +-----+-----+-------+

    Convenient shorthand for ``(complement(b, a), complement(a, b))``. See also
    :func:`petl.transform.setops.complement`.

    If `presorted` is True, it is assumed that the data are already sorted by
    the given key, and the `buffersize`, `tempdir` and `cache` arguments are
    ignored. Otherwise, the data are sorted, see also the discussion of the
    `buffersize`, `tempdir` and `cache` arguments under the
    :func:`petl.transform.sorts.sort` function.

    """

    if not presorted:
        # Sort each input once here and hand the sort options to `sort`
        # itself; the complement calls below are told the data are presorted,
        # so this is the only place where the options can take effect.
        a = sort(a, buffersize=buffersize, tempdir=tempdir, cache=cache)
        b = sort(b, buffersize=buffersize, tempdir=tempdir, cache=cache)
    added = complement(b, a, presorted=True, buffersize=buffersize,
                       tempdir=tempdir, cache=cache)
    subtracted = complement(a, b, presorted=True, buffersize=buffersize,
                            tempdir=tempdir, cache=cache)
    return added, subtracted
예제 #11
0
def test_sort_buffered_tempdir():
    """A buffered sort given an explicit `tempdir` matches the plain sort."""
    # Local import so the block does not rely on a module-level import; the
    # platform's temporary directory replaces a hard-coded POSIX-only "/tmp".
    import tempfile

    table = (("foo", "bar"), ("C", 2), ("A", 9), ("A", 6), ("F", 1), ("D", 10))

    # test sort forwards
    expectation = (("foo", "bar"), ("F", 1), ("C", 2), ("A", 6), ("A", 9), ("D", 10))
    result = sort(table, "bar")
    ieq(expectation, result)
    result = sort(table, "bar", buffersize=2, tempdir=tempfile.gettempdir())
    ieq(expectation, result)
예제 #12
0
 def __init__(self, a, b, presorted=False, buffersize=None, tempdir=None,
              cache=True):
     """Keep tables `a` and `b`, sorting both first unless `presorted`.

     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` and are
     unused when the inputs are presorted.
     """
     if not presorted:
         sort_options = dict(buffersize=buffersize, tempdir=tempdir,
                             cache=cache)
         a = sort(a, **sort_options)
         b = sort(b, **sort_options)
     self.a, self.b = a, b
예제 #13
0
def test_sort_2():
    """An explicit (foo, bar) key and the default key give the same order."""

    rows = (
        ("foo", "bar"),
        ("C", "2"),
        ("A", "9"),
        ("A", "6"),
        ("F", "1"),
        ("D", "10"),
    )
    expected = (
        ("foo", "bar"),
        ("A", "6"),
        ("A", "9"),
        ("C", "2"),
        ("D", "10"),
        ("F", "1"),
    )

    # explicit compound key
    ieq(expected, sort(rows, key=("foo", "bar")))

    # no key: default is lexical sort
    ieq(expected, sort(rows))
예제 #14
0
def test_sort_buffered_tempdir():
    """A buffered sort given an explicit `tempdir` matches the plain sort."""
    # Local import so the block does not rely on a module-level import; the
    # platform's temporary directory replaces a hard-coded POSIX-only '/tmp'.
    import tempfile

    table = (('foo', 'bar'), ('C', 2), ('A', 9), ('A', 6), ('F', 1), ('D', 10))

    # test sort forwards
    expectation = (('foo', 'bar'), ('F', 1), ('C', 2), ('A', 6), ('A', 9),
                   ('D', 10))
    result = sort(table, 'bar')
    ieq(expectation, result)
    result = sort(table, 'bar', buffersize=2, tempdir=tempfile.gettempdir())
    ieq(expectation, result)
예제 #15
0
파일: joins.py 프로젝트: podpearson/petl
 def __init__(self, left, right, lkey, rkey, presorted=False,
              buffersize=None, tempdir=None, cache=True):
     """Store both inputs, sorting each by its own key unless presorted.

     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` and are
     unused when `presorted` is true.
     """
     if not presorted:
         sort_options = dict(buffersize=buffersize, tempdir=tempdir,
                             cache=cache)
         left = sort(left, lkey, **sort_options)
         right = sort(right, rkey, **sort_options)
     self.left, self.right = left, right
     self.lkey, self.rkey = lkey, rkey
예제 #16
0
def test_sort_buffered_tempdir():
    """A buffered sort given an explicit `tempdir` matches the plain sort."""

    rows = (("foo", "bar"), ("C", 2), ("A", 9), ("A", 6), ("F", 1), ("D", 10))
    expected = (("foo", "bar"), ("F", 1), ("C", 2), ("A", 6), ("A", 9), ("D", 10))

    # forward sort, no buffering
    ieq(expected, sort(rows, "bar"))

    # buffered sort pointed at a relative "tmp" directory, created on demand
    scratch_dir = "tmp"
    if not os.path.exists(scratch_dir):
        os.mkdir(scratch_dir)
    ieq(expected, sort(rows, "bar", buffersize=2, tempdir=scratch_dir))
예제 #17
0
def test_sort_buffered_tempdir():
    """A buffered sort given an explicit `tempdir` matches the plain sort."""

    rows = (('foo', 'bar'), ('C', 2), ('A', 9), ('A', 6), ('F', 1), ('D', 10))
    expected = (('foo', 'bar'), ('F', 1), ('C', 2), ('A', 6), ('A', 9),
                ('D', 10))

    # forward sort, no buffering
    ieq(expected, sort(rows, 'bar'))

    # buffered sort pointed at a relative 'tmp' directory, created on demand
    scratch_dir = 'tmp'
    if not os.path.exists(scratch_dir):
        os.mkdir(scratch_dir)
    ieq(expected, sort(rows, 'bar', buffersize=2, tempdir=scratch_dir))
예제 #18
0
def test_sort_2():
    """An explicit (foo, bar) key and the default key give the same order."""

    rows = (('foo', 'bar'), ('C', '2'), ('A', '9'), ('A', '6'), ('F', '1'),
            ('D', '10'))
    expected = (('foo', 'bar'), ('A', '6'), ('A', '9'), ('C', '2'),
                ('D', '10'), ('F', '1'))

    # explicit compound key
    ieq(expected, sort(rows, key=('foo', 'bar')))

    # no key: default is lexical sort
    ieq(expected, sort(rows))
예제 #19
0
def test_sort_5():
    """A compound sort key may be given as field names, indices, or a mix."""

    rows = (('foo', 'bar'), (2.3, 2), (1.2, 9), (2.3, 6), (3.2, 1), (1.2, 10))
    expected = (('foo', 'bar'), (1.2, 9), (1.2, 10), (2.3, 2), (2.3, 6),
                (3.2, 1))

    # Fields can be named or given by zero-based position; every spelling of
    # the (foo, bar) key below must produce the same ordering.
    for compound_key in (('foo', 'bar'), (0, 1), ('foo', 1), (0, 'bar')):
        ieq(expected, sort(rows, key=compound_key))
예제 #20
0
def test_sort_3():
    """String values in 'bar' sort lexically, so "10" comes before "2"."""

    rows = (
        ("foo", "bar"),
        ("C", "2"),
        ("A", "9"),
        ("A", "6"),
        ("F", "1"),
        ("D", "10"),
    )
    expected = (
        ("foo", "bar"),
        ("F", "1"),
        ("D", "10"),
        ("C", "2"),
        ("A", "6"),
        ("A", "9"),
    )

    ieq(expected, sort(rows, "bar"))
예제 #21
0
파일: dedup.py 프로젝트: rogerkwoodley/petl
 def __init__(self, table, key=None, count=None, presorted=False,
              buffersize=None, tempdir=None, cache=True):
     """Keep `table` (sorted by `key` unless `presorted`), `key` and `count`.

     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` and are
     unused when the table is presorted.
     """
     if not presorted:
         table = sort(table, key=key, buffersize=buffersize, tempdir=tempdir,
                      cache=cache)
     self.table = table
     self.key, self.count = key, count
예제 #22
0
def test_sort_4():
    """Integer values in 'bar' sort numerically."""

    rows = (("foo", "bar"), ("C", 2), ("A", 9), ("A", 6), ("F", 1), ("D", 10))
    expected = (
        ("foo", "bar"),
        ("F", 1),
        ("C", 2),
        ("A", 6),
        ("A", 9),
        ("D", 10),
    )

    ieq(expected, sort(rows, "bar"))
예제 #23
0
 def __init__(self, source, key, aggregation=None, presorted=False,
              buffersize=None, tempdir=None, cache=True):
     """Keep the (sorted) source and key and normalise `aggregation`.

     `aggregation` may be None (becomes an empty OrderedDict), a list or
     tuple of sequences (first item becomes the mapping key, the remainder
     its value) or a dict (stored as given). Anything else raises
     ArgumentError. The source is sorted by `key` unless `presorted`.
     """
     if not presorted:
         source = sort(source, key, buffersize=buffersize, tempdir=tempdir,
                       cache=cache)
     self.source = source
     self.key = key

     if aggregation is None:
         normalised = OrderedDict()
     elif isinstance(aggregation, (list, tuple)):
         normalised = OrderedDict(
             (spec[0], spec[1:]) for spec in aggregation)
     elif isinstance(aggregation, dict):
         normalised = aggregation
     else:
         raise ArgumentError(
             'expected aggregation is None, list, tuple or dict, found %r'
             % aggregation)
     self.aggregation = normalised
예제 #24
0
def test_sort_missing_cell_numeric():
    """Check for issue #385 (IndexError when sorting rows with missing
    cells), using numeric-looking strings."""
    ragged = (('a', 'b'), ('4', ), ('2', '1'), ('1', ))
    expected = (('a', 'b'), ('1', ), ('2', '1'), ('4', ))

    ieq(expected, sort(ragged))
예제 #25
0
def test_sort_buffered_independent():
    """Two iterators over one buffered sort result advance independently."""

    table = (('foo', 'bar'),
             ('C', 2),
             ('A', 9),
             ('A', 6),
             ('F', 1),
             ('D', 10))
    expectation = (('foo', 'bar'),
                   ('F', 1),
                   ('C', 2),
                   ('A', 6),
                   ('A', 9),
                   ('D', 10))

    result = sort(table, 'bar', buffersize=4)
    nrows(result)  # cause data to be cached
    # check that two row iterators are independent, i.e., consuming rows
    # from one does not affect the other
    it1 = iter(result)
    it2 = iter(result)
    # builtin next() instead of the Python 2-only iterator .next() method
    eq_(expectation[0], next(it1))
    eq_(expectation[1], next(it1))
    eq_(expectation[0], next(it2))
    eq_(expectation[1], next(it2))
    eq_(expectation[2], next(it2))
    eq_(expectation[2], next(it1))
예제 #26
0
def test_sort_missing_cell_text():
    """Check for issue #385 (IndexError when sorting rows with missing
    cells), using text values."""
    ragged = (('a', 'b', 'c'), ('C', ), ('A', '4', '5'))
    expected = (('a', 'b', 'c'), ('A', '4', '5'), ('C', ))

    ieq(expected, sort(ragged))
예제 #27
0
파일: joins.py 프로젝트: podpearson/petl
 def __init__(self, left, right, lkey, rkey, presorted=False, missing=None,
              buffersize=None, tempdir=None, cache=True,
              lprefix=None, rprefix=None):
     """Store both inputs, sorting each by its own key unless presorted.

     When `presorted` is true the tables are kept exactly as given and
     `buffersize`, `tempdir` and `cache` are not used; otherwise they are
     forwarded to ``sort``.
     """
     if not presorted:
         sort_options = dict(buffersize=buffersize, tempdir=tempdir,
                             cache=cache)
         left = sort(left, lkey, **sort_options)
         right = sort(right, rkey, **sort_options)
     self.left, self.right = left, right
     self.lkey, self.rkey = lkey, rkey
     self.missing = missing
     self.lprefix, self.rprefix = lprefix, rprefix
예제 #28
0
def test_sort_6():
    """reverse=True inverts the ordering on a compound (foo, bar) key."""

    rows = (("foo", "bar"), (2.3, 2), (1.2, 9), (2.3, 6), (3.2, 1), (1.2, 10))
    expected = (
        ("foo", "bar"),
        (3.2, 1),
        (2.3, 6),
        (2.3, 2),
        (1.2, 10),
        (1.2, 9),
    )

    ieq(expected, sort(rows, key=("foo", "bar"), reverse=True))
예제 #29
0
 def __init__(self, source, key=None, presorted=False, buffersize=None,
              tempdir=None, cache=True):
     """Keep `source` (sorted by `key` unless `presorted`) and `key`.

     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` and are
     unused when the source is presorted.
     """
     if not presorted:
         source = sort(source, key, buffersize=buffersize, tempdir=tempdir,
                       cache=cache)
     self.source = source
     self.key = key
예제 #30
0
파일: dedup.py 프로젝트: pombredanne/petl
 def __init__(self, source, key=None, presorted=False, buffersize=None,
              tempdir=None, cache=True):
     """Keep `source` (sorted by `key` unless `presorted`) and `key`.

     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` and are
     unused when the source is presorted.
     """
     if not presorted:
         source = sort(source, key, buffersize=buffersize, tempdir=tempdir,
                       cache=cache)
     self.source = source
     self.key = key  # TODO property
예제 #31
0
def test_sort_4():
    """Integer values in 'bar' sort numerically."""

    rows = (('foo', 'bar'), ('C', 2), ('A', 9), ('A', 6), ('F', 1), ('D', 10))
    expected = (('foo', 'bar'),
                ('F', 1),
                ('C', 2),
                ('A', 6),
                ('A', 9),
                ('D', 10))

    ieq(expected, sort(rows, 'bar'))
예제 #32
0
def test_mergesort_2():
    """mergesort on 'foo' equals sorting the concatenated tables on 'foo'."""

    left = (("foo", "bar"), ("A", 9), ("C", 2), ("D", 10), ("A", 6), ("F", 1))
    right = (("foo", "baz"), ("B", 3), ("D", 10), ("A", 10), ("F", 4))

    # reference result: concatenate then sort (mergesort should be the more
    # efficient route, esp. when presorted)
    expected = sort(cat(left, right), key="foo")

    # unsorted inputs; compared twice, as in the rest of this suite
    merged = mergesort(left, right, key="foo")
    for _ in range(2):
        ieq(expected, merged)

    # inputs sorted up front and flagged as presorted
    left_sorted = sort(left, key="foo")
    right_sorted = sort(right, key="foo")
    merged = mergesort(left_sorted, right_sorted, key="foo", presorted=True)
    for _ in range(2):
        ieq(expected, merged)
예제 #33
0
 def __init__(self, table, key=None, count=None, presorted=False,
              buffersize=None, tempdir=None, cache=True):
     """Keep `table` (sorted by `key` unless `presorted`), `key` and `count`.

     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` and are
     unused when the table is presorted.
     """
     if not presorted:
         table = sort(table, key=key, buffersize=buffersize, tempdir=tempdir,
                      cache=cache)
     self.table = table
     self.key, self.count = key, count
예제 #34
0
def test_sort_6():
    """reverse=True inverts the ordering on a compound (foo, bar) key."""

    rows = (('foo', 'bar'), (2.3, 2), (1.2, 9), (2.3, 6), (3.2, 1), (1.2, 10))
    expected = (('foo', 'bar'),
                (3.2, 1),
                (2.3, 6),
                (2.3, 2),
                (1.2, 10),
                (1.2, 9))

    ieq(expected, sort(rows, key=('foo', 'bar'), reverse=True))
예제 #35
0
def test_mergesort_1():
    """mergesort with no key equals sorting the concatenated tables."""

    left = (('foo', 'bar'),
            ('A', 6),
            ('C', 2),
            ('D', 10),
            ('A', 9),
            ('F', 1))
    right = (('foo', 'bar'), ('B', 3), ('D', 10), ('A', 10), ('F', 4))

    # reference result: concatenate then sort (mergesort should be the more
    # efficient route, esp. when presorted)
    expected = sort(cat(left, right))

    # unsorted inputs; compared twice, as in the rest of this suite
    merged = mergesort(left, right)
    for _ in range(2):
        ieq(expected, merged)

    # inputs sorted up front and flagged as presorted
    merged = mergesort(sort(left), sort(right), presorted=True)
    for _ in range(2):
        ieq(expected, merged)
예제 #36
0
 def __init__(self, table, key, aggregation=list, value=None,
              presorted=False, buffersize=None, tempdir=None, cache=True):
     """Keep `table` (sorted by `key` unless `presorted`) and the settings.

     `key`, `aggregation` and `value` are stored unchanged; `buffersize`,
     `tempdir` and `cache` are forwarded to ``sort`` when sorting is needed.
     """
     if not presorted:
         table = sort(table, key, buffersize=buffersize, tempdir=tempdir,
                      cache=cache)
     self.table = table
     self.key = key
     self.aggregation = aggregation
     self.value = value
예제 #37
0
 def __init__(self, source, key, reducer, fields=None, presorted=False,
              buffersize=None, tempdir=None, cache=True):
     """Keep `source` (sorted by `key` unless `presorted`) and the settings.

     `key`, `fields` and `reducer` are stored unchanged; `buffersize`,
     `tempdir` and `cache` are forwarded to ``sort`` when sorting is needed.
     """
     if not presorted:
         source = sort(source, key, buffersize=buffersize, tempdir=tempdir,
                       cache=cache)
     self.source = source
     self.key = key
     self.fields = fields
     self.reducer = reducer
예제 #38
0
def test_sort_3():
    """String values in 'bar' sort lexically, so '10' comes before '2'."""

    rows = (('foo', 'bar'),
            ('C', '2'),
            ('A', '9'),
            ('A', '6'),
            ('F', '1'),
            ('D', '10'))
    expected = (('foo', 'bar'),
                ('F', '1'),
                ('D', '10'),
                ('C', '2'),
                ('A', '6'),
                ('A', '9'))

    ieq(expected, sort(rows, 'bar'))
예제 #39
0
파일: reshape.py 프로젝트: podpearson/petl
 def __init__(self, source, f1, f2, f3, aggfun, missing=None,
              presorted=False, buffersize=None, tempdir=None, cache=True):
     """Keep `source` (sorted by ``(f1, f2)`` unless `presorted`) and settings.

     The three field selectors, `aggfun` and `missing` are stored unchanged;
     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` when
     sorting is needed.
     """
     if not presorted:
         source = sort(source, key=(f1, f2), buffersize=buffersize,
                       tempdir=tempdir, cache=cache)
     self.source = source
     self.f1 = f1
     self.f2 = f2
     self.f3 = f3
     self.aggfun = aggfun
     self.missing = missing
예제 #40
0
파일: reductions.py 프로젝트: DeanWay/petl
def groupselectmax(table, key, value, presorted=False, buffersize=None,
                   tempdir=None, cache=True):
    """Group by the `key` field then return the row with the maximum of the
    `value` field within each group. N.B., will only return one row for each
    group, even if multiple rows have the same (maximum) value."""

    # Order rows by descending `value` first, then let groupselectfirst pick
    # one row per `key`.
    by_value_desc = sort(table, value, reverse=True)
    return groupselectfirst(by_value_desc, key, presorted=presorted,
                            buffersize=buffersize, tempdir=tempdir,
                            cache=cache)
예제 #41
0
def test_sort_none():
    """None is expected to sort ahead of numbers and of datetimes."""

    # None mixed with integers
    rows = (('foo', 'bar'),
            ('C', 2),
            ('A', 9),
            ('A', None),
            ('F', 1),
            ('D', 10))
    expected = (('foo', 'bar'),
                ('A', None),
                ('F', 1),
                ('C', 2),
                ('A', 9),
                ('D', 10))
    ieq(expected, sort(rows, 'bar'))

    # None mixed with datetimes; `at` derives each value from one shared
    # "now" by replacing the given field
    at = datetime.now().replace

    rows = (('foo', 'bar'),
            ('C', at(hour=5)),
            ('A', at(hour=1)),
            ('A', None),
            ('F', at(hour=9)),
            ('D', at(hour=17)))
    expected = (('foo', 'bar'),
                ('A', None),
                ('A', at(hour=1)),
                ('C', at(hour=5)),
                ('F', at(hour=9)),
                ('D', at(hour=17)))
    ieq(expected, sort(rows, 'bar'))
예제 #42
0
 def __init__(self, source, f1, f2, f3, aggfun, missing=None,
              presorted=False, buffersize=None, tempdir=None, cache=True):
     """Keep `source` (sorted by ``(f1, f2)`` unless `presorted`) and settings.

     The three field selectors, `aggfun` and `missing` are stored unchanged;
     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` when
     sorting is needed.
     """
     if not presorted:
         source = sort(source, key=(f1, f2), buffersize=buffersize,
                       tempdir=tempdir, cache=cache)
     self.source = source
     self.f1 = f1
     self.f2 = f2
     self.f3 = f3
     self.aggfun = aggfun
     self.missing = missing
예제 #43
0
파일: maps.py 프로젝트: DeanWay/petl
 def __init__(self, source, key, mapper, header=None, presorted=False,
              buffersize=None, tempdir=None, cache=True):
     """Keep `source` (sorted by `key` unless `presorted`) and the settings.

     `key`, `header` and `mapper` are stored unchanged; `buffersize`,
     `tempdir` and `cache` are forwarded to ``sort`` when sorting is needed.
     """
     if not presorted:
         source = sort(source, key, buffersize=buffersize, tempdir=tempdir,
                       cache=cache)
     self.source = source
     self.key = key
     self.header = header
     self.mapper = mapper
예제 #44
0
def test_mergesort_4():
    """mergesort on 'bar' equals sort-after-cat for tables whose headers
    only partly overlap."""

    left = (
        ("foo", "bar", "baz"),
        (1, "A", True),
        (2, "B", None),
        (4, "C", True),
    )
    right = (
        ("bar", "baz", "quux"),
        ("A", True, 42.0),
        ("B", False, 79.3),
        ("C", False, 12.4),
    )

    expected = sort(cat(left, right), key="bar")
    merged = mergesort(left, right, key="bar")

    # compared twice, as in the rest of this suite
    for _ in range(2):
        ieq(expected, merged)
예제 #45
0
파일: intervals.py 프로젝트: larissarmp/TCC
def collapsedintervals(table, start='start', stop='stop', key=None):
    """
    Utility function to collapse intervals in a table.

    Without a facet `key`, yields `(start, stop)` tuples.

    With a facet `key`, yields `(key, start, stop)` tuples.

    """

    if key is None:
        # unfaceted: order by start and collapse the whole table at once
        ordered = sort(table, key=start)
        for interval in _collapse(values(ordered, (start, stop))):
            yield interval
        return

    # faceted: order by (key, start) and collapse within each key group
    ordered = sort(table, key=(key, start))
    for facet, group in rowgroupby(ordered, key=key, value=(start, stop)):
        for interval in _collapse(group):
            yield (facet, ) + interval
예제 #46
0
def test_sort_buffered_tempdir():
    """A buffered sort given an explicit `tempdir` matches the plain sort."""
    # Local import so the block does not rely on a module-level import; the
    # platform's temporary directory replaces a hard-coded POSIX-only '/tmp'.
    import tempfile

    table = (('foo', 'bar'),
             ('C', 2),
             ('A', 9),
             ('A', 6),
             ('F', 1),
             ('D', 10))

    # test sort forwards
    expectation = (('foo', 'bar'),
                   ('F', 1),
                   ('C', 2),
                   ('A', 6),
                   ('A', 9),
                   ('D', 10))
    result = sort(table, 'bar')
    ieq(expectation, result)
    result = sort(table, 'bar', buffersize=2, tempdir=tempfile.gettempdir())
    ieq(expectation, result)
예제 #47
0
 def __init__(self, source, key, missing=None, exclude=None, include=None,
              presorted=False, buffersize=None, tempdir=None, cache=True):
     """Keep `source` (sorted by `key` unless `presorted`) and the settings.

     `key`, `missing`, `exclude` and `include` are stored unchanged;
     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` when
     sorting is needed.
     """
     if not presorted:
         source = sort(source, key, buffersize=buffersize, tempdir=tempdir,
                       cache=cache)
     self.source = source
     self.key = key
     self.missing = missing
     self.exclude, self.include = exclude, include
예제 #48
0
파일: intervals.py 프로젝트: DeanWay/petl
def collapsedintervals(table, start='start', stop='stop', key=None):
    """
    Utility function to collapse intervals in a table.

    Without a facet `key`, yields `(start, stop)` tuples.

    With a facet `key`, yields `(key, start, stop)` tuples.

    """

    if key is None:
        # unfaceted: order by start and collapse the whole table at once
        ordered = sort(table, key=start)
        for interval in _collapse(values(ordered, (start, stop))):
            yield interval
        return

    # faceted: order by (key, start) and collapse within each key group
    ordered = sort(table, key=(key, start))
    for facet, group in rowgroupby(ordered, key=key, value=(start, stop)):
        for interval in _collapse(group):
            yield (facet,) + interval
예제 #49
0
def groupselectmax(table, key, value):
    """
    Group by the `key` field then return the row with the maximum of the `value`
    field within each group. N.B., will only return one row for each group,
    even if multiple rows have the same (maximum) value.

    .. versionadded:: 0.14

    """

    # Order rows by descending `value` first, then let groupselectfirst pick
    # one row per `key`.
    by_value_desc = sort(table, value, reverse=True)
    return groupselectfirst(by_value_desc, key)
예제 #50
0
파일: dedup.py 프로젝트: pombredanne/petl
 def __init__(self, source, key, missing=None, exclude=None, include=None,
              presorted=False, buffersize=None, tempdir=None, cache=True):
     """Keep `source` (sorted by `key` unless `presorted`) and the settings.

     `key`, `missing`, `exclude` and `include` are stored unchanged;
     `buffersize`, `tempdir` and `cache` are forwarded to ``sort`` when
     sorting is needed.
     """
     if not presorted:
         source = sort(source, key, buffersize=buffersize, tempdir=tempdir,
                       cache=cache)
     self.source = source
     self.key = key
     self.missing = missing
     self.exclude, self.include = exclude, include
예제 #51
0
def test_mergesort_4():
    """mergesort on 'bar' equals sort-after-cat for tables whose headers
    only partly overlap."""

    left = (('foo', 'bar', 'baz'),
            (1, 'A', True),
            (2, 'B', None),
            (4, 'C', True))
    right = (('bar', 'baz', 'quux'),
             ('A', True, 42.0),
             ('B', False, 79.3),
             ('C', False, 12.4))

    expected = sort(cat(left, right), key='bar')
    merged = mergesort(left, right, key='bar')

    # compared twice, as in the rest of this suite
    for _ in range(2):
        ieq(expected, merged)
예제 #52
0
def test_sort_buffered():
    """Buffered sorts (buffersize=2) agree with unbuffered sorts."""

    rows = (("foo", "bar"), ("C", 2), ("A", 9), ("A", 6), ("F", 1), ("D", 10))

    # ascending on 'bar'
    ascending = (("foo", "bar"), ("F", 1), ("C", 2), ("A", 6), ("A", 9), ("D", 10))
    ieq(ascending, sort(rows, "bar"))
    ieq(ascending, sort(rows, "bar", buffersize=2))

    # descending on 'bar'
    descending = (("foo", "bar"), ("D", 10), ("A", 9), ("A", 6), ("C", 2), ("F", 1))
    ieq(descending, sort(rows, "bar", reverse=True))
    ieq(descending, sort(rows, "bar", reverse=True, buffersize=2))

    # no key, reversed
    reversed_rows = (("foo", "bar"), ("F", 1), ("D", 10), ("C", 2), ("A", 9), ("A", 6))
    ieq(reversed_rows, sort(rows, reverse=True))
    ieq(reversed_rows, sort(rows, reverse=True, buffersize=2))
예제 #53
0
def test_sort_buffered():
    """Buffered sorts (buffersize=2) agree with unbuffered sorts."""

    rows = (('foo', 'bar'), ('C', 2), ('A', 9), ('A', 6), ('F', 1), ('D', 10))

    # ascending on 'bar'
    ascending = (('foo', 'bar'),
                 ('F', 1),
                 ('C', 2),
                 ('A', 6),
                 ('A', 9),
                 ('D', 10))
    ieq(ascending, sort(rows, 'bar'))
    ieq(ascending, sort(rows, 'bar', buffersize=2))

    # descending on 'bar'
    descending = (('foo', 'bar'),
                  ('D', 10),
                  ('A', 9),
                  ('A', 6),
                  ('C', 2),
                  ('F', 1))
    ieq(descending, sort(rows, 'bar', reverse=True))
    ieq(descending, sort(rows, 'bar', reverse=True, buffersize=2))

    # no key, reversed
    reversed_rows = (('foo', 'bar'),
                     ('F', 1),
                     ('D', 10),
                     ('C', 2),
                     ('A', 9),
                     ('A', 6))
    ieq(reversed_rows, sort(rows, reverse=True))
    ieq(reversed_rows, sort(rows, reverse=True, buffersize=2))
예제 #54
0
def groupselectmax(table, key, value, presorted=False, buffersize=None,
                   tempdir=None, cache=True):
    """Group by the `key` field then return the row with the maximum of the
    `value` field within each group. N.B., will only return one row for each
    group, even if multiple rows have the same (maximum) value."""

    # Order rows by descending `value` first, then let groupselectfirst pick
    # one row per `key`.
    by_value_desc = sort(table, value, reverse=True)
    return groupselectfirst(by_value_desc, key, presorted=presorted,
                            buffersize=buffersize, tempdir=tempdir,
                            cache=cache)
예제 #55
0
def test_sort_buffered_independent():
    """Two iterators over one buffered sort result advance independently."""

    rows = (('foo', 'bar'), ('C', 2), ('A', 9), ('A', 6), ('F', 1), ('D', 10))
    expected = (('foo', 'bar'),
                ('F', 1),
                ('C', 2),
                ('A', 6),
                ('A', 9),
                ('D', 10))

    sorted_rows = sort(rows, 'bar', buffersize=4)
    nrows(sorted_rows)  # cause data to be cached

    # consuming rows from one iterator must not affect the other
    first = iter(sorted_rows)
    second = iter(sorted_rows)

    # advance the first iterator two rows ...
    for position in (0, 1):
        eq_(expected[position], next(first))
    # ... the second still starts at the header and can overtake it ...
    for position in (0, 1, 2):
        eq_(expected[position], next(second))
    # ... and the first resumes where it stopped
    eq_(expected[2], next(first))
예제 #56
0
def test_sort_buffered_cleanup():
    """Check that the files backing a buffered sort exist while the result
    is alive and are gone after it is deleted and garbage collected."""

    table = (('foo', 'bar'), ('C', 2), ('A', 9), ('A', 6), ('F', 1), ('D', 10))
    result = sort(table, 'bar', buffersize=2)
    debug('initially filecache should be empty')
    eq_(None, result._filecache)
    debug('pull rows through, should populate file cache')
    eq_(5, nrows(result))
    # 5 data rows with buffersize=2 are expected to give 3 cache entries
    eq_(3, len(result._filecache))
    debug('check all files exist')
    # capture the names now; `result` is deleted before the final check
    filenames = _get_names(result._filecache)
    for fn in filenames:
        assert os.path.exists(fn), fn
    debug('delete object and garbage collect')
    # `result` must be the only reference for the collection below to
    # finalise it
    del result
    gc.collect()
    debug('check all files have been deleted')
    for fn in filenames:
        assert not os.path.exists(fn), fn
예제 #57
0
 def __init__(self, table, key, aggregation=list, value=None,
              presorted=False, buffersize=None, tempdir=None, cache=True):
     """Keep `table` (sorted by `key` unless `presorted`) and the settings.

     `key`, `aggregation` and `value` are stored unchanged; `buffersize`,
     `tempdir` and `cache` are forwarded to ``sort`` when sorting is needed.
     """
     if not presorted:
         table = sort(table, key, buffersize=buffersize, tempdir=tempdir,
                      cache=cache)
     self.table = table
     self.key = key
     self.aggregation = aggregation
     self.value = value
예제 #58
0
def people_list(request, uuid):
    """Render the people CSV of a download as a sorted, truncated table.

    Query parameters: ``sortby`` (a column of the CSV, default 'name'),
    ``ordering`` ('asc' or 'desc', default 'asc') and ``count`` (positive
    integer, default '10'). Returns a 404 response when no CSVDownload has
    the given uuid and a 400 response when any parameter is invalid.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    csv_path = os.path.join(settings.CSV_DIR,
                            '{0}.csv'.format(csvdownload.uuid))
    people = fromcsv(csv_path)

    query = request.GET
    sortby = query.get('sortby', 'name')
    ordering = query.get('ordering', 'asc')
    count_str = query.get('count', '10')

    # validate the query parameters before doing any sorting
    if sortby not in header(people) or ordering not in ('asc', 'desc'):
        return HttpResponseBadRequest('Bad request.')
    try:
        count = int(count_str)
    except ValueError:
        return HttpResponseBadRequest('Bad request.')
    if count < 1:
        return HttpResponseBadRequest('Bad request.')

    descending = ordering == 'desc'
    people = head(sort(people, sortby, reverse=descending), count)

    # NOTE(review): `people` has already been truncated by head() when
    # len(people) is compared with `count` below -- confirm `has_more`
    # reports what the template expects.
    context = {
        'csvdownload': csvdownload,
        'headers': header(people),
        'people': data(people),
        'has_more': len(people) > count,
        'queryparams': {
            'sortby': sortby,
            'ordering': ordering,
            'count': str(count + 10)
        }
    }
    return render(request, 'people_list.html', context)
예제 #59
0
def test_sort_buffered_cleanup_open_iterator():
    """Check that the files backing a buffered sort are removed after
    garbage collection even when a partly consumed iterator existed."""

    table = (('foo', 'bar'), ('C', 2), ('A', 9), ('A', 6), ('F', 1), ('D', 10))
    # check if cleanup is robust against open iterators
    result = sort(table, 'bar', buffersize=2)
    debug('pull rows through, should populate file cache')
    eq_(5, nrows(result))
    # 5 data rows with buffersize=2 are expected to give 3 cache entries
    eq_(3, len(result._filecache))
    debug('check all files exist')
    # capture the names now; `result` is deleted before the final check
    filenames = _get_names(result._filecache)
    for fn in filenames:
        assert os.path.exists(fn), fn
    debug(filenames)
    debug('open an iterator')
    # advance the iterator part-way so it is still open when it is deleted
    it = iter(result)
    next(it)
    next(it)
    debug('delete objects and garbage collect')
    # drop both the table and the open iterator before collecting
    del result
    del it
    gc.collect()
    for fn in filenames:
        assert not os.path.exists(fn), fn