Esempio n. 1
0
def counts(request, uuid):
    """
    Render value counts for the requested columns of a stored CSV download.

    The ``columns`` query parameter is a comma-separated list of field names.

    Responses:
      * 404 when no ``CSVDownload`` matches ``uuid``;
      * redirect to the ``people_list`` view when no columns are requested;
      * 400 when a requested column is not present in the table header;
      * otherwise the ``counts.html`` template.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    columns_str = request.GET.get('columns', '')
    # Blank entries (e.g. from "a,,b" or a trailing comma) are dropped.
    columns = sorted([c for c in columns_str.split(',') if c.strip()])
    if not columns:
        # With no columns there is nothing to validate, so redirecting first
        # produces the same response without touching the CSV file.
        return redirect(to=reverse('people_list', kwargs={'uuid': uuid}))

    # Read the header once instead of once per requested column.
    fields = header(people)
    if any(column not in fields for column in columns):
        return HttpResponseBadRequest('Bad request.')

    # Named ``value_table`` so the local does not shadow this view function.
    value_table = valuecounts(people, *columns)
    value_table = cutout(value_table, 'frequency')

    return render(
        request, 'counts.html', {
            'csvdownload': csvdownload,
            'columns': fields,
            'headers': header(value_table),
            'counts': data(value_table),
            'queryparams': {
                'columns': columns
            }
        })
Esempio n. 2
0
def test_header():
    """Check header() on a table built from tuples and on one built from lists."""
    expect = ('foo', 'bar')

    # rows given as tuples
    tuple_rows = (('foo', 'bar'), ('a', 1), ('b', 2))
    eq_(expect, header(tuple_rows))

    # rows given as lists; the expected header is the same tuple
    list_rows = (['foo', 'bar'], ['a', 1], ['b', 2])
    eq_(expect, header(list_rows))
Esempio n. 3
0
def test_header():
    """Check header() on a table built from tuples and on one built from lists."""
    expect = ('foo', 'bar')
    tables = (
        (('foo', 'bar'), ('a', 1), ('b', 2)),  # rows given as tuples
        (['foo', 'bar'], ['a', 1], ['b', 2]),  # rows given as lists
    )
    for table in tables:
        eq_(expect, header(table))
Esempio n. 4
0
def natural_key(left, right):
    """
    Work out the key field(s) shared by two tables.

    Field names from both headers are compared as strings. The result holds
    the fields of `left` that also occur in `right`, in `left` order: a bare
    string when exactly one field is shared, otherwise a list of strings.
    An assertion fails when the tables have no field in common.
    """
    left_names = [str(f) for f in header(left)]
    right_names = [str(f) for f in header(right)]
    shared = [name for name in left_names if name in right_names]
    assert len(shared) > 0, 'no fields in common'
    # a single shared field is returned unwrapped
    return shared[0] if len(shared) == 1 else shared
Esempio n. 5
0
def natural_key(left, right):
    """
    Determine the natural join key for two tables.

    Both headers are converted to strings; every field of `left` that is
    also a field of `right` becomes part of the key, keeping `left` order.
    One shared field is returned as a plain string, several as a list. An
    assertion fails if nothing is shared.
    """
    left_fields = [str(field) for field in header(left)]
    right_fields = [str(field) for field in header(right)]
    key = [field for field in left_fields if field in right_fields]
    assert len(key) > 0, 'no fields in common'
    if len(key) != 1:
        return key
    # singleton key: hand back the field itself rather than a one-item list
    return key[0]
Esempio n. 6
0
def recordcomplement(a, b, buffersize=None, tempdir=None, cache=True,
                     strict=False):
    """
    Return the records of `a` that do not occur in `b`, matching fields by
    name rather than by position. E.g.::

        >>> import petl as etl
        >>> a = [['foo', 'bar', 'baz'],
        ...      ['A', 1, True],
        ...      ['C', 7, False],
        ...      ['B', 2, False],
        ...      ['C', 9, True]]
        >>> b = [['bar', 'foo', 'baz'],
        ...      [2, 'B', False],
        ...      [9, 'A', False],
        ...      [3, 'B', True],
        ...      [9, 'C', True]]
        >>> aminusb = etl.recordcomplement(a, b)
        >>> aminusb
        +-----+-----+-------+
        | foo | bar | baz   |
        +=====+=====+=======+
        | 'A' |   1 | True  |
        +-----+-----+-------+
        | 'C' |   7 | False |
        +-----+-----+-------+

        >>> bminusa = etl.recordcomplement(b, a)
        >>> bminusa
        +-----+-----+-------+
        | bar | foo | baz   |
        +=====+=====+=======+
        |   3 | 'B' | True  |
        +-----+-----+-------+
        |   9 | 'A' | False |
        +-----+-----+-------+

    The two tables must share the same set of fields; field order is
    irrelevant. See also :func:`petl.transform.setops.complement`.

    The `buffersize`, `tempdir` and `cache` arguments are discussed under
    :func:`petl.transform.sorts.sort`.

    """

    # TODO possible with only one pass?

    fields_a = header(a)
    fields_b = header(b)
    assert set(fields_a) == set(fields_b), \
        'both tables must have the same set of fields'
    # reorder b's columns to follow a's header before comparing rows
    b_aligned = cut(b, *fields_a)
    return complement(
        a, b_aligned,
        buffersize=buffersize,
        tempdir=tempdir,
        cache=cache,
        strict=strict,
    )
Esempio n. 7
0
def recordcomplement(a, b, buffersize=None, tempdir=None, cache=True,
                     strict=False):
    """
    Compute `a` minus `b` on records, i.e. with fields paired up by name.
    E.g.::

        >>> import petl as etl
        >>> a = [['foo', 'bar', 'baz'],
        ...      ['A', 1, True],
        ...      ['C', 7, False],
        ...      ['B', 2, False],
        ...      ['C', 9, True]]
        >>> b = [['bar', 'foo', 'baz'],
        ...      [2, 'B', False],
        ...      [9, 'A', False],
        ...      [3, 'B', True],
        ...      [9, 'C', True]]
        >>> aminusb = etl.recordcomplement(a, b)
        >>> aminusb
        +-----+-----+-------+
        | foo | bar | baz   |
        +=====+=====+=======+
        | 'A' |   1 | True  |
        +-----+-----+-------+
        | 'C' |   7 | False |
        +-----+-----+-------+

        >>> bminusa = etl.recordcomplement(b, a)
        >>> bminusa
        +-----+-----+-------+
        | bar | foo | baz   |
        +=====+=====+=======+
        |   3 | 'B' | True  |
        +-----+-----+-------+
        |   9 | 'A' | False |
        +-----+-----+-------+

    Both tables need exactly the same set of fields, in any order. See also
    the :func:`petl.transform.setops.complement` function.

    For the `buffersize`, `tempdir` and `cache` arguments see the discussion
    under the :func:`petl.transform.sorts.sort` function.

    """

    # TODO possible with only one pass?

    header_a = header(a)
    header_b = header(b)
    same_fields = set(header_a) == set(header_b)
    assert same_fields, 'both tables must have the same set of fields'
    # put b's fields into a's order so rows can be compared positionally
    reordered_b = cut(b, *header_a)
    options = dict(buffersize=buffersize, tempdir=tempdir, cache=cache,
                   strict=strict)
    return complement(a, reordered_b, **options)
Esempio n. 8
0
def itercrossjoin(sources, prefix):
    """
    Yield the rows of the cross join of `sources`.

    The first row yielded is the combined header: the headers of all sources
    concatenated in order. When `prefix` is truthy each field is rendered as
    ``'<n>_<field>'`` where ``n`` is the one-based position of its source.
    Every following row is one element of the product of the sources' data
    rows, flattened into a single tuple.
    """

    # assemble the combined header row
    fields = []
    for position, src in enumerate(sources, start=1):
        names = header(src)
        if prefix:
            names = [str(position) + '_' + str(name) for name in names]
        fields.extend(names)
    yield tuple(fields)

    # one data row from each source per combination, concatenated in order
    row_sources = [data(src) for src in sources]
    for combination in itertools.product(*row_sources):
        yield tuple(itertools.chain.from_iterable(combination))
Esempio n. 9
0
def diffheaders(t1, t2):
    """
    Compare the headers of two tables and return a pair of sets: the fields
    found only in `t2`, then the fields found only in `t1`. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['a', 1, .3]]
        >>> table2 = [['baz', 'bar', 'quux'],
        ...           ['a', 1, .3]]
        >>> add, sub = etl.diffheaders(table1, table2)
        >>> add
        {'quux'}
        >>> sub
        {'foo'}

    """

    fields1 = set(header(t1))
    fields2 = set(header(t2))
    added = fields2.difference(fields1)
    removed = fields1.difference(fields2)
    return added, removed
Esempio n. 10
0
def people_list(request, uuid):
    """
    Render one page of rows from a stored CSV download.

    Query parameters:
      * ``sortby``   - field to sort on (default ``'name'``);
      * ``ordering`` - ``'asc'`` or ``'desc'`` (default ``'asc'``);
      * ``count``    - number of rows to show, at least 1 (default ``10``).

    Responds with 404 when no ``CSVDownload`` matches ``uuid`` and with 400
    when any query parameter is invalid. The template's ``queryparams``
    carry ``count + 10``, i.e. the size of the next, larger page.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    sortby = request.GET.get('sortby', 'name')
    ordering = request.GET.get('ordering', 'asc')
    count_str = request.GET.get('count', '10')

    if sortby not in header(people):
        return HttpResponseBadRequest('Bad request.')
    if ordering not in ('asc', 'desc'):
        return HttpResponseBadRequest('Bad request.')
    try:
        count = int(count_str)
    except ValueError:
        return HttpResponseBadRequest('Bad request.')
    if count < 1:
        return HttpResponseBadRequest('Bad request.')

    people = sort(people, sortby, reverse=ordering == 'desc')

    # Decide whether more rows exist *before* truncating: look one data row
    # past the requested page. Measuring the already-truncated table cannot
    # tell "exactly `count` rows" apart from "more than `count` rows".
    has_more = sum(1 for _ in data(head(people, count + 1))) > count

    people = head(people, count)

    return render(
        request, 'people_list.html', {
            'csvdownload': csvdownload,
            'headers': header(people),
            'people': data(people),
            'has_more': has_more,
            'queryparams': {
                'sortby': sortby,
                'ordering': ordering,
                'count': str(count + 10)
            }
        })
def convertall(table, *args, **kwargs):
    """
    Apply one conversion (function or mapping) to every field of the table.
    This is a shortcut for calling :func:`convert` with the table's full
    header as the field selection.

    A ``where`` keyword argument may be supplied as a callable or expression;
    it is evaluated per row and the conversion is applied only to rows for
    which it returns True.

    """

    # TODO don't read the data twice!
    all_fields = header(table)
    return convert(table, all_fields, *args, **kwargs)
Esempio n. 12
0
def convertall(table, *args, **kwargs):
    """
    Convert all fields in the table with a common function or mapping, by
    delegating to :func:`convert` with every field of the header selected.

    The optional ``where`` keyword argument is a callable or expression
    evaluated on each row; rows for which it returns False are left
    unconverted.

    """

    # TODO don't read the data twice!
    fields = header(table)
    converted = convert(table, fields, *args, **kwargs)
    return converted
def itertranspose(source):
    """
    Yield the rows of the transposed table.

    One output row is produced per field of the source header; row ``i``
    collects element ``i`` of every source row, header row included.
    """
    fields = header(source)
    # a separate pass over the source for each output row
    passes = [iter(source) for _ in fields]
    for index, rows in enumerate(passes):
        yield tuple(row[index] for row in rows)