def test_cutout():
    """Check that ``cutout`` drops the requested fields from every row.

    Fields are selected both by name and by integer index. The fixture
    deliberately contains a row that is longer than the header and a row
    that is shorter than the header.
    """
    source = (('foo', 'bar', 'baz'),
              ('A', 1, 2),
              ('B', '2', '3.4'),
              (u'B', u'3', u'7.8', True),  # longer than the header
              ('D', 'xyz', 9.0),
              ('E', None))                 # shorter than the header

    # expected output once both 'bar' and 'baz' have been removed
    foo_only = (('foo',),
                ('A',),
                ('B',),
                (u'B',),
                ('D',),
                ('E',))

    # expected output once only 'bar' has been removed; the short 'E' row
    # is expected to carry None in the 'baz' position
    foo_and_baz = (('foo', 'baz'),
                   ('A', 2),
                   ('B', '3.4'),
                   (u'B', u'7.8'),
                   ('D', 9.0),
                   ('E', None))

    # (field selectors handed to cutout, expected resulting table)
    scenarios = (
        (('bar', 'baz'), foo_only),
        (('bar',), foo_and_baz),
        ((1,), foo_and_baz),  # index 1 selects the same field as 'bar'
    )

    for dropped, expected in scenarios:
        actual = cutout(source, *dropped)
        ieq(expected, actual)
def counts(request, uuid):
    """Render the distinct value combinations of the requested CSV columns.

    The columns to aggregate are read from the comma-separated ``columns``
    query parameter.

    Responses:

    * 404 when no ``CSVDownload`` with the given ``uuid`` exists.
    * Redirect to the ``people_list`` view when no column was requested.
    * 400 when a requested column is not part of the CSV header.
    * Otherwise the rendered ``counts.html`` template.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    # Blank entries (e.g. from "a,,b" or a trailing comma) are dropped;
    # sorting makes the column order independent of the query string order.
    columns_str = request.GET.get('columns', '')
    columns = sorted(c for c in columns_str.split(',') if c.strip())

    # Nothing to aggregate: send the user back to the plain listing before
    # touching the CSV at all.
    if not columns:
        return redirect(to=reverse('people_list', kwargs={'uuid': uuid}))

    # Read the header once and reuse it for validation and for the template
    # context instead of calling header() once per requested column.
    # NOTE(review): each header() call presumably re-opens the CSV - confirm.
    available_columns = header(people)
    if any(column not in available_columns for column in columns):
        return HttpResponseBadRequest('Bad request.')

    # Drop the 'frequency' field from the valuecounts output before it is
    # handed to the template.
    value_counts = cutout(valuecounts(people, *columns), 'frequency')

    return render(
        request,
        'counts.html',
        {
            'csvdownload': csvdownload,
            'columns': available_columns,
            'headers': header(value_counts),
            'counts': data(value_counts),
            'queryparams': {
                'columns': columns
            }
        })
def fetch_people_table():
    """Fetch every page of the people listing and return it as one table.

    The first page is fetched synchronously; its ``count`` and the number
    of ``results`` it carries determine how many further pages exist. The
    remaining pages are fetched concurrently and appended in page order,
    so the row order of the result is the same on every run.

    Post-processing applied to the combined table:

    * a ``date`` field is added, holding the ISO date part of ``edited``;
    * the ``edited`` field is removed;
    * ``homeworld`` is converted by fetching the value through the planet
      fetcher and taking the ``name`` of the JSON response.
    """
    planet_fetcher = CachedPlanetFetcher()

    first_page_response = _fetch_people_page(1).json()
    total_count = first_page_response['count']
    fetched_results = first_page_response['results']
    fetched_count = len(fetched_results)
    remaining_count = total_count - fetched_count

    # An empty first page gives no page size to divide by; treat it as
    # "nothing more to fetch" instead of raising ZeroDivisionError.
    if fetched_count and remaining_count > 0:
        remaining_pages = math.ceil(remaining_count / fetched_count)
    else:
        remaining_pages = 0

    table = fromdicts(fetched_results, header=PEOPLE_HEADER)

    with ThreadPoolExecutor(max_workers=8) as executor:
        # All requests are submitted up front, so they still run
        # concurrently ...
        response_futures = [
            executor.submit(_fetch_people_page, page_number)
            for page_number in range(2, 2 + remaining_pages)
        ]
        # ... but results are consumed in submission (page) order rather
        # than completion order, keeping the row order deterministic.
        for future in response_futures:
            page_response = future.result().json()
            table = cat(
                table,
                fromdicts(page_response['results'], header=PEOPLE_HEADER))

    # 'Z' is rewritten to an explicit UTC offset before parsing the
    # timestamp with datetime.fromisoformat.
    table = addfields(
        table,
        [('date',
          lambda rec: datetime.fromisoformat(
              rec['edited'].replace('Z', '+00:00')).date().isoformat())])
    table = cutout(table, 'edited')
    table = convert(
        table, 'homeworld',
        lambda homeworld_url: planet_fetcher.fetch(
            homeworld_url).json()['name'])
    return table
def unjoin(table, value, key=None, autoincrement=(1, 1), presorted=False,
           buffersize=None, tempdir=None, cache=True):
    """
    Split a table into two tables by reversing an inner join.

    E.g., if the join key is present in the table::

        >>> from petl import look, unjoin
        >>> look(table1)
        +-------+-------+----------+
        | 'foo' | 'bar' | 'baz'    |
        +=======+=======+==========+
        | 'A'   | 1     | 'apple'  |
        +-------+-------+----------+
        | 'B'   | 1     | 'apple'  |
        +-------+-------+----------+
        | 'C'   | 2     | 'orange' |
        +-------+-------+----------+

        >>> table2, table3 = unjoin(table1, 'baz', key='bar')
        >>> look(table2)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'A'   | 1     |
        +-------+-------+
        | 'B'   | 1     |
        +-------+-------+
        | 'C'   | 2     |
        +-------+-------+

        >>> look(table3)
        +-------+----------+
        | 'bar' | 'baz'    |
        +=======+==========+
        | 1     | 'apple'  |
        +-------+----------+
        | 2     | 'orange' |
        +-------+----------+

    An integer join key can also be reconstructed, e.g.::

        >>> look(table4)
        +-------+----------+
        | 'foo' | 'bar'    |
        +=======+==========+
        | 'A'   | 'apple'  |
        +-------+----------+
        | 'B'   | 'apple'  |
        +-------+----------+
        | 'C'   | 'orange' |
        +-------+----------+

        >>> table5, table6 = unjoin(table4, 'bar')
        >>> look(table5)
        +-------+----------+
        | 'foo' | 'bar_id' |
        +=======+==========+
        | 'A'   | 1        |
        +-------+----------+
        | 'B'   | 1        |
        +-------+----------+
        | 'C'   | 2        |
        +-------+----------+

        >>> look(table6)
        +------+----------+
        | 'id' | 'bar'    |
        +======+==========+
        | 1    | 'apple'  |
        +------+----------+
        | 2    | 'orange' |
        +------+----------+

    The `presorted`, `buffersize`, `tempdir`, `cache` and `autoincrement`
    arguments are only consulted when `key` is None: the table is sorted
    by the `value` field unless `presorted` is true, and `buffersize`,
    `tempdir` and `cache` are handed to that sort.

    Returns a `(left, right)` pair of tables.

    .. versionadded:: 0.12

    """

    if key is not None:
        # the join key already exists in the table: the left side is the
        # distinct rows with the value field cut out, the right side is the
        # distinct rows with everything but the key and value cut out
        rows_without_value = distinct(cutout(table, value))
        key_value_pairs = distinct(cut(table, key, value))
        return rows_without_value, key_value_pairs

    # no join key available: it is reconstructed from the value field,
    # which requires the table to be sorted by that field
    ordered = table if presorted else sort(table, value,
                                           buffersize=buffersize,
                                           tempdir=tempdir,
                                           cache=cache)

    # left: the original rows with the value field replaced by an
    # incrementing integer
    keyed_rows = ConvertToIncrementingCounterView(ordered, value,
                                                  autoincrement)
    # right: a new table enumerating the distinct values of the field
    value_lookup = EnumerateDistinctView(ordered, value, autoincrement)
    return keyed_rows, value_lookup
def unjoin(table, value, key=None, autoincrement=(1, 1), presorted=False,
           buffersize=None, tempdir=None, cache=True):
    """
    Split a table into two tables by reversing an inner join. E.g.::

        >>> import petl as etl
        >>> # join key is present in the table
        ... table1 = (('foo', 'bar', 'baz'),
        ...           ('A', 1, 'apple'),
        ...           ('B', 1, 'apple'),
        ...           ('C', 2, 'orange'))
        >>> table2, table3 = etl.unjoin(table1, 'baz', key='bar')
        >>> table2
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   1 |
        +-----+-----+
        | 'B' |   1 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+

        >>> table3
        +-----+----------+
        | bar | baz      |
        +=====+==========+
        |   1 | 'apple'  |
        +-----+----------+
        |   2 | 'orange' |
        +-----+----------+

        >>> # an integer join key can also be reconstructed
        ... table4 = (('foo', 'bar'),
        ...           ('A', 'apple'),
        ...           ('B', 'apple'),
        ...           ('C', 'orange'))
        >>> table5, table6 = etl.unjoin(table4, 'bar')
        >>> table5
        +-----+--------+
        | foo | bar_id |
        +=====+========+
        | 'A' |      1 |
        +-----+--------+
        | 'B' |      1 |
        +-----+--------+
        | 'C' |      2 |
        +-----+--------+

        >>> table6
        +----+----------+
        | id | bar      |
        +====+==========+
        |  1 | 'apple'  |
        +----+----------+
        |  2 | 'orange' |
        +----+----------+

    The `autoincrement` parameter controls how an integer join key is
    reconstructed, and should be a tuple of (`start`, `step`).

    When no `key` is given the table is sorted by the `value` field first,
    unless `presorted` is true; `buffersize`, `tempdir` and `cache` are
    forwarded to that sort. None of these arguments is consulted when a
    `key` is given.

    """

    if key is None:
        # the join key has to be reconstructed, which needs the table
        # ordered by the value field
        if not presorted:
            by_value = sort(table, value, buffersize=buffersize,
                            tempdir=tempdir, cache=cache)
        else:
            by_value = table

        # left: the original rows, value field swapped for an incrementing
        # integer; right: the distinct values of the field, enumerated
        result = (
            ConvertToIncrementingCounterView(by_value, value, autoincrement),
            EnumerateDistinctView(by_value, value, autoincrement),
        )
    else:
        # left: distinct rows with the value field cut out; right: distinct
        # rows keeping only the key and value fields
        result = (
            distinct(cutout(table, value)),
            distinct(cut(table, key, value)),
        )

    return result