def test_cat_with_header():
    """Check ``cat`` output when an explicit ``header`` is supplied.

    The expectations encode that header fields absent from the inputs are
    filled with ``None`` and that an input field not named in the header
    ('baz') does not appear in the output.
    """
    header_fields = ('A', 'foo', 'B', 'bar', 'C')
    table1 = (('bar', 'foo'),
              ('A', 1),
              ('B', 2))
    table2 = (('bar', 'baz'),
              ('C', True),
              ('D', False))

    rows_from_table1 = ((None, 1, None, 'A', None),
                        (None, 2, None, 'B', None))
    rows_from_table2 = ((None, None, None, 'C', None),
                        (None, None, None, 'D', None))

    # one input table
    actual = cat(table1, header=list(header_fields))
    expect = (header_fields,) + rows_from_table1
    ieq(expect, actual)
    # repeated on purpose -- presumably to confirm the table can be
    # iterated more than once
    ieq(expect, actual)

    # two input tables
    actual = cat(table1, table2, header=list(header_fields))
    expect = (header_fields,) + rows_from_table1 + rows_from_table2
    ieq(expect, actual)
    ieq(expect, actual)
def test_cat_dupfields():
    """Pin down what ``cat`` produces when a header repeats a field name."""
    # These inputs are pathological and are included only to confirm the
    # expected behaviour; a user would need to rename the fields to get
    # something sensible.
    dup_table = (('foo', 'foo'),
                 (1, 'A'),
                 (2,),
                 (3, 'B', True))
    expect = (('foo', 'foo'),
              (1, 1),
              (2, 2),
              (3, 3))
    ieq(expect, cat(dup_table))

    dup_table_with_bar = (('foo', 'foo', 'bar'),
                          (4, 'C', True),
                          (5, 'D', False))
    expect = (('foo', 'foo', 'bar'),
              (1, 1, None),
              (2, 2, None),
              (3, 3, None),
              (4, 4, True),
              (5, 5, False))
    ieq(expect, cat(dup_table, dup_table_with_bar))
def test_cat():
    """Exercise ``cat`` on differing headers, uneven rows and three tables."""
    out_header = ('foo', 'bar', 'baz')

    table1 = (('foo', 'bar'),
              (1, 'A'),
              (2, 'B'))
    table2 = (('bar', 'baz'),
              ('C', True),
              ('D', False))
    rows_1_and_2 = ((1, 'A', None),
                    (2, 'B', None),
                    (None, 'C', True),
                    (None, 'D', False))
    ieq((out_header,) + rows_1_and_2, cat(table1, table2, missing=None))

    # how does cat cope with uneven rows?
    table3 = (('foo', 'bar', 'baz'),
              ('A', 1, 2),
              ('B', '2', '3.4'),
              (u'B', u'3', u'7.8', True),
              ('D', 'xyz', 9.0),
              ('E', None))
    rows_3 = (('A', 1, 2),
              ('B', '2', '3.4'),
              (u'B', u'3', u'7.8'),
              ('D', 'xyz', 9.0),
              ('E', None, None))
    ieq((out_header,) + rows_3, cat(table3, missing=None))

    # cat more than two tables?
    ieq((out_header,) + rows_1_and_2 + rows_3, cat(table1, table2, table3))
def test_cat_dupfields():
    """Confirm the output of ``cat`` for tables with a duplicated field name."""
    first = (
        ('foo', 'foo'),
        (1, 'A'),
        (2, ),
        (3, 'B', True),
    )
    second = (
        ('foo', 'foo', 'bar'),
        (4, 'C', True),
        (5, 'D', False),
    )

    # Pathological cases, kept to confirm the expected behaviour -- the user
    # needs to rename fields to get something sensible.
    expect_first_only = (
        ('foo', 'foo'),
        (1, 1),
        (2, 2),
        (3, 3),
    )
    result = cat(first)
    ieq(expect_first_only, result)

    expect_both = (
        ('foo', 'foo', 'bar'),
        (1, 1, None),
        (2, 2, None),
        (3, 3, None),
        (4, 4, True),
        (5, 5, False),
    )
    result = cat(first, second)
    ieq(expect_both, result)
def fetch_people_table():
    """Fetch every page of the people listing and return it as one table.

    The first page is requested up front to learn the total record count and
    the page size. Any remaining pages are requested concurrently on a thread
    pool and concatenated in page order, so the row order does not depend on
    which request happens to finish first.

    The resulting table then gets a ``date`` field derived from each
    record's ``edited`` timestamp (ISO date only), has ``edited`` cut out,
    and has each ``homeworld`` value converted, via the planet fetcher, to
    the ``name`` from that URL's JSON response.

    Returns:
        The transformed people table.
    """
    planet_fetcher = CachedPlanetFetcher()

    first_page = _fetch_people_page(1).json()
    first_results = first_page['results']
    page_size = len(first_results)
    remaining_count = first_page['count'] - page_size
    # An empty first page gives no page size to divide by; there is nothing
    # further to request in that case, so avoid the ZeroDivisionError.
    if page_size:
        remaining_pages = math.ceil(remaining_count / page_size)
    else:
        remaining_pages = 0

    with ThreadPoolExecutor(max_workers=8) as executor:
        # Executor.map runs the requests concurrently but yields the results
        # in submission order, which keeps the output deterministic.
        later_pages = [
            response.json()
            for response in executor.map(
                _fetch_people_page, range(2, 2 + remaining_pages))
        ]

    # Concatenate all pages in a single call instead of nesting one cat()
    # per page.
    table = cat(
        fromdicts(first_results, header=PEOPLE_HEADER),
        *(fromdicts(page['results'], header=PEOPLE_HEADER)
          for page in later_pages))

    def edited_date(rec):
        # Spell the UTC offset explicitly: fromisoformat() only accepts a
        # trailing 'Z' from Python 3.11 onwards.
        edited = datetime.fromisoformat(rec['edited'].replace('Z', '+00:00'))
        return edited.date().isoformat()

    table = addfields(table, [('date', edited_date)])
    table = cutout(table, 'edited')
    table = convert(
        table, 'homeworld',
        lambda homeworld_url: planet_fetcher.fetch(
            homeworld_url).json()['name'])
    return table
def test_mergesort_3():
    """``mergesort`` keyed on 'foo' in reverse must equal sort-after-cat."""
    left = (('foo', 'bar'),
            ('A', 9),
            ('C', 2),
            ('D', 10),
            ('A', 6),
            ('F', 1))
    right = (('foo', 'baz'),
             ('B', 3),
             ('D', 10),
             ('A', 10),
             ('F', 4))
    sort_opts = dict(key='foo', reverse=True)

    # should be same as concatenate then sort (but more efficient, esp. when
    # presorted)
    expect = sort(cat(left, right), **sort_opts)

    actual = mergesort(left, right, **sort_opts)
    ieq(expect, actual)
    # repeated on purpose -- presumably to confirm the table can be
    # iterated more than once
    ieq(expect, actual)

    left_sorted = sort(left, **sort_opts)
    right_sorted = sort(right, **sort_opts)
    actual = mergesort(left_sorted, right_sorted, presorted=True, **sort_opts)
    ieq(expect, actual)
    ieq(expect, actual)
def test_cat_empty():
    """``cat`` with a header-only second table still contributes its fields."""
    populated = (('foo', 'bar'),
                 (1, 'A'),
                 (2, 'B'))
    header_only = (('bar', 'baz'),)

    result = cat(populated, header_only)

    # 'baz' appears in the output header; the existing rows are padded
    # with None for it.
    wanted = (('foo', 'bar', 'baz'),
              (1, 'A', None),
              (2, 'B', None))
    ieq(wanted, result)
def test_mergesort_4():
    """``mergesort`` on tables with overlapping, differing headers.

    Keyed on "bar", the result must equal sorting the concatenation.
    """
    left = (("foo", "bar", "baz"),
            (1, "A", True),
            (2, "B", None),
            (4, "C", True))
    right = (("bar", "baz", "quux"),
             ("A", True, 42.0),
             ("B", False, 79.3),
             ("C", False, 12.4))
    sort_key = "bar"

    expect = sort(cat(left, right), key=sort_key)
    actual = mergesort(left, right, key=sort_key)

    ieq(expect, actual)
    # repeated on purpose -- presumably to confirm the table can be
    # iterated more than once
    ieq(expect, actual)
def test_mergesort_4():
    """Merging on 'bar' across differing headers equals sort-after-cat."""
    tables = (
        (
            ('foo', 'bar', 'baz'),
            (1, 'A', True),
            (2, 'B', None),
            (4, 'C', True),
        ),
        (
            ('bar', 'baz', 'quux'),
            ('A', True, 42.0),
            ('B', False, 79.3),
            ('C', False, 12.4),
        ),
    )

    concatenated = cat(*tables)
    expect = sort(concatenated, key='bar')
    actual = mergesort(*tables, key='bar')

    # Checked twice, as elsewhere in these tests -- presumably to confirm
    # the result can be iterated more than once.
    ieq(expect, actual)
    ieq(expect, actual)
def _append_to_avro_file(test_rows1, test_rows2, test_schema,
                         test_expect=None, print_tables=True):
    """Write rows to a temp avro file, append more rows, and check the result.

    ``test_rows1`` is written with ``toavro`` and ``test_rows2`` is appended
    with ``appendavro``, both using ``test_schema``. The file is then read
    back with ``fromavro`` and compared against ``test_expect``; when
    ``test_expect`` is None the comparison is against
    ``cat(test_rows1, test_rows2)``.

    ``print_tables`` is forwarded unchanged to the show/assert helpers.
    """
    for rows in (test_rows1, test_rows2):
        _show__expect_rows(rows, print_tables)

    avro_path = _get_tempfile_path()
    toavro(test_rows1, avro_path, schema=test_schema)
    appendavro(test_rows2, avro_path, schema=test_schema)
    read_back = fromavro(avro_path)

    # Fall back to the plain concatenation only when the caller did not
    # supply an explicit expectation.
    expected = cat(test_rows1, test_rows2) if test_expect is None else test_expect
    _assert_rows_are_equals(expected, read_back, print_tables)
def test_mergesort_2():
    """``mergesort`` keyed on "foo" must equal sorting the concatenation."""
    left = (("foo", "bar"),
            ("A", 9),
            ("C", 2),
            ("D", 10),
            ("A", 6),
            ("F", 1))
    right = (("foo", "baz"),
             ("B", 3),
             ("D", 10),
             ("A", 10),
             ("F", 4))

    # should be same as concatenate then sort (but more efficient, esp. when
    # presorted)
    expect = sort(cat(left, right), key="foo")

    # unsorted inputs
    actual = mergesort(left, right, key="foo")
    ieq(expect, actual)
    # repeated on purpose -- presumably to confirm the table can be
    # iterated more than once
    ieq(expect, actual)

    # inputs already sorted on the key
    left_sorted = sort(left, key="foo")
    right_sorted = sort(right, key="foo")
    actual = mergesort(left_sorted, right_sorted, key="foo", presorted=True)
    ieq(expect, actual)
    ieq(expect, actual)
def test_mergesort_1():
    """``mergesort`` with no key must equal sorting the concatenation."""
    left = (('foo', 'bar'),
            ('A', 6),
            ('C', 2),
            ('D', 10),
            ('A', 9),
            ('F', 1))
    right = (('foo', 'bar'),
             ('B', 3),
             ('D', 10),
             ('A', 10),
             ('F', 4))

    # should be same as concatenate then sort (but more efficient, esp. when
    # presorted)
    concatenated = cat(left, right)
    expect = sort(concatenated)

    actual = mergesort(left, right)
    ieq(expect, actual)
    # repeated on purpose -- presumably to confirm the table can be
    # iterated more than once
    ieq(expect, actual)

    left_sorted = sort(left)
    right_sorted = sort(right)
    actual = mergesort(left_sorted, right_sorted, presorted=True)
    ieq(expect, actual)
    ieq(expect, actual)