def test_concatenation_table_from_tables(axis, in_memory_pa_table, arrow_file):
    """Check ``ConcatenationTable.from_tables`` on a mix of table types.

    The inputs are the raw ``in_memory_pa_table``, an ``InMemoryTable``
    wrapping it, a ``ConcatenationTable`` built from that wrapper, and a
    ``MemoryMappedTable`` loaded from ``arrow_file``.

    Args:
        axis: Concatenation axis forwarded to ``from_tables``. The test only
            distinguishes ``axis == 0`` from the other case (treated as 1).
        in_memory_pa_table: Fixture providing the base table.
        arrow_file: Fixture providing the path given to
            ``MemoryMappedTable.from_file``.
    """
    in_memory_table = InMemoryTable(in_memory_pa_table)
    concatenation_table = ConcatenationTable.from_blocks(in_memory_table)
    memory_mapped_table = MemoryMappedTable.from_file(arrow_file)
    tables = [
        in_memory_pa_table,
        in_memory_table,
        concatenation_table,
        memory_mapped_table,
    ]

    if axis == 0:
        expected_table = pa.concat_tables([in_memory_pa_table] * len(tables))
    else:
        # Start from the base table and append its columns once more for
        # every additional input table.
        expected_table = in_memory_pa_table
        for _ in range(1, len(tables)):
            for name, col in zip(
                in_memory_pa_table.column_names, in_memory_pa_table.columns
            ):
                expected_table = expected_table.append_column(name, col)

    with assert_arrow_memory_doesnt_increase():
        table = ConcatenationTable.from_tables(tables, axis=axis)

    assert isinstance(table, ConcatenationTable)
    assert table.table == expected_table
    # because of consolidation, we end up with 1 InMemoryTable and 1 MemoryMappedTable
    # The conditional expressions are parenthesised on purpose: without the
    # parentheses `a == 1 if cond else 2` parses as `(a == 1) if cond else 2`,
    # which asserts the always-truthy constant 2 in the `else` case.
    assert len(table.blocks) == (1 if axis == 1 else 2)
    assert len(table.blocks[0]) == (1 if axis == 0 else 2)
    assert axis == 1 or len(table.blocks[1]) == 1
    assert isinstance(table.blocks[0][0], InMemoryTable)
    assert isinstance(
        table.blocks[1][0] if axis == 0 else table.blocks[0][1],
        MemoryMappedTable,
    )
def test_concatenation_table_from_tables(axis, in_memory_pa_table, arrow_file):
    """Check ``ConcatenationTable.from_tables`` on a mix of table types.

    The inputs are the raw ``in_memory_pa_table``, an ``InMemoryTable``
    wrapping it, a ``ConcatenationTable`` built from that wrapper, and a
    ``MemoryMappedTable`` loaded from ``arrow_file``. For the non-zero axis
    every input after the first is passed through
    ``add_suffix_to_column_names`` before concatenation.

    Args:
        axis: Concatenation axis forwarded to ``from_tables``. The test only
            distinguishes ``axis == 0`` from the other case (treated as 1).
        in_memory_pa_table: Fixture providing the base table.
        arrow_file: Fixture providing the path given to
            ``MemoryMappedTable.from_file``.
    """
    in_memory_table = InMemoryTable(in_memory_pa_table)
    concatenation_table = ConcatenationTable.from_blocks(in_memory_table)
    memory_mapped_table = MemoryMappedTable.from_file(arrow_file)
    tables = [
        in_memory_pa_table,
        in_memory_table,
        concatenation_table,
        memory_mapped_table,
    ]

    if axis == 0:
        expected_table = pa.concat_tables([in_memory_pa_table] * len(tables))
    else:
        # avoids error due to duplicate column names
        tables[1:] = [
            add_suffix_to_column_names(other, i)
            for i, other in enumerate(tables[1:], 1)
        ]
        # Build the expectation by appending every column of each renamed
        # table to the base table, in input order.
        expected_table = in_memory_pa_table
        for other in tables[1:]:
            for name, col in zip(other.column_names, other.columns):
                expected_table = expected_table.append_column(name, col)

    with assert_arrow_memory_doesnt_increase():
        table = ConcatenationTable.from_tables(tables, axis=axis)

    assert isinstance(table, ConcatenationTable)
    assert table.table == expected_table
    # because of consolidation, we end up with 1 InMemoryTable and 1 MemoryMappedTable
    # The conditional expressions are parenthesised on purpose: without the
    # parentheses `a == 1 if cond else 2` parses as `(a == 1) if cond else 2`,
    # which asserts the always-truthy constant 2 in the `else` case.
    assert len(table.blocks) == (1 if axis == 1 else 2)
    assert len(table.blocks[0]) == (1 if axis == 0 else 2)
    assert axis == 1 or len(table.blocks[1]) == 1
    assert isinstance(table.blocks[0][0], InMemoryTable)
    assert isinstance(
        table.blocks[1][0] if axis == 0 else table.blocks[0][1],
        MemoryMappedTable,
    )
def test_concatenation_table_from_tables(in_memory_pa_table):
    """Check ``ConcatenationTable.from_tables`` on three in-memory inputs.

    The inputs are the raw ``in_memory_pa_table``, an ``InMemoryTable``
    wrapping it, and a ``ConcatenationTable`` built from that wrapper. The
    result must equal the base table concatenated three times and must keep
    one single-table block per input.
    """
    wrapped = InMemoryTable(in_memory_pa_table)
    nested = ConcatenationTable.from_blocks(wrapped)
    sources = [in_memory_pa_table, wrapped, nested]

    with assert_arrow_memory_doesnt_increase():
        result = ConcatenationTable.from_tables(sources)

    expected = pa.concat_tables([in_memory_pa_table] * len(sources))
    assert result.table == expected
    assert isinstance(result, ConcatenationTable)

    # One block per input table, each holding exactly one table.
    blocks = result.blocks
    assert len(blocks) == len(sources)
    assert [len(row) for row in blocks] == [1] * len(blocks)

    # Every block's single entry is an InMemoryTable wrapping the base table.
    leading = [row[0] for row in blocks]
    assert all(isinstance(entry, InMemoryTable) for entry in leading)
    assert all(entry.table == in_memory_pa_table for entry in leading)