def test_simple_add(self): ''' Test that + preserves order of first summand ''' columns1 = ColumnList(col_names=['thanks', 'obama']) columns2 = ColumnList(col_names=['obama', '2008']) self.assertEqual((columns1 + columns2).col_names, ['thanks', 'obama', '2008'])
def test_eq_1(self): ''' Test that column lists that have the same names in different order evaluate to 1 under the equality operator ''' columns1 = ColumnList(col_names=['thanks', 'obama']) columns2 = ColumnList(col_names=['obama', 'thanks']) self.assertEqual((columns1 == columns2), 1)
def test_sub(self): # ['capital', 'country', 'currency', 'demonym', 'population'] columns1 = ColumnList(col_names=world_countries_cols()) columns2 = ColumnList(col_names=['Capital', 'Country']) expected_result = ColumnList( col_names=['Currency', 'Demonym', 'Population']) self.assertEqual(columns1 - columns2, expected_result)
def test_expand_table(self, expand_input=True): # Input table has several less columns than mock SQL table input = world_countries_table().subset('Capital', 'Country') sql_table = world_countries_table() new_table = _modify_tables(input, ColumnList(sql_table.col_names, sql_table.col_types), expand_input=expand_input) # Case insensitive comparison new_cols = ColumnList(new_table.col_names, new_table.col_types) sql_cols = ColumnList(sql_table.col_names, sql_table.col_types) self.assertEqual(new_cols, sql_cols)
def test_div(self): # Test that "division" works columns1 = ColumnList( col_names=world_countries_cols(), col_types=['text', 'text', 'text', 'bigint', 'boolean']) columns2 = ColumnList(col_names=world_countries_cols(), col_types=[ 'text', 'bigint', 'text', 'double precision', 'boolean' ]) expected_result = ColumnList(col_names=['Country', 'Demonym'], col_types=['text', 'bigint']) self.assertEqual(columns1 / columns2, expected_result)
def get_table_schema(table, conn=None, **kwargs): ''' Get table schema or None if table DNE * Returns a ColumnList object ''' sql_schema = get_schema(conn=conn).groupby('Table Name') sql_schema = sql_schema[table] if sql_schema['Column Name']: return ColumnList( col_names=sql_schema['Column Name'], col_types=sql_schema['Data Type']) else: return ColumnList()
def test_get_schema_dne(self): schema = get_table_schema('sasquatch', conn=self.conn) self.assertEqual(schema, ColumnList())
def test_lower(self): # Assert that comparison is case insensitive columns1 = ColumnList(col_names=['thanks', 'OBaMa']) columns2 = ColumnList(col_names=['thanks', 'obama']) self.assertEqual((columns1 == columns2), 2)
def setUp(self): self.columns1 = ColumnList(col_names=['thanks', 'OBaMa'])
class TestColumns(unittest.TestCase): ''' Test that this helper data structure works ''' def setUp(self): self.columns1 = ColumnList(col_names=['thanks', 'OBaMa']) ################# # Helper Checks # ################# def test_index(self): self.assertEqual(self.columns1.index('thanks'), 0) def test_not_in_list(self): with self.assertRaises(KeyError): self.columns1.index('georgebush') ######################### # Input Checks # ######################### def test_invalid_pkey(self): with self.assertRaises(TypeError): self.columns1.p_key = set() ############################# # "Mathematical Operations" # ############################# def test_smaller_input(self): # Test that map() function works correctly self.assertEqual(self.columns1.map('thanks'), {0: 'thanks'}) def test_bigger_input(self): # Test that map() function doesn't try to map non-existent keys self.assertEqual(self.columns1.map('thanks', 'alex', 'jones'), {0: 'thanks'}) def test_lower(self): # Assert that comparison is case insensitive columns1 = ColumnList(col_names=['thanks', 'OBaMa']) columns2 = ColumnList(col_names=['thanks', 'obama']) self.assertEqual((columns1 == columns2), 2) def test_simple_add(self): ''' Test that + preserves order of first summand ''' columns1 = ColumnList(col_names=['thanks', 'obama']) columns2 = ColumnList(col_names=['obama', '2008']) self.assertEqual((columns1 + columns2).col_names, ['thanks', 'obama', '2008']) def test_sub(self): # ['capital', 'country', 'currency', 'demonym', 'population'] columns1 = ColumnList(col_names=world_countries_cols()) columns2 = ColumnList(col_names=['Capital', 'Country']) expected_result = ColumnList( col_names=['Currency', 'Demonym', 'Population']) self.assertEqual(columns1 - columns2, expected_result) def test_div(self): # Test that "division" works columns1 = ColumnList( col_names=world_countries_cols(), col_types=['text', 'text', 'text', 'bigint', 'boolean']) columns2 = ColumnList(col_names=world_countries_cols(), col_types=[ 'text', 'bigint', 'text', 'double precision', 'boolean' ]) expected_result = ColumnList(col_names=['Country', 'Demonym'], col_types=['text', 'bigint']) self.assertEqual(columns1 / columns2, expected_result) def test_eq_1(self): ''' Test that column lists that have the same names in different order evaluate to 1 under the equality operator ''' columns1 = ColumnList(col_names=['thanks', 'obama']) columns2 = ColumnList(col_names=['obama', 'thanks']) self.assertEqual((columns1 == columns2), 1) def test_less_than(self): ''' Test that column lists are case insensitive w.r.t. < ''' columns1 = ColumnList(col_names=['thaNks', 'oBaMA']) columns2 = ColumnList(col_names=['thanks', 'alot', 'obama']) self.assertTrue(columns1 < columns2) def test_greater_than(self): ''' Test that column lists are case insensitive w.r.t. > ''' columns1 = ColumnList(col_names=['thanks', 'alot', 'obama']) columns2 = ColumnList(col_names=['thaNks', 'oBaMA']) self.assertTrue(columns1 > columns2)
def test_greater_than(self): ''' Test that column lists are case insensitive w.r.t. > ''' columns1 = ColumnList(col_names=['thanks', 'alot', 'obama']) columns2 = ColumnList(col_names=['thaNks', 'oBaMA']) self.assertTrue(columns1 > columns2)
def copy_csv(file, name, encoding=None, header=0, subset=[], verbose=True, conn=None, compression=None, skiplines=0, **kwargs): ''' Uploads a CSV (or other delimited-separated values) file to PostgreSQL. The delimiter is automatically inferred, so this function can be used to upload--for example--tab-delimited text files as well. **Basic Usage:** >>> import pgreaper >>> pgreaper.copy_csv('slim_shady.txt', ... dbname='stan_db', ... name='slim_shady', ... username='******', ... password='******' ... host='localhost') .. note:: The name argument here was unnecessary, because the filename without the extension is used as a fallback for the table name. Postgres Connection Args: Specify one of the following: dbname, user, password, and host: Method 1: If any of the parameters in this group are omitted, the values from the default settings are used. conn: psycopg2 Connection Method 2: Manually pass in a connection created with `psycopg2.connect()` Args: file: str Name of the file name: str Name of the table compression: 'gzip', 'bz2', or 'lzma' (default: None) The algorithm used to compress the file subset: list[str] (default: []) A list of column names to upload header: int (default: 0, i.e. first line is the header) * No header should be specified with `header=False` or `header=None` skiplines: int (default: 0) How many lines after the header to skip ''' cur = conn.cursor() # COPY statement if encoding: copy_stmt = ("COPY {0} FROM STDIN (FORMAT csv," "HEADER, DELIMITER ','{1})").format( name, ", ENCODING '{}'".format(encoding)) else: copy_stmt = ("COPY {0} FROM STDIN (FORMAT csv," "HEADER, DELIMITER ',')").format(name) # Clean the CSV and calculate statistics csv_meta = to_csv(filename=file, output=file + '_temp.csv', header=header, compression=compression, columns=subset, skiplines=skiplines) col_names = csv_meta['col_names'] schema = csv_meta['dtypes'] col_types = [] for count in schema: if count['str']: col_types.append('text') elif count['float']: col_types.append('double precision') elif count['int']: col_types.append('bigint') else: col_types.append('text') with zip.open(file + '_temp.csv', mode='rb') as temp_file: # Clean column names and create table cols = ColumnList(col_names, col_types) cur.execute(_create_table( name, col_names=cols.sanitize(), col_types=col_types)) # COPY cur.copy_expert(copy_stmt, temp_file) os.remove(file + '_temp.csv') conn.commit() conn.close()