Example #1
0
    def test_simple_add(self):
        ''' Test that + preserves order of first summand '''

        columns1 = ColumnList(col_names=['thanks', 'obama'])
        columns2 = ColumnList(col_names=['obama', '2008'])

        self.assertEqual((columns1 + columns2).col_names,
                         ['thanks', 'obama', '2008'])
Example #2
0
    def test_eq_1(self):
        '''
        Test that column lists that have the same names in different 
        order evaluate to 1 under the equality operator
        '''

        columns1 = ColumnList(col_names=['thanks', 'obama'])
        columns2 = ColumnList(col_names=['obama', 'thanks'])
        self.assertEqual((columns1 == columns2), 1)
Example #3
0
    def test_sub(self):
        # ['capital', 'country', 'currency', 'demonym', 'population']
        columns1 = ColumnList(col_names=world_countries_cols())
        columns2 = ColumnList(col_names=['Capital', 'Country'])

        expected_result = ColumnList(
            col_names=['Currency', 'Demonym', 'Population'])

        self.assertEqual(columns1 - columns2, expected_result)
Example #4
0
    def test_expand_table(self, expand_input=True):
        # Input table has several less columns than mock SQL table
        input = world_countries_table().subset('Capital', 'Country')
        sql_table = world_countries_table()

        new_table = _modify_tables(input,
                                   ColumnList(sql_table.col_names,
                                              sql_table.col_types),
                                   expand_input=expand_input)

        # Case insensitive comparison
        new_cols = ColumnList(new_table.col_names, new_table.col_types)
        sql_cols = ColumnList(sql_table.col_names, sql_table.col_types)
        self.assertEqual(new_cols, sql_cols)
Example #5
0
    def test_div(self):
        # Test that "division" works
        columns1 = ColumnList(
            col_names=world_countries_cols(),
            col_types=['text', 'text', 'text', 'bigint', 'boolean'])
        columns2 = ColumnList(col_names=world_countries_cols(),
                              col_types=[
                                  'text', 'bigint', 'text', 'double precision',
                                  'boolean'
                              ])

        expected_result = ColumnList(col_names=['Country', 'Demonym'],
                                     col_types=['text', 'bigint'])
        self.assertEqual(columns1 / columns2, expected_result)
Example #6
0
def get_table_schema(table, conn=None, **kwargs):
    '''
    Get table schema or None if table DNE
     * Returns a ColumnList object
    '''
    
    sql_schema = get_schema(conn=conn).groupby('Table Name')
    sql_schema = sql_schema[table]
    
    if sql_schema['Column Name']:
        return ColumnList(
            col_names=sql_schema['Column Name'],
            col_types=sql_schema['Data Type'])
    else:
        return ColumnList()
Example #7
0
 def test_get_schema_dne(self):
     schema = get_table_schema('sasquatch', conn=self.conn)
     self.assertEqual(schema, ColumnList())
Example #8
0
    def test_lower(self):
        # Assert that comparison is case insensitive
        columns1 = ColumnList(col_names=['thanks', 'OBaMa'])
        columns2 = ColumnList(col_names=['thanks', 'obama'])

        self.assertEqual((columns1 == columns2), 2)
Example #9
0
 def setUp(self):
     self.columns1 = ColumnList(col_names=['thanks', 'OBaMa'])
Example #10
0
class TestColumns(unittest.TestCase):
    ''' Test that this helper data structure works '''
    def setUp(self):
        self.columns1 = ColumnList(col_names=['thanks', 'OBaMa'])

    #################
    # Helper Checks #
    #################

    def test_index(self):
        self.assertEqual(self.columns1.index('thanks'), 0)

    def test_not_in_list(self):
        with self.assertRaises(KeyError):
            self.columns1.index('georgebush')

    #########################
    # Input Checks          #
    #########################

    def test_invalid_pkey(self):
        with self.assertRaises(TypeError):
            self.columns1.p_key = set()

    #############################
    # "Mathematical Operations" #
    #############################

    def test_smaller_input(self):
        # Test that map() function works correctly
        self.assertEqual(self.columns1.map('thanks'), {0: 'thanks'})

    def test_bigger_input(self):
        # Test that map() function doesn't try to map non-existent keys
        self.assertEqual(self.columns1.map('thanks', 'alex', 'jones'),
                         {0: 'thanks'})

    def test_lower(self):
        # Assert that comparison is case insensitive
        columns1 = ColumnList(col_names=['thanks', 'OBaMa'])
        columns2 = ColumnList(col_names=['thanks', 'obama'])

        self.assertEqual((columns1 == columns2), 2)

    def test_simple_add(self):
        ''' Test that + preserves order of first summand '''

        columns1 = ColumnList(col_names=['thanks', 'obama'])
        columns2 = ColumnList(col_names=['obama', '2008'])

        self.assertEqual((columns1 + columns2).col_names,
                         ['thanks', 'obama', '2008'])

    def test_sub(self):
        # ['capital', 'country', 'currency', 'demonym', 'population']
        columns1 = ColumnList(col_names=world_countries_cols())
        columns2 = ColumnList(col_names=['Capital', 'Country'])

        expected_result = ColumnList(
            col_names=['Currency', 'Demonym', 'Population'])

        self.assertEqual(columns1 - columns2, expected_result)

    def test_div(self):
        # Test that "division" works
        columns1 = ColumnList(
            col_names=world_countries_cols(),
            col_types=['text', 'text', 'text', 'bigint', 'boolean'])
        columns2 = ColumnList(col_names=world_countries_cols(),
                              col_types=[
                                  'text', 'bigint', 'text', 'double precision',
                                  'boolean'
                              ])

        expected_result = ColumnList(col_names=['Country', 'Demonym'],
                                     col_types=['text', 'bigint'])
        self.assertEqual(columns1 / columns2, expected_result)

    def test_eq_1(self):
        '''
        Test that column lists that have the same names in different 
        order evaluate to 1 under the equality operator
        '''

        columns1 = ColumnList(col_names=['thanks', 'obama'])
        columns2 = ColumnList(col_names=['obama', 'thanks'])
        self.assertEqual((columns1 == columns2), 1)

    def test_less_than(self):
        ''' Test that column lists are case insensitive w.r.t. < '''

        columns1 = ColumnList(col_names=['thaNks', 'oBaMA'])
        columns2 = ColumnList(col_names=['thanks', 'alot', 'obama'])
        self.assertTrue(columns1 < columns2)

    def test_greater_than(self):
        ''' Test that column lists are case insensitive w.r.t. > '''

        columns1 = ColumnList(col_names=['thanks', 'alot', 'obama'])
        columns2 = ColumnList(col_names=['thaNks', 'oBaMA'])
        self.assertTrue(columns1 > columns2)
Example #11
0
    def test_greater_than(self):
        ''' Test that column lists are case insensitive w.r.t. > '''

        columns1 = ColumnList(col_names=['thanks', 'alot', 'obama'])
        columns2 = ColumnList(col_names=['thaNks', 'oBaMA'])
        self.assertTrue(columns1 > columns2)
Example #12
0
def copy_csv(file, name, encoding=None, header=0, subset=[],
    verbose=True, conn=None, compression=None, skiplines=0, **kwargs):
    '''
    Uploads a CSV (or other delimited-separated values) file to PostgreSQL.
    The delimiter is automatically inferred, so this function can be used to
    upload--for example--tab-delimited text files as well.
    
    **Basic Usage:**
     >>> import pgreaper
     >>> pgreaper.copy_csv('slim_shady.txt',
     ...    dbname='stan_db',
     ...    name='slim_shady',
     ...    username='******',
     ...    password='******'
     ...    host='localhost')
     
    .. note:: The name argument here was unnecessary, because the
       filename without the extension is used as a fallback for the 
       table name.
    
    Postgres Connection Args: Specify one of the following:
        dbname, user, password, and host:
                        Method 1: If any of the parameters in this group are
                        omitted, the values from the default settings are used.
        conn:           psycopg2 Connection
                        Method 2: Manually pass in a connection created with
                        `psycopg2.connect()`
    
    Args:
        file:           str
                        Name of the file
        name:           str
                        Name of the table
        compression:    'gzip', 'bz2', or 'lzma' (default: None)
                        The algorithm used to compress the file
        subset:         list[str] (default: [])
                        A list of column names to upload
        header:         int (default: 0, i.e. first line is the header)
                         * No header should be specified with `header=False` or `header=None`                    
        skiplines:      int (default: 0)
                        How many lines after the header to skip  
    '''
    
    cur = conn.cursor()

    # COPY statement
    if encoding:
        copy_stmt = ("COPY {0} FROM STDIN (FORMAT csv,"
                      "HEADER, DELIMITER ','{1})").format(
            name, ", ENCODING '{}'".format(encoding))
    else:
        copy_stmt = ("COPY {0} FROM STDIN (FORMAT csv,"
                      "HEADER, DELIMITER ',')").format(name)
    
    # Clean the CSV and calculate statistics
    csv_meta = to_csv(filename=file, output=file + '_temp.csv', header=header,
        compression=compression, columns=subset, skiplines=skiplines)
    col_names = csv_meta['col_names']
    schema = csv_meta['dtypes']

    col_types = []
    for count in schema:
        if count['str']:
            col_types.append('text')
        elif count['float']:
            col_types.append('double precision')
        elif count['int']:
            col_types.append('bigint')
        else:
            col_types.append('text')
    
    with zip.open(file + '_temp.csv', mode='rb') as temp_file:
        # Clean column names and create table
        cols = ColumnList(col_names, col_types)
        cur.execute(_create_table(
            name, col_names=cols.sanitize(), col_types=col_types))

        # COPY
        cur.copy_expert(copy_stmt, temp_file)
    
    os.remove(file + '_temp.csv')    
    conn.commit()
    conn.close()