def test_to_sql_with_multiple_partitions(self, cursor):
    """Write a DataFrame partitioned on two columns and verify the
    partitions and row count that the engine reports.

    Fix: replaces Python-2-only ``xrange`` with ``range`` — iteration
    behavior is identical on Python 2, and the test no longer fails with
    ``NameError`` on Python 3 (the file's own TODO notes Python 2.7
    support is being dropped).
    """
    df = pd.DataFrame({
        "col_int": np.int32(list(range(10))),
        "col_bigint": np.int64([12345] * 10),
        # First five rows partition under col_string=a, last five under b.
        "col_string": ["a"] * 5 + ["b"] * 5,
    })
    # Unique table name per run so repeated runs never collide.
    table_name = "to_sql_{0}".format(str(uuid.uuid4()).replace("-", ""))
    location = "{0}{1}/{2}/".format(ENV.s3_staging_dir, S3_PREFIX, table_name)
    to_sql(
        df,
        table_name,
        cursor._connection,
        location,
        schema=SCHEMA,
        partitions=["col_int", "col_string"],
        if_exists="fail",
        compression="snappy",
    )
    cursor.execute("SHOW PARTITIONS {0}".format(table_name))
    # SHOW PARTITIONS yields one single-column row per partition; sort
    # because the result order is not guaranteed.
    self.assertEqual(
        sorted(cursor.fetchall()),
        [("col_int={0}/col_string=a".format(i), ) for i in range(5)]
        + [("col_int={0}/col_string=b".format(i), ) for i in range(5, 10)],
    )
    cursor.execute("SELECT COUNT(*) FROM {0}".format(table_name))
    self.assertEqual(cursor.fetchall(), [(10, )])
def test_to_sql_invalid_args(self, cursor):
    """Unknown ``if_exists`` / ``compression`` values raise ``ValueError``."""
    df = pd.DataFrame({"col_int": np.int32([1])})
    table_name = "to_sql_{0}".format(str(uuid.uuid4()).replace("-", ""))
    location = "{0}{1}/{2}/".format(ENV.s3_staging_dir, S3_PREFIX, table_name)
    # Each entry is one invalid keyword combination; validation must
    # reject it before any data is written.
    invalid_kwargs = (
        {"if_exists": "foobar", "compression": "snappy"},  # invalid if_exists
        {"if_exists": "fail", "compression": "foobar"},  # invalid compression
    )
    for kwargs in invalid_kwargs:
        with self.assertRaises(ValueError):
            to_sql(df, table_name, cursor._connection, location,
                   schema=SCHEMA, **kwargs)
def test_to_sql_with_index(self, cursor):
    """Writing with ``index=True`` materializes the index as ``col_index``."""
    frame = pd.DataFrame({'col_int': np.int32([1])})
    name = 'to_sql_{0}'.format(str(uuid.uuid4()).replace('-', ''))
    path = '{0}{1}/{2}/'.format(ENV.s3_staging_dir, S3_PREFIX, name)
    to_sql(
        frame,
        name,
        cursor._connection,
        path,
        schema=SCHEMA,
        if_exists='fail',
        compression='snappy',
        index=True,
        index_label='col_index',
    )
    cursor.execute('SELECT * FROM {0}'.format(name))
    # Single row: index value 0 first, then the data column.
    self.assertEqual(cursor.fetchall(), [(0, 1)])
    expected_columns = [('col_index', 'bigint'), ('col_int', 'integer')]
    self.assertEqual([(d[0], d[1]) for d in cursor.description],
                     expected_columns)
def test_to_sql_with_index(self, cursor):
    """Round-trip a one-row frame with its index stored as ``col_index``."""
    df = pd.DataFrame({"col_int": np.int32([1])})
    unique_suffix = str(uuid.uuid4()).replace("-", "")
    table_name = "to_sql_{0}".format(unique_suffix)
    location = "{0}{1}/{2}/".format(ENV.s3_staging_dir, S3_PREFIX, table_name)
    to_sql(df, table_name, cursor._connection, location,
           schema=SCHEMA, if_exists="fail", compression="snappy",
           index=True, index_label="col_index")
    cursor.execute("SELECT * FROM {0}".format(table_name))
    # One row back: (index value, col_int value).
    self.assertEqual(cursor.fetchall(), [(0, 1)])
    self.assertEqual(
        [(d[0], d[1]) for d in cursor.description],
        [("col_index", "bigint"), ("col_int", "integer")],
    )
def test_to_sql_invalid_args(self, cursor):
    """``to_sql`` validates its keyword arguments up front."""
    df = pd.DataFrame({'col_int': np.int32([1])})
    table_name = 'to_sql_{0}'.format(str(uuid.uuid4()).replace('-', ''))
    location = '{0}{1}/{2}/'.format(ENV.s3_staging_dir, S3_PREFIX, table_name)

    def attempt(if_exists, compression):
        # Helper so each invalid combination reads as a single call.
        to_sql(df, table_name, cursor._connection, location,
               schema=SCHEMA, if_exists=if_exists, compression=compression)

    # invalid if_exists
    with self.assertRaises(ValueError):
        attempt('foobar', 'snappy')
    # invalid compression
    with self.assertRaises(ValueError):
        attempt('fail', 'foobar')
def test_to_sql(self, cursor):
    """End-to-end ``to_sql``: create, fail-on-existing, replace, append."""
    # TODO Add binary column (After dropping support for Python 2.7)
    column_order = [
        "col_int",
        "col_bigint",
        "col_float",
        "col_double",
        "col_string",
        "col_boolean",
        "col_timestamp",
        "col_date",
    ]
    df = pd.DataFrame({
        "col_int": np.int32([1]),
        "col_bigint": np.int64([12345]),
        "col_float": np.float32([1.0]),
        "col_double": np.float64([1.2345]),
        "col_string": ["a"],
        "col_boolean": np.bool_([True]),
        "col_timestamp": [datetime(2020, 1, 1, 0, 0, 0)],
        "col_date": [date(2020, 12, 31)],
    })
    # Explicitly specify column order
    df = df[column_order]
    table_name = "to_sql_{0}".format(str(uuid.uuid4()).replace("-", ""))
    location = "{0}{1}/{2}/".format(ENV.s3_staging_dir, S3_PREFIX, table_name)

    def write(if_exists):
        # Every to_sql call in this test differs only in if_exists.
        to_sql(df, table_name, cursor._connection, location,
               schema=SCHEMA, if_exists=if_exists, compression="snappy")

    # The single row we expect to read back after each successful write.
    expected_row = (
        1,
        12345,
        1.0,
        1.2345,
        "a",
        True,
        datetime(2020, 1, 1, 0, 0, 0),
        date(2020, 12, 31),
    )

    write("fail")
    # table already exists
    with self.assertRaises(OperationalError):
        write("fail")
    # replace
    write("replace")
    cursor.execute("SELECT * FROM {0}".format(table_name))
    self.assertEqual(cursor.fetchall(), [expected_row])
    self.assertEqual(
        [(d[0], d[1]) for d in cursor.description],
        [
            ("col_int", "integer"),
            ("col_bigint", "bigint"),
            ("col_float", "float"),
            ("col_double", "double"),
            ("col_string", "varchar"),
            ("col_boolean", "boolean"),
            ("col_timestamp", "timestamp"),
            ("col_date", "date"),
        ],
    )
    # append
    write("append")
    cursor.execute("SELECT * FROM {0}".format(table_name))
    self.assertEqual(cursor.fetchall(), [expected_row, expected_row])
def test_to_sql(self, cursor):
    """Verify create/replace/append semantics of ``to_sql``."""
    # TODO Add binary column (Drop support for Python 2.7)
    data = {
        'col_int': np.int32([1]),
        'col_bigint': np.int64([12345]),
        'col_float': np.float32([1.0]),
        'col_double': np.float64([1.2345]),
        'col_string': ['a'],
        'col_boolean': np.bool_([True]),
        'col_timestamp': [datetime(2020, 1, 1, 0, 0, 0)],
        'col_date': [date(2020, 12, 31)],
    }
    frame = pd.DataFrame(data)
    # Explicitly specify column order
    frame = frame[[
        'col_int', 'col_bigint', 'col_float', 'col_double',
        'col_string', 'col_boolean', 'col_timestamp', 'col_date',
    ]]
    table_name = 'to_sql_{0}'.format(str(uuid.uuid4()).replace('-', ''))
    location = '{0}{1}/{2}/'.format(ENV.s3_staging_dir, S3_PREFIX, table_name)

    # The row we expect back after each successful write.
    row = (1, 12345, 1.0, 1.2345, 'a', True,
           datetime(2020, 1, 1, 0, 0, 0), date(2020, 12, 31))

    to_sql(frame, table_name, cursor._connection, location,
           schema=SCHEMA, if_exists='fail', compression='snappy')
    # table already exists
    with self.assertRaises(OperationalError):
        to_sql(frame, table_name, cursor._connection, location,
               schema=SCHEMA, if_exists='fail', compression='snappy')
    # replace
    to_sql(frame, table_name, cursor._connection, location,
           schema=SCHEMA, if_exists='replace', compression='snappy')
    cursor.execute('SELECT * FROM {0}'.format(table_name))
    self.assertEqual(cursor.fetchall(), [row])
    described = [(d[0], d[1]) for d in cursor.description]
    self.assertEqual(described, [
        ('col_int', 'integer'),
        ('col_bigint', 'bigint'),
        ('col_float', 'float'),
        ('col_double', 'double'),
        ('col_string', 'varchar'),
        ('col_boolean', 'boolean'),
        ('col_timestamp', 'timestamp'),
        ('col_date', 'date'),
    ])
    # append
    to_sql(frame, table_name, cursor._connection, location,
           schema=SCHEMA, if_exists='append', compression='snappy')
    cursor.execute('SELECT * FROM {0}'.format(table_name))
    self.assertEqual(cursor.fetchall(), [row, row])