예제 #1
0
 def test_to_sql_with_multiple_partitions(self, cursor):
     """Check ``to_sql`` when two partition columns are requested.

     Ten rows are written with ``col_int`` and ``col_string`` as the
     partition columns. The test then expects ``SHOW PARTITIONS`` to list
     one partition per row and ``COUNT(*)`` to report all ten rows.
     """
     frame = pd.DataFrame({
         "col_int": np.int32(list(xrange(10))),
         "col_bigint": np.int64([12345] * 10),
         "col_string": ["a"] * 5 + ["b"] * 5,
     })
     unique_suffix = str(uuid.uuid4()).replace("-", "")
     table_name = "to_sql_{0}".format(unique_suffix)
     location = "{0}{1}/{2}/".format(
         ENV.s3_staging_dir, S3_PREFIX, table_name)
     to_sql(
         frame, table_name, cursor._connection, location,
         schema=SCHEMA,
         partitions=["col_int", "col_string"],
         if_exists="fail",
         compression="snappy",
     )

     # The first five rows carry "a", the remaining five carry "b".
     partitions_a = [
         ("col_int={0}/col_string=a".format(n), ) for n in xrange(5)
     ]
     partitions_b = [
         ("col_int={0}/col_string=b".format(n), ) for n in xrange(5, 10)
     ]
     cursor.execute("SHOW PARTITIONS {0}".format(table_name))
     listed = sorted(cursor.fetchall())
     self.assertEqual(listed, partitions_a + partitions_b)

     cursor.execute("SELECT COUNT(*) FROM {0}".format(table_name))
     counted = cursor.fetchall()
     self.assertEqual(counted, [(10, )])
예제 #2
0
 def test_to_sql_invalid_args(self, cursor):
     """Check that ``to_sql`` rejects unsupported option values.

     A ``ValueError`` is expected for an unknown ``if_exists`` value and
     for an unknown ``compression`` value.
     """
     frame = pd.DataFrame({"col_int": np.int32([1])})
     unique_suffix = str(uuid.uuid4()).replace("-", "")
     table_name = "to_sql_{0}".format(unique_suffix)
     location = "{0}{1}/{2}/".format(
         ENV.s3_staging_dir, S3_PREFIX, table_name)
     rejected_options = (
         # invalid if_exists
         {"if_exists": "foobar", "compression": "snappy"},
         # invalid compression
         {"if_exists": "fail", "compression": "foobar"},
     )
     for options in rejected_options:
         with self.assertRaises(ValueError):
             to_sql(
                 frame,
                 table_name,
                 cursor._connection,
                 location,
                 schema=SCHEMA,
                 **options
             )
예제 #3
0
 def test_to_sql_with_index(self, cursor):
     """Check that ``to_sql`` can write the DataFrame index as a column.

     With ``index=True`` and ``index_label='col_index'`` the test expects
     the selected row to be ``(0, 1)`` and the cursor description to
     report ``col_index`` (bigint) ahead of ``col_int`` (integer).
     """
     frame = pd.DataFrame({'col_int': np.int32([1])})
     unique_suffix = str(uuid.uuid4()).replace('-', '')
     table_name = 'to_sql_{0}'.format(unique_suffix)
     location = '{0}{1}/{2}/'.format(
         ENV.s3_staging_dir, S3_PREFIX, table_name)
     to_sql(
         frame, table_name, cursor._connection, location,
         schema=SCHEMA,
         if_exists='fail',
         compression='snappy',
         index=True,
         index_label='col_index',
     )

     cursor.execute('SELECT * FROM {0}'.format(table_name))
     rows = cursor.fetchall()
     self.assertEqual(rows, [(0, 1)])

     # Only the first two fields of each description entry are compared.
     described = [(column[0], column[1]) for column in cursor.description]
     self.assertEqual(
         described,
         [('col_index', 'bigint'), ('col_int', 'integer')],
     )
예제 #4
0
 def test_to_sql_with_index(self, cursor):
     """Write a one-row DataFrame including its index and verify the table.

     The index is stored under the label ``col_index``; the test expects
     the row ``(0, 1)`` and the columns ``col_index`` (bigint) followed by
     ``col_int`` (integer) in the cursor description.
     """
     source = pd.DataFrame({"col_int": np.int32([1])})
     table_name = "to_sql_{0}".format(str(uuid.uuid4()).replace("-", ""))
     location = "{0}{1}/{2}/".format(
         ENV.s3_staging_dir,
         S3_PREFIX,
         table_name,
     )
     write_options = dict(
         schema=SCHEMA,
         if_exists="fail",
         compression="snappy",
         index=True,
         index_label="col_index",
     )
     to_sql(source, table_name, cursor._connection, location,
            **write_options)

     query = "SELECT * FROM {0}".format(table_name)
     cursor.execute(query)
     self.assertEqual(cursor.fetchall(), [(0, 1)])

     # Compare only the first two fields of each description entry.
     name_and_type = []
     for entry in cursor.description:
         name_and_type.append((entry[0], entry[1]))
     self.assertEqual(
         name_and_type,
         [("col_index", "bigint"), ("col_int", "integer")],
     )
예제 #5
0
 def test_to_sql_invalid_args(self, cursor):
     """Verify that bad ``if_exists`` / ``compression`` values are refused.

     Each call to ``to_sql`` below is expected to raise ``ValueError``.
     """
     frame = pd.DataFrame({'col_int': np.int32([1])})
     table_name = 'to_sql_{0}'.format(str(uuid.uuid4()).replace('-', ''))
     location = '{0}{1}/{2}/'.format(
         ENV.s3_staging_dir, S3_PREFIX, table_name)

     # invalid if_exists
     self.assertRaises(
         ValueError,
         to_sql,
         frame,
         table_name,
         cursor._connection,
         location,
         schema=SCHEMA,
         if_exists='foobar',
         compression='snappy',
     )
     # invalid compression
     self.assertRaises(
         ValueError,
         to_sql,
         frame,
         table_name,
         cursor._connection,
         location,
         schema=SCHEMA,
         if_exists='fail',
         compression='foobar',
     )
예제 #6
0
    def test_to_sql(self, cursor):
        """Exercise ``to_sql`` with ``if_exists`` set to fail, replace, append.

        A one-row DataFrame covering eight column types is written. The test
        expects a second ``fail`` write to raise ``OperationalError``, a
        ``replace`` write to leave exactly one row with the expected column
        types, and an ``append`` write to leave two identical rows.
        """
        # TODO Add binary column (After dropping support for Python 2.7)
        column_order = [
            "col_int",
            "col_bigint",
            "col_float",
            "col_double",
            "col_string",
            "col_boolean",
            "col_timestamp",
            "col_date",
        ]
        df = pd.DataFrame({
            "col_int": np.int32([1]),
            "col_bigint": np.int64([12345]),
            "col_float": np.float32([1.0]),
            "col_double": np.float64([1.2345]),
            "col_string": ["a"],
            "col_boolean": np.bool_([True]),
            "col_timestamp": [datetime(2020, 1, 1, 0, 0, 0)],
            "col_date": [date(2020, 12, 31)],
        })
        # Explicitly specify column order
        df = df[column_order]
        unique_suffix = str(uuid.uuid4()).replace("-", "")
        table_name = "to_sql_{0}".format(unique_suffix)
        location = "{0}{1}/{2}/".format(
            ENV.s3_staging_dir, S3_PREFIX, table_name)
        select_all = "SELECT * FROM {0}".format(table_name)

        def write(if_exists):
            # Every write in this test differs only in the if_exists mode.
            to_sql(
                df,
                table_name,
                cursor._connection,
                location,
                schema=SCHEMA,
                if_exists=if_exists,
                compression="snappy",
            )

        expected_row = (
            1,
            12345,
            1.0,
            1.2345,
            "a",
            True,
            datetime(2020, 1, 1, 0, 0, 0),
            date(2020, 12, 31),
        )
        expected_columns = [
            ("col_int", "integer"),
            ("col_bigint", "bigint"),
            ("col_float", "float"),
            ("col_double", "double"),
            ("col_string", "varchar"),
            ("col_boolean", "boolean"),
            ("col_timestamp", "timestamp"),
            ("col_date", "date"),
        ]

        write("fail")
        # table already exists
        with self.assertRaises(OperationalError):
            write("fail")
        # replace
        write("replace")

        cursor.execute(select_all)
        self.assertEqual(cursor.fetchall(), [expected_row])
        described = [(column[0], column[1]) for column in cursor.description]
        self.assertEqual(described, expected_columns)

        # append
        write("append")
        cursor.execute(select_all)
        self.assertEqual(cursor.fetchall(), [expected_row, expected_row])
예제 #7
0
    def test_to_sql(self, cursor):
        """Round-trip a one-row DataFrame through ``to_sql`` in three modes.

        The first write uses ``if_exists='fail'``; repeating it is expected
        to raise ``OperationalError``. After a ``replace`` write the table
        should hold one row with the expected column types, and after an
        ``append`` write it should hold that row twice.
        """
        # TODO Add binary column (Drop support for Python 2.7)
        timestamp_value = datetime(2020, 1, 1, 0, 0, 0)
        date_value = date(2020, 12, 31)
        df = pd.DataFrame({
            'col_int': np.int32([1]),
            'col_bigint': np.int64([12345]),
            'col_float': np.float32([1.0]),
            'col_double': np.float64([1.2345]),
            'col_string': ['a'],
            'col_boolean': np.bool_([True]),
            'col_timestamp': [timestamp_value],
            'col_date': [date_value],
        })
        # Explicitly specify column order
        ordered_columns = [
            'col_int',
            'col_bigint',
            'col_float',
            'col_double',
            'col_string',
            'col_boolean',
            'col_timestamp',
            'col_date',
        ]
        df = df[ordered_columns]
        table_name = 'to_sql_{0}'.format(str(uuid.uuid4()).replace('-', ''))
        location = '{0}{1}/{2}/'.format(
            ENV.s3_staging_dir, S3_PREFIX, table_name)
        # Keyword arguments shared by every write below.
        shared_options = dict(schema=SCHEMA, compression='snappy')
        expected_row = (
            1,
            12345,
            1.0,
            1.2345,
            'a',
            True,
            datetime(2020, 1, 1, 0, 0, 0),
            date(2020, 12, 31),
        )

        to_sql(df, table_name, cursor._connection, location,
               if_exists='fail', **shared_options)
        # table already exists
        with self.assertRaises(OperationalError):
            to_sql(df, table_name, cursor._connection, location,
                   if_exists='fail', **shared_options)
        # replace
        to_sql(df, table_name, cursor._connection, location,
               if_exists='replace', **shared_options)

        cursor.execute('SELECT * FROM {0}'.format(table_name))
        rows_after_replace = cursor.fetchall()
        self.assertEqual(rows_after_replace, [expected_row])
        name_and_type = [(entry[0], entry[1]) for entry in cursor.description]
        self.assertEqual(name_and_type, [
            ('col_int', 'integer'),
            ('col_bigint', 'bigint'),
            ('col_float', 'float'),
            ('col_double', 'double'),
            ('col_string', 'varchar'),
            ('col_boolean', 'boolean'),
            ('col_timestamp', 'timestamp'),
            ('col_date', 'date'),
        ])

        # append
        to_sql(df, table_name, cursor._connection, location,
               if_exists='append', **shared_options)
        cursor.execute('SELECT * FROM {0}'.format(table_name))
        rows_after_append = cursor.fetchall()
        self.assertEqual(rows_after_append, [expected_row] * 2)