コード例 #1
0
    def test_custom_table_publish_mixed_type_column(self, mock_session_helper,
                                                    mock_create_custom_table):
        """Publish a frame with one edited ``colA`` cell via ``custom_publish``.

        Row 1 of ``colA`` is overwritten with the integer 45 before
        publishing (presumably to make the column mixed-type, as the test
        name suggests -- confirm against the fixture). The test then checks
        that the custom-table creation mock was called exactly once with the
        table name, schema name, partitions, S3 URL, custom redshift columns
        and the session helper.

        Args:
            mock_session_helper: mock used to build the session helper
                (presumably injected by a patch decorator outside this view).
            mock_create_custom_table: mock whose single call is asserted.
        """
        frame, redshift_columns = setup_custom_redshift_columns_and_dataframe()
        s3_bucket, s3_key = self.setup_s3()
        partition_cols = []
        params = self.setup_redshift_params()

        # The session helper is built from a fixed subset of the redshift
        # parameters, passed through as keyword arguments.
        helper_kwargs = {
            name: params[name]
            for name in ('region', 'cluster_id', 'host', 'port', 'db_name')
        }
        session_helper = mock_session_helper(**helper_kwargs)
        session_helper.configure_session_helper()

        # Replace a single cell of "colA" (positional row 1) with an int.
        col_a_position = frame.columns.get_loc("colA")
        frame.iat[1, col_a_position] = 45

        parq.custom_publish(
            bucket=s3_bucket,
            key=s3_key,
            dataframe=frame,
            partitions=partition_cols,
            redshift_params=params,
            custom_redshift_columns=redshift_columns,
        )

        expected_args = (
            params['table_name'],
            params['schema_name'],
            partition_cols,
            parq.s3_url(s3_bucket, s3_key),
            redshift_columns,
            session_helper,
        )
        mock_create_custom_table.assert_called_once_with(*expected_args)
コード例 #2
0
    def test_custom_table_publish_null_in_int_column(self, mock_session_helper,
                                                     mock_create_custom_table):
        """Publish the "with null" fixture frame through ``custom_publish``.

        Everything runs inside the ``get_s3_client()`` context. After
        publishing, the test checks that the custom-table creation mock was
        called exactly once with the table name, schema name, partitions,
        S3 URL, custom redshift columns and the session helper.

        Args:
            mock_session_helper: mock used to build the session helper
                (presumably injected by a patch decorator outside this view).
            mock_create_custom_table: mock whose single call is asserted.
        """
        with get_s3_client() as s3_client:
            fixture = setup_custom_redshift_columns_and_dataframe_with_null()
            frame, redshift_columns = fixture
            s3_bucket, s3_key = self.setup_s3(s3_client)
            partition_cols = []
            params = self.setup_redshift_params()

            # Only these redshift parameters feed the session helper.
            helper_fields = ('region', 'cluster_id', 'host', 'port', 'db_name')
            helper_kwargs = {field: params[field] for field in helper_fields}
            session_helper = mock_session_helper(**helper_kwargs)
            session_helper.configure_session_helper()

            publish_kwargs = dict(
                bucket=s3_bucket,
                key=s3_key,
                dataframe=frame,
                partitions=partition_cols,
                redshift_params=params,
                custom_redshift_columns=redshift_columns,
            )
            parq.custom_publish(**publish_kwargs)

            table_name = params['table_name']
            schema_name = params['schema_name']
            s3_location = parq.s3_url(s3_bucket, s3_key)
            mock_create_custom_table.assert_called_once_with(
                table_name, schema_name, partition_cols, s3_location,
                redshift_columns, session_helper)
コード例 #3
0
    def test_table_publish_mixed_type_column(self, mock_session_helper,
                                             mock_create_table):
        """Publish a grouped frame with one edited ``text_col`` cell.

        Row 5 of ``text_col`` is overwritten with the integer 45 before
        ``parq.publish`` is called (presumably making the column mixed-type,
        as the test name suggests -- confirm against the fixture). The
        expected column and partition datatypes are computed from the frame
        after publishing, and the table-creation mock is checked for exactly
        one call with those values.

        Args:
            mock_session_helper: mock used to build the session helper
                (presumably injected by a patch decorator outside this view).
            mock_create_table: mock whose single call is asserted.
        """
        frame = setup_grouped_dataframe()
        s3_bucket, s3_key = self.setup_s3()
        partition_cols = []
        params = self.setup_redshift_params()

        # The session helper only receives this subset of the parameters.
        helper_kwargs = {
            name: params[name]
            for name in ('region', 'cluster_id', 'host', 'port', 'db_name')
        }
        session_helper = mock_session_helper(**helper_kwargs)
        session_helper.configure_session_helper()

        # Replace a single cell of "text_col" (positional row 5) with an int.
        text_col_position = frame.columns.get_loc("text_col")
        frame.iat[5, text_col_position] = 45

        parq.publish(
            bucket=s3_bucket,
            key=s3_key,
            dataframe=frame,
            partitions=partition_cols,
            redshift_params=params,
        )

        # Expected datatypes are derived after publishing, from the same
        # frame and partition list that were handed to publish().
        column_types = parq._get_dataframe_datatypes(frame, partition_cols)
        partition_column_types = parq._get_dataframe_datatypes(
            frame, partition_cols, True)

        expected_args = (
            params['table_name'],
            params['schema_name'],
            column_types,
            partition_column_types,
            parq.s3_url(s3_bucket, s3_key),
            session_helper,
        )
        mock_create_table.assert_called_once_with(*expected_args)
コード例 #4
0
    def test_table_publish(self, mock_session_helper, mock_create_table):
        """Publish a grouped frame partitioned on three columns.

        Runs inside the ``get_s3_client()`` context. ``parq.publish`` is
        called with partitions ``text_col``, ``int_col`` and ``float_col``.
        The expected column and partition datatypes are then computed from
        the frame, and the table-creation mock is checked for exactly one
        call with those values.

        Args:
            mock_session_helper: mock used to build the session helper
                (presumably injected by a patch decorator outside this view).
            mock_create_table: mock whose single call is asserted.
        """
        with get_s3_client() as s3_client:
            frame = setup_grouped_dataframe()
            s3_bucket, s3_key = self.setup_s3(s3_client)
            partition_cols = ["text_col", "int_col", "float_col"]
            params = self.setup_redshift_params()

            # Only these redshift parameters feed the session helper.
            helper_fields = ('region', 'cluster_id', 'host', 'port', 'db_name')
            helper_kwargs = {field: params[field] for field in helper_fields}
            session_helper = mock_session_helper(**helper_kwargs)
            session_helper.configure_session_helper()

            publish_kwargs = dict(
                bucket=s3_bucket,
                key=s3_key,
                dataframe=frame,
                partitions=partition_cols,
                redshift_params=params,
            )
            parq.publish(**publish_kwargs)

            # Expected datatypes are derived after publishing, from the same
            # frame and partition list that were handed to publish().
            column_types = parq._get_dataframe_datatypes(frame, partition_cols)
            partition_column_types = parq._get_dataframe_datatypes(
                frame, partition_cols, True)

            table_name = params['table_name']
            schema_name = params['schema_name']
            s3_location = parq.s3_url(s3_bucket, s3_key)
            mock_create_table.assert_called_once_with(
                table_name, schema_name, column_types,
                partition_column_types, s3_location, session_helper)