def test_custom_table_publish_mixed_type_column(self, mock_session_helper, mock_create_custom_table):
    with get_s3_client() as s3_client:
        dataframe, custom_redshift_columns = setup_custom_redshift_columns_and_dataframe()
        bucket, key = self.setup_s3(s3_client)
        partitions = []
        redshift_params = self.setup_redshift_params()
        msh = mock_session_helper(region=redshift_params['region'],
                                  cluster_id=redshift_params['cluster_id'],
                                  host=redshift_params['host'],
                                  port=redshift_params['port'],
                                  db_name=redshift_params['db_name'])
        msh.configure_session_helper()
        # Overwrite a single string cell with an int so colA holds mixed types.
        dataframe.iat[1, dataframe.columns.get_loc("colA")] = 45
        parq.custom_publish(bucket=bucket,
                            key=key,
                            dataframe=dataframe,
                            partitions=partitions,
                            redshift_params=redshift_params,
                            custom_redshift_columns=custom_redshift_columns)
        # The custom table DDL should still be issued exactly once with the
        # explicit column types, since custom_redshift_columns bypasses inference.
        mock_create_custom_table.assert_called_once_with(
            redshift_params['table_name'], redshift_params['schema_name'],
            partitions, parq.s3_url(bucket, key), custom_redshift_columns, msh)
def test_custom_table_publish_null_in_int_column(self, mock_session_helper, mock_create_custom_table):
    with get_s3_client() as s3_client:
        dataframe, custom_redshift_columns = setup_custom_redshift_columns_and_dataframe_with_null()
        bucket, key = self.setup_s3(s3_client)
        partitions = []
        redshift_params = self.setup_redshift_params()
        msh = mock_session_helper(region=redshift_params['region'],
                                  cluster_id=redshift_params['cluster_id'],
                                  host=redshift_params['host'],
                                  port=redshift_params['port'],
                                  db_name=redshift_params['db_name'])
        msh.configure_session_helper()
        parq.custom_publish(bucket=bucket,
                            key=key,
                            dataframe=dataframe,
                            partitions=partitions,
                            redshift_params=redshift_params,
                            custom_redshift_columns=custom_redshift_columns)
        mock_create_custom_table.assert_called_once_with(
            redshift_params['table_name'], redshift_params['schema_name'],
            partitions, parq.s3_url(bucket, key), custom_redshift_columns, msh)
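# A minimal sketch of what the fixture helpers above are assumed to return
# (illustrative only; the column names and types here are assumptions, not
# the repo's actual fixture code): a dataframe paired with a dict that maps
# each column to an explicit Redshift type, roughly:
#
#     def setup_custom_redshift_columns_and_dataframe():
#         dataframe = pd.DataFrame({'colA': ['a', 'b', 'c'], 'colB': [1, 2, 3]})
#         custom_redshift_columns = {'colA': 'VARCHAR(1000)', 'colB': 'INTEGER'}
#         return dataframe, custom_redshift_columns
#
# custom_publish forwards custom_redshift_columns to create_custom_table
# unchanged, which is why both tests above assert on it verbatim.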
def test_table_publish_mixed_type_column(self, mock_session_helper, mock_create_table):
    with get_s3_client() as s3_client:
        dataframe = setup_grouped_dataframe()
        bucket, key = self.setup_s3(s3_client)
        partitions = []
        redshift_params = self.setup_redshift_params()
        msh = mock_session_helper(region=redshift_params['region'],
                                  cluster_id=redshift_params['cluster_id'],
                                  host=redshift_params['host'],
                                  port=redshift_params['port'],
                                  db_name=redshift_params['db_name'])
        msh.configure_session_helper()
        # Overwrite a single string cell with an int so text_col holds mixed types.
        dataframe.iat[5, dataframe.columns.get_loc("text_col")] = 45
        parq.publish(bucket=bucket,
                     key=key,
                     dataframe=dataframe,
                     partitions=partitions,
                     redshift_params=redshift_params)
        # Column types are inferred from the dataframe; partition columns are
        # typed separately via the trailing flag.
        df_types = parq._get_dataframe_datatypes(dataframe, partitions)
        partition_types = parq._get_dataframe_datatypes(dataframe, partitions, True)
        mock_create_table.assert_called_once_with(
            redshift_params['table_name'], redshift_params['schema_name'],
            df_types, partition_types, parq.s3_url(bucket, key), msh)
def test_table_publish(self, mock_session_helper, mock_create_table):
    with get_s3_client() as s3_client:
        dataframe = setup_grouped_dataframe()
        bucket, key = self.setup_s3(s3_client)
        partitions = ["text_col", "int_col", "float_col"]
        redshift_params = self.setup_redshift_params()
        msh = mock_session_helper(region=redshift_params['region'],
                                  cluster_id=redshift_params['cluster_id'],
                                  host=redshift_params['host'],
                                  port=redshift_params['port'],
                                  db_name=redshift_params['db_name'])
        msh.configure_session_helper()
        parq.publish(bucket=bucket,
                     key=key,
                     dataframe=dataframe,
                     partitions=partitions,
                     redshift_params=redshift_params)
        df_types = parq._get_dataframe_datatypes(dataframe, partitions)
        partition_types = parq._get_dataframe_datatypes(dataframe, partitions, True)
        mock_create_table.assert_called_once_with(
            redshift_params['table_name'], redshift_params['schema_name'],
            df_types, partition_types, parq.s3_url(bucket, key), msh)
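# These tests depend on unittest.mock patch decorators defined outside this
# section. A minimal sketch of the assumed scaffolding (the patch target
# strings are assumptions, not confirmed import paths): decorators apply
# bottom-up, so the bottom-most @patch supplies the first mock argument
# after self.
#
#     from unittest.mock import patch
#
#     @patch('s3parq.publish_parq.publish_redshift.create_table')
#     @patch('s3parq.publish_parq.SessionHelper')
#     def test_table_publish(self, mock_session_helper, mock_create_table):
#         ...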