def test_file_splitting(self, gcs_hook_mock_class):
    """Test that ndjson is split by approx_max_file_size_bytes param."""
    # The first two ndjson lines should land in chunk 0, the third in chunk 1.
    expected_files = {
        FILENAME.format(0): b''.join(NDJSON_LINES[:2]),
        FILENAME.format(1): NDJSON_LINES[2],
    }

    def _verify_upload(bucket, obj, tmp_filename, mime_type, gzip):
        self.assertEqual(BUCKET, bucket)
        self.assertEqual('application/json', mime_type)
        self.assertFalse(gzip)
        with open(tmp_filename, 'rb') as handle:
            self.assertEqual(expected_files[obj], handle.read())

    gcs_hook_mock_class.return_value.upload.side_effect = _verify_upload

    operator = PostgresToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        sql=SQL,
        bucket=BUCKET,
        filename=FILENAME,
        # Cap each output file at exactly the size of the first expected chunk
        # so the third line spills into a second file.
        approx_max_file_size_bytes=len(expected_files[FILENAME.format(0)]))
    operator.execute(None)
def test_init(self):
    """Test PostgresToGoogleCloudStorageOperator instance is properly initialized."""
    operator = PostgresToGoogleCloudStorageOperator(
        task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=FILENAME)
    # Each constructor argument must be stored verbatim on the operator.
    for attribute, expected in (
            (operator.task_id, TASK_ID),
            (operator.sql, SQL),
            (operator.bucket, BUCKET),
            (operator.filename, FILENAME)):
        self.assertEqual(attribute, expected)
def test_exec_success(self, gcs_hook_mock_class):
    """Test the execute function in case where the run is successful."""

    def _verify_upload(bucket, obj, tmp_filename, mime_type, gzip):
        self.assertEqual(BUCKET, bucket)
        self.assertEqual(FILENAME.format(0), obj)
        self.assertEqual('application/json', mime_type)
        self.assertFalse(gzip)
        with open(tmp_filename, 'rb') as handle:
            # Everything fits in a single file, so every ndjson line is present.
            self.assertEqual(b''.join(NDJSON_LINES), handle.read())

    gcs_hook_mock_class.return_value.upload.side_effect = _verify_upload

    operator = PostgresToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        postgres_conn_id=POSTGRES_CONN_ID,
        sql=SQL,
        bucket=BUCKET,
        filename=FILENAME)
    operator.execute(None)
def test_schema_file(self, gcs_hook_mock_class):
    """Test writing schema files."""
    upload_mock = gcs_hook_mock_class.return_value.upload

    def _verify_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
        # Only the schema object is inspected here; the data-file upload is
        # covered by the other tests.
        if obj == SCHEMA_FILENAME:
            with open(tmp_filename, 'rb') as handle:
                self.assertEqual(SCHEMA_JSON, handle.read())

    upload_mock.side_effect = _verify_upload

    operator = PostgresToGoogleCloudStorageOperator(
        task_id=TASK_ID,
        sql=SQL,
        bucket=BUCKET,
        filename=FILENAME,
        schema_filename=SCHEMA_FILENAME)
    operator.execute(None)

    # once for the file and once for the schema
    self.assertEqual(2, upload_mock.call_count)
"""Example DAG exporting the result of a Postgres query to Google Cloud Storage."""
import airflow
from airflow import models
from airflow.operators.postgres_to_gcs import PostgresToGoogleCloudStorageOperator

default_args = {"start_date": airflow.utils.dates.days_ago(1)}

# Destination bucket/object and the query whose results are exported.
GCS_BUCKET = "postgres_to_gcs_example"
FILENAME = "test_file"
QUERY = "select * from test_table;"

with models.DAG(
        dag_id='example_postgres_to_gcs',
        default_args=default_args,
        schedule_interval=None,  # Override to match your needs
) as dag:
    upload_data = PostgresToGoogleCloudStorageOperator(
        task_id="get_data",
        sql=QUERY,
        bucket=GCS_BUCKET,
        filename=FILENAME,
        gzip=False,
    )