    # The GCS hook is patched where the operator module resolves it; this
    # target path is an assumption and may differ across Airflow versions.
    @mock.patch('airflow.operators.postgres_to_gcs.GoogleCloudStorageHook')
    def test_file_splitting(self, gcs_hook_mock_class):
        """Test that ndjson is split by approx_max_file_size_bytes param."""

        gcs_hook_mock = gcs_hook_mock_class.return_value
        expected_upload = {
            FILENAME.format(0): b''.join(NDJSON_LINES[:2]),
            FILENAME.format(1): NDJSON_LINES[2],
        }

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual('application/json', mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as file:
                self.assertEqual(expected_upload[obj], file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = PostgresToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME,
            # Threshold equals the size of the first two rows, so the third
            # row spills into a second file.
            approx_max_file_size_bytes=len(
                expected_upload[FILENAME.format(0)]))
        op.execute(None)

    def test_init(self):
        """Test PostgresToGoogleCloudStorageOperator instance is properly initialized."""
        op = PostgresToGoogleCloudStorageOperator(
            task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=FILENAME)
        self.assertEqual(op.task_id, TASK_ID)
        self.assertEqual(op.sql, SQL)
        self.assertEqual(op.bucket, BUCKET)
        self.assertEqual(op.filename, FILENAME)

    @mock.patch('airflow.operators.postgres_to_gcs.GoogleCloudStorageHook')
    def test_exec_success(self, gcs_hook_mock_class):
        """Test the execute function in case where the run is successful."""
        op = PostgresToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            postgres_conn_id=POSTGRES_CONN_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME)

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(FILENAME.format(0), obj)
            self.assertEqual('application/json', mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as file:
                self.assertEqual(b''.join(NDJSON_LINES), file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)

    @mock.patch('airflow.operators.postgres_to_gcs.GoogleCloudStorageHook')
    def test_schema_file(self, gcs_hook_mock_class):
        """Test writing schema files."""

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
            if obj == SCHEMA_FILENAME:
                with open(tmp_filename, 'rb') as file:
                    self.assertEqual(SCHEMA_JSON, file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = PostgresToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME,
            schema_filename=SCHEMA_FILENAME)
        op.execute(None)

        # once for the file and once for the schema
        self.assertEqual(2, gcs_hook_mock.upload.call_count)
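
# The test methods above assume imports, module-level fixtures, and an
# enclosing unittest.TestCase subclass, all omitted from this excerpt. A
# minimal sketch of that scaffolding follows; it belongs at the top of the
# test module, with the methods above defined inside the class. The concrete
# fixture values (rows, schema, object names) are illustrative assumptions,
# not authoritative.
import unittest
from unittest import mock

from airflow.operators.postgres_to_gcs import PostgresToGoogleCloudStorageOperator

TASK_ID = 'test-postgres-to-gcs'
POSTGRES_CONN_ID = 'postgres_default'
SQL = 'SELECT * FROM test_table;'
BUCKET = 'test-bucket'
FILENAME = 'test_{}.ndjson'  # {} is filled in with the file chunk number
SCHEMA_FILENAME = 'schema_test.json'

# One JSON document per line, matching the operator's NDJSON output format.
NDJSON_LINES = [
    b'{"some_num": 42, "some_str": "mock_row_content_1"}\n',
    b'{"some_num": 43, "some_str": "mock_row_content_2"}\n',
    b'{"some_num": 44, "some_str": "mock_row_content_3"}\n',
]

# BigQuery-style field list the operator derives from the cursor description.
SCHEMA_JSON = (
    b'[{"mode": "NULLABLE", "name": "some_num", "type": "INTEGER"}, '
    b'{"mode": "NULLABLE", "name": "some_str", "type": "STRING"}]'
)


class TestPostgresToGoogleCloudStorageOperator(unittest.TestCase):
    """Container for the test_* methods shown above."""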


# Example DAG: export the result of a Postgres query to a GCS bucket.
import airflow
from airflow import models
from airflow.operators.postgres_to_gcs import PostgresToGoogleCloudStorageOperator

default_args = {"start_date": airflow.utils.dates.days_ago(1)}

GCS_BUCKET = "postgres_to_gcs_example"
FILENAME = "test_file"
QUERY = "select * from test_table;"

with models.DAG(
        dag_id='example_postgres_to_gcs',
        default_args=default_args,
        schedule_interval=None,  # Override to match your needs
) as dag:
    upload_data = PostgresToGoogleCloudStorageOperator(
        task_id="get_data",
        sql=QUERY,
        bucket=GCS_BUCKET,
        filename=FILENAME,
        gzip=False,
    )
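
    # The tests above also exercise schema_filename, which makes the operator
    # upload a BigQuery-style schema file alongside the data. A second task
    # sketching that option follows; the schema object name is an
    # illustrative assumption.
    upload_data_and_schema = PostgresToGoogleCloudStorageOperator(
        task_id="get_data_with_schema",
        sql=QUERY,
        bucket=GCS_BUCKET,
        filename=FILENAME,
        schema_filename="schemas/test_schema.json",  # hypothetical object name
        gzip=False,
    )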