    def test_file_splitting(self, gcs_hook_mock_class):
        """Test that ndjson is split by approx_max_file_size_bytes param."""
        gcs_hook_mock = gcs_hook_mock_class.return_value
        expected_upload = {
            FILENAME.format(0): b''.join(NDJSON_LINES[:2]),
            FILENAME.format(1): NDJSON_LINES[2],
        }

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual('application/json', mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as file:
                self.assertEqual(expected_upload[obj], file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = PostgresToGCSOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME,
            approx_max_file_size_bytes=len(expected_upload[FILENAME.format(0)]),
        )
        op.execute(None)
    def test_exec_success(self, gcs_hook_mock_class):
        """Test the execute function in case where the run is successful."""
        op = PostgresToGCSOperator(
            task_id=TASK_ID,
            postgres_conn_id=POSTGRES_CONN_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME,
        )

        gcs_hook_mock = gcs_hook_mock_class.return_value
        gcs_hook_mock.upload.side_effect = self._assert_uploaded_file_content

        op.execute(None)
    def test_exec_success_server_side_cursor(self, gcs_hook_mock_class):
        """Test the execute in case where the run is successful while using server side cursor."""
        op = PostgresToGCSOperator(
            task_id=TASK_ID,
            postgres_conn_id=POSTGRES_CONN_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME,
            use_server_side_cursor=True,
            cursor_itersize=100,
        )

        gcs_hook_mock = gcs_hook_mock_class.return_value
        gcs_hook_mock.upload.side_effect = self._assert_uploaded_file_content

        op.execute(None)
    def test_init(self):
        """Test PostgresToGoogleCloudStorageOperator instance is properly initialized."""
        op = PostgresToGCSOperator(task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=FILENAME)
        self.assertEqual(op.task_id, TASK_ID)
        self.assertEqual(op.sql, SQL)
        self.assertEqual(op.bucket, BUCKET)
        self.assertEqual(op.filename, FILENAME)
    def test_exec_success(self, gcs_hook_mock_class):
        """Test the execute function in case where the run is successful."""
        op = PostgresToGCSOperator(
            task_id=TASK_ID, postgres_conn_id=POSTGRES_CONN_ID, sql=SQL, bucket=BUCKET, filename=FILENAME
        )

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(FILENAME.format(0), obj)
            self.assertEqual('application/json', mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as file:
                self.assertEqual(b''.join(NDJSON_LINES), file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)
    def test_schema_file(self, gcs_hook_mock_class):
        """Test writing schema files."""
        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
            if obj == SCHEMA_FILENAME:
                with open(tmp_filename, 'rb') as file:
                    self.assertEqual(SCHEMA_JSON, file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = PostgresToGCSOperator(
            task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=FILENAME, schema_filename=SCHEMA_FILENAME
        )
        op.execute(None)

        # once for the file and once for the schema
        self.assertEqual(2, gcs_hook_mock.upload.call_count)
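# --- Assumed test scaffolding (not shown in this excerpt) ---
# The methods above take a ``gcs_hook_mock_class`` argument, which implies each one is
# decorated with ``unittest.mock.patch`` targeting the GCSHook used by the transfer module,
# and that module-level constants such as TASK_ID, SQL, BUCKET, FILENAME, NDJSON_LINES,
# SCHEMA_FILENAME and SCHEMA_JSON are defined alongside the test class.  The patch target,
# class name and the ``_assert_uploaded_file_content`` helper below are a minimal sketch
# reconstructed from the method signatures, not part of the original excerpt.
from unittest import TestCase, mock


@mock.patch('airflow.providers.google.cloud.transfers.postgres_to_gcs.GCSHook')
class TestPostgresToGoogleCloudStorageOperator(TestCase):
    def _assert_uploaded_file_content(self, bucket, obj, tmp_filename, mime_type, gzip):
        # Shared ``upload`` side effect used by the test_exec_success variants above.
        self.assertEqual(BUCKET, bucket)
        self.assertEqual(FILENAME.format(0), obj)
        self.assertEqual('application/json', mime_type)
        self.assertFalse(gzip)
        with open(tmp_filename, 'rb') as file:
            self.assertEqual(b''.join(NDJSON_LINES), file.read())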
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example DAG using PostgresToGoogleCloudStorageOperator.
"""
from airflow import models
from airflow.providers.google.cloud.transfers.postgres_to_gcs import PostgresToGCSOperator
from airflow.utils.dates import days_ago

GCS_BUCKET = "postgres_to_gcs_example"
FILENAME = "test_file"
SQL_QUERY = "select * from test_table;"

with models.DAG(
    dag_id='example_postgres_to_gcs',
    schedule_interval=None,  # Override to match your needs
    start_date=days_ago(1),
    tags=['example'],
) as dag:
    upload_data = PostgresToGCSOperator(
        task_id="get_data", sql=SQL_QUERY, bucket=GCS_BUCKET, filename=FILENAME, gzip=False
    )
import os
from datetime import datetime

from airflow import models
from airflow.providers.google.cloud.transfers.postgres_to_gcs import PostgresToGCSOperator

PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
GCS_BUCKET = os.environ.get("GCP_GCS_BUCKET_NAME", "INVALID BUCKET NAME")
FILENAME = "test_file"
SQL_QUERY = "select * from test_table;"

with models.DAG(
    dag_id='example_postgres_to_gcs',
    schedule_interval='@once',  # Override to match your needs
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=['example'],
) as dag:
    upload_data = PostgresToGCSOperator(
        task_id="get_data", sql=SQL_QUERY, bucket=GCS_BUCKET, filename=FILENAME, gzip=False
    )

    upload_data_server_side_cursor = PostgresToGCSOperator(
        task_id="get_data_with_server_side_cursor",
        sql=SQL_QUERY,
        bucket=GCS_BUCKET,
        filename=FILENAME,
        gzip=False,
        use_server_side_cursor=True,
    )