def test_file_splitting(self, gcs_hook_mock_class, mssql_hook_mock_class):
    """Test that ndjson is split by approx_max_file_size_bytes param."""
    mssql_hook_mock = mssql_hook_mock_class.return_value
    mssql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
    mssql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

    gcs_hook_mock = gcs_hook_mock_class.return_value

    expected_upload = {
        JSON_FILENAME.format(0): b''.join(NDJSON_LINES[:2]),
        JSON_FILENAME.format(1): NDJSON_LINES[2],
    }

    def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
        assert BUCKET == bucket
        assert 'application/json' == mime_type
        assert GZIP == gzip
        with open(tmp_filename, 'rb') as file:
            assert expected_upload[obj] == file.read()

    gcs_hook_mock.upload.side_effect = _assert_upload

    op = MSSQLToGCSOperator(
        task_id=TASK_ID,
        sql=SQL,
        bucket=BUCKET,
        filename=JSON_FILENAME,
        approx_max_file_size_bytes=len(expected_upload[JSON_FILENAME.format(0)]),
    )
    op.execute(None)
def test_exec_success_json(self, gcs_hook_mock_class, mssql_hook_mock_class):
    """Test successful run of execute function for JSON"""
    op = MSSQLToGCSOperator(
        task_id=TASK_ID, mssql_conn_id=MSSQL_CONN_ID, sql=SQL, bucket=BUCKET, filename=JSON_FILENAME
    )

    mssql_hook_mock = mssql_hook_mock_class.return_value
    mssql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
    mssql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

    gcs_hook_mock = gcs_hook_mock_class.return_value

    def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
        assert BUCKET == bucket
        assert JSON_FILENAME.format(0) == obj
        assert 'application/json' == mime_type
        assert GZIP == gzip
        with open(tmp_filename, 'rb') as file:
            assert b''.join(NDJSON_LINES) == file.read()

    gcs_hook_mock.upload.side_effect = _assert_upload

    op.execute(None)

    mssql_hook_mock_class.assert_called_once_with(mssql_conn_id=MSSQL_CONN_ID)
    mssql_hook_mock.get_conn().cursor().execute.assert_called_once_with(SQL)
def test_schema_file(self, gcs_hook_mock_class, mssql_hook_mock_class):
    """Test writing schema files."""
    mssql_hook_mock = mssql_hook_mock_class.return_value
    mssql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
    mssql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

    gcs_hook_mock = gcs_hook_mock_class.return_value

    def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
        if obj == SCHEMA_FILENAME:
            with open(tmp_filename, 'rb') as file:
                self.assertEqual(b''.join(SCHEMA_JSON), file.read())

    gcs_hook_mock.upload.side_effect = _assert_upload

    op = MSSQLToGCSOperator(
        task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=JSON_FILENAME, schema_filename=SCHEMA_FILENAME
    )
    op.execute(None)

    # once for the file and once for the schema
    self.assertEqual(2, gcs_hook_mock.upload.call_count)
def test_init(self):
    """Test MSSQLToGCSOperator instance is properly initialized."""
    op = MSSQLToGCSOperator(task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=JSON_FILENAME)
    assert op.task_id == TASK_ID
    assert op.sql == SQL
    assert op.bucket == BUCKET
    assert op.filename == JSON_FILENAME
# specific language governing permissions and limitations
# under the License.
import os
from datetime import datetime

from airflow import models
from airflow.providers.google.cloud.transfers.mssql_to_gcs import MSSQLToGCSOperator

GCS_BUCKET = os.environ.get("GCP_GCS_BUCKET", "example-airflow")
FILENAME = 'test_file'
SQL_QUERY = "USE airflow SELECT * FROM Country;"

with models.DAG(
    'example_mssql_to_gcs',
    schedule_interval='@once',
    start_date=datetime(2021, 12, 1),
    catchup=False,
    tags=['example'],
) as dag:
    # [START howto_operator_mssql_to_gcs]
    upload = MSSQLToGCSOperator(
        task_id='mssql_to_gcs',
        mssql_conn_id='airflow_mssql',
        sql=SQL_QUERY,
        bucket=GCS_BUCKET,
        filename=FILENAME,
        export_format='csv',
    )
    # [END howto_operator_mssql_to_gcs]
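    # A minimal sketch (not part of the original example) showing the NDJSON export
    # variant exercised by the tests above: export_format='json' with a schema file
    # uploaded alongside the data. The task_id and object names here are illustrative
    # assumptions; '{}' in filename is replaced with a chunk index when the output is
    # split across multiple files (see approx_max_file_size_bytes in the tests).
    upload_json = MSSQLToGCSOperator(
        task_id='mssql_to_gcs_json',
        mssql_conn_id='airflow_mssql',
        sql=SQL_QUERY,
        bucket=GCS_BUCKET,
        filename='country_export_{}.json',
        schema_filename='country_schema.json',
        export_format='json',
    )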