def test_save_as_csv(self, mock_presto_hook, mock_gcs_hook):
    """Run a CSV export against mocked hooks and check what reaches GCS.

    The cursor mock yields three rows followed by ``None``. Every upload
    call is checked for the bucket, the object name ``FILENAME.format(0)``,
    the ``text/csv`` mime type, a falsy ``gzip`` flag and a temp file whose
    bytes equal the concatenation of ``CSV_LINES``. Afterwards the
    arguments used to construct both hooks are verified.
    """
    upload_mock = mock_gcs_hook.return_value.upload

    # Fake query result: two columns, three data rows, then None.
    cursor = mock_presto_hook.return_value.get_conn.return_value.cursor
    cursor.return_value.description = [
        ("some_num", "INTEGER", None, None, None, None, None),
        ("some_str", "VARCHAR", None, None, None, None, None),
    ]
    cursor.return_value.fetchone.side_effect = [
        [42, "mock_row_content_1"],
        [43, "mock_row_content_2"],
        [44, "mock_row_content_3"],
        None,
    ]

    # NOTE(review): parameter names are kept unchanged because the operator
    # may pass some of them by keyword -- not visible from this test.
    def _verify_upload(bucket, obj, tmp_filename, mime_type, gzip):
        self.assertEqual(BUCKET, bucket)
        self.assertEqual(FILENAME.format(0), obj)
        self.assertEqual("text/csv", mime_type)
        self.assertFalse(gzip)
        with open(tmp_filename, "rb") as uploaded:
            content = uploaded.read()
        self.assertEqual(b"".join(CSV_LINES), content)

    upload_mock.side_effect = _verify_upload

    operator = PrestoToGCSOperator(
        task_id=TASK_ID,
        sql=SQL,
        bucket=BUCKET,
        filename=FILENAME,
        export_format="csv",
        presto_conn_id=PRESTO_CONN_ID,
        gcp_conn_id=GCP_CONN_ID,
        impersonation_chain=IMPERSONATION_CHAIN,
    )
    operator.execute(None)

    upload_mock.assert_called()
    mock_presto_hook.assert_called_once_with(presto_conn_id=PRESTO_CONN_ID)
    mock_gcs_hook.assert_called_once_with(
        delegate_to=None,
        gcp_conn_id=GCP_CONN_ID,
        impersonation_chain=IMPERSONATION_CHAIN,
    )
def test_save_as_csv_with_file_splitting(self, mock_gcs_hook, mock_presto_hook):
    """Check the per-file contents when a CSV export is split by size.

    ``approx_max_file_size_bytes`` is set to the byte length of the first
    expected file (the first three entries of ``CSV_LINES``). The second
    expected file holds ``CSV_LINES[0]`` followed by ``CSV_LINES[3]``.
    Each upload is compared against the expected bytes for its object name.
    """
    first_name = FILENAME.format(0)
    second_name = FILENAME.format(1)
    expected_by_object = {
        first_name: b"".join(CSV_LINES[:3]),
        second_name: b"".join((CSV_LINES[0], CSV_LINES[3])),
    }
    # Size limit equals the first expected file's length.
    size_limit = len(expected_by_object[first_name])

    # NOTE(review): parameter names are kept unchanged because the operator
    # may pass some of them by keyword -- not visible from this test.
    def _verify_upload(bucket, obj, tmp_filename, mime_type, gzip):
        self.assertEqual(BUCKET, bucket)
        self.assertEqual("text/csv", mime_type)
        self.assertFalse(gzip)
        with open(tmp_filename, "rb") as uploaded:
            self.assertEqual(expected_by_object[obj], uploaded.read())

    upload_mock = mock_gcs_hook.return_value.upload
    upload_mock.side_effect = _verify_upload

    # Fake query result: two columns, three data rows, then None.
    cursor = mock_presto_hook.return_value.get_conn.return_value.cursor
    cursor.return_value.description = [
        ("some_num", "INTEGER", None, None, None, None, None),
        ("some_str", "VARCHAR(20)", None, None, None, None, None),
    ]
    cursor.return_value.fetchone.side_effect = [
        [42, "mock_row_content_1"],
        [43, "mock_row_content_2"],
        [44, "mock_row_content_3"],
        None,
    ]

    operator = PrestoToGCSOperator(
        task_id=TASK_ID,
        sql=SQL,
        bucket=BUCKET,
        filename=FILENAME,
        approx_max_file_size_bytes=size_limit,
        export_format="csv",
    )
    operator.execute(None)

    upload_mock.assert_called()
def test_save_as_json_with_schema_file(self, mock_gcs_hook, mock_presto_hook):
    """Check that a schema file is uploaded alongside the data file.

    Only the upload whose object name equals ``SCHEMA_FILENAME`` has its
    contents compared (against ``SCHEMA_JSON``); other uploads are accepted
    unchecked. The test then expects exactly two upload calls.

    NOTE(review): the method name says JSON, but the operator is built with
    ``export_format="csv"`` -- confirm which one is intended.
    """
    upload_mock = mock_gcs_hook.return_value.upload

    # Fake query result: two columns, three data rows, then None.
    cursor = mock_presto_hook.return_value.get_conn.return_value.cursor
    cursor.return_value.description = [
        ("some_num", "INTEGER", None, None, None, None, None),
        ("some_str", "VARCHAR", None, None, None, None, None),
    ]
    cursor.return_value.fetchone.side_effect = [
        [42, "mock_row_content_1"],
        [43, "mock_row_content_2"],
        [44, "mock_row_content_3"],
        None,
    ]

    # NOTE(review): parameter names are kept unchanged because the operator
    # may pass some of them by keyword -- not visible from this test.
    def _verify_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
        if obj != SCHEMA_FILENAME:
            return
        with open(tmp_filename, "rb") as uploaded:
            self.assertEqual(SCHEMA_JSON, uploaded.read())

    upload_mock.side_effect = _verify_upload

    operator = PrestoToGCSOperator(
        task_id=TASK_ID,
        sql=SQL,
        bucket=BUCKET,
        filename=FILENAME,
        schema_filename=SCHEMA_FILENAME,
        export_format="csv",
        presto_conn_id=PRESTO_CONN_ID,
        gcp_conn_id=GCP_CONN_ID,
    )
    operator.execute(None)

    # One upload for the data file and one for the schema file.
    self.assertEqual(2, upload_mock.call_count)