예제 #1
0
    def test_save_as_csv(self, mock_presto_hook, mock_gcs_hook):
        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(FILENAME.format(0), obj)
            self.assertEqual("text/csv", mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, "rb") as file:
                self.assertEqual(b"".join(CSV_LINES), file.read())

        mock_gcs_hook.return_value.upload.side_effect = _assert_upload

        mock_cursor = mock_presto_hook.return_value.get_conn.return_value.cursor

        mock_cursor.return_value.description = [
            ("some_num", "INTEGER", None, None, None, None, None),
            ("some_str", "VARCHAR", None, None, None, None, None),
        ]

        mock_cursor.return_value.fetchone.side_effect = [
            [42, "mock_row_content_1"],
            [43, "mock_row_content_2"],
            [44, "mock_row_content_3"],
            None,
        ]

        op = PrestoToGCSOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME,
            export_format="csv",
            presto_conn_id=PRESTO_CONN_ID,
            gcp_conn_id=GCP_CONN_ID,
            impersonation_chain=IMPERSONATION_CHAIN,
        )

        op.execute(None)

        mock_gcs_hook.return_value.upload.assert_called()

        mock_presto_hook.assert_called_once_with(presto_conn_id=PRESTO_CONN_ID)
        mock_gcs_hook.assert_called_once_with(
            delegate_to=None,
            gcp_conn_id=GCP_CONN_ID,
            impersonation_chain=IMPERSONATION_CHAIN,
        )
예제 #2
0
    def test_save_as_csv_with_file_splitting(self, mock_gcs_hook,
                                             mock_presto_hook):
        """Test that csv is split by approx_max_file_size_bytes param."""

        expected_upload = {
            FILENAME.format(0): b"".join(CSV_LINES[:3]),
            FILENAME.format(1): b"".join([CSV_LINES[0], CSV_LINES[3]]),
        }

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual("text/csv", mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, "rb") as file:
                self.assertEqual(expected_upload[obj], file.read())

        mock_gcs_hook.return_value.upload.side_effect = _assert_upload

        mock_cursor = mock_presto_hook.return_value.get_conn.return_value.cursor

        mock_cursor.return_value.description = [
            ("some_num", "INTEGER", None, None, None, None, None),
            ("some_str", "VARCHAR(20)", None, None, None, None, None),
        ]

        mock_cursor.return_value.fetchone.side_effect = [
            [42, "mock_row_content_1"],
            [43, "mock_row_content_2"],
            [44, "mock_row_content_3"],
            None,
        ]

        op = PrestoToGCSOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME,
            approx_max_file_size_bytes=len(
                expected_upload[FILENAME.format(0)]),
            export_format="csv",
        )

        op.execute(None)

        mock_gcs_hook.return_value.upload.assert_called()
예제 #3
0
    def test_save_as_json_with_schema_file(self, mock_gcs_hook, mock_presto_hook):
        """Test writing schema files."""

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
            if obj == SCHEMA_FILENAME:
                with open(tmp_filename, "rb") as file:
                    self.assertEqual(SCHEMA_JSON, file.read())

        mock_gcs_hook.return_value.upload.side_effect = _assert_upload

        mock_cursor = mock_presto_hook.return_value.get_conn.return_value.cursor

        mock_cursor.return_value.description = [
            ("some_num", "INTEGER", None, None, None, None, None),
            ("some_str", "VARCHAR", None, None, None, None, None),
        ]

        mock_cursor.return_value.fetchone.side_effect = [
            [42, "mock_row_content_1"],
            [43, "mock_row_content_2"],
            [44, "mock_row_content_3"],
            None,
        ]

        op = PrestoToGCSOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=FILENAME,
            schema_filename=SCHEMA_FILENAME,
            export_format="csv",
            presto_conn_id=PRESTO_CONN_ID,
            gcp_conn_id=GCP_CONN_ID,
        )
        op.execute(None)

        # once for the file and once for the schema
        self.assertEqual(2, mock_gcs_hook.return_value.upload.call_count)