Example #1
0
 def test_gcs_init(self, google_client):
     """Check that construction records settings and defers client creation.

     The mocked Google client must not be called by the constructor; only
     an explicit ``initialize_client()`` call should request the bucket.
     """
     result_handler = GCSResultHandler(bucket="bob")

     # Construction alone only records configuration.
     assert result_handler.bucket == "bob"
     assert result_handler.credentials_secret == "GOOGLE_APPLICATION_CREDENTIALS"
     assert google_client.called is False

     # Explicit initialization looks the bucket up by its configured name.
     result_handler.initialize_client()
     bucket_args, _ = google_client.return_value.bucket.call_args
     assert bucket_args[0] == "bob"
Example #2
0
 def test_gcs_writes_binary_string(self, google_client):
     """Check that ``write`` uploads a ``str`` payload to the bucket's blob."""
     uploaded_blob = MagicMock()
     # Any blob requested from the fake bucket resolves to ``uploaded_blob``.
     fake_bucket = MagicMock(blob=MagicMock(return_value=uploaded_blob))
     google_client.return_value.bucket = MagicMock(return_value=fake_bucket)

     result_handler = GCSResultHandler(bucket="foo")
     result_handler.write(None)

     upload = uploaded_blob.upload_from_string
     assert upload.called
     payload = upload.call_args[0][0]
     assert isinstance(payload, str)
Example #3
0
    def test_gcs_uses_custom_secret_name(self, google_client):
        """Check that the secret named by ``credentials_secret`` reaches the client.

        With local secrets enabled and ``TEST_SECRET`` present in the context,
        the mocked client must receive that value as its ``credentials`` kwarg.
        """
        secrets = dict(TEST_SECRET=94611)
        local_secrets_config = {"cloud.use_local_secrets": True}
        handler = GCSResultHandler(bucket="foo", credentials_secret="TEST_SECRET")

        with prefect.context(secrets=secrets), set_temporary_config(local_secrets_config):
            handler.initialize_client()

        _, client_kwargs = google_client.call_args
        assert client_kwargs["credentials"] == 94611
Example #4
0
 def test_gcs_writes_to_blob_prefixed_by_date_suffixed_by_prefect(
         self, google_client):
     """Check the blob name starts with the UTC date and ends with the suffix.

     The name passed to ``bucket.blob`` must begin with the current UTC date
     formatted as ``Y/M/D`` and end with ``prefect_result``.
     """
     fake_bucket = MagicMock()
     google_client.return_value.bucket = MagicMock(return_value=fake_bucket)

     GCSResultHandler(bucket="foo").write("so-much-data")

     assert fake_bucket.blob.called
     blob_name = fake_bucket.blob.call_args[0][0]
     date_prefix = pendulum.now("utc").format("Y/M/D")
     assert blob_name.startswith(date_prefix)
     assert blob_name.endswith("prefect_result")
Example #5
0
 def test_serialize(self):
     """Check that the schema dump exposes type, bucket and credentials secret."""
     result_handler = GCSResultHandler(bucket="my-bucket", credentials_secret="FOO")
     dumped = ResultHandlerSchema().dump(result_handler)

     expected_fields = {
         "type": "GCSResultHandler",
         "bucket": "my-bucket",
         "credentials_secret": "FOO",
     }
     for field_name, expected_value in expected_fields.items():
         assert dumped[field_name] == expected_value
Example #6
0
    def test_gcs_uses_custom_secret_name(self):
        """Check that the custom secret is used to build service-account credentials.

        The Google modules are replaced in ``sys.modules`` with mocks, and the
        value stored under ``TEST_SECRET`` must be the first positional argument
        to ``Credentials.from_service_account_info``.
        """
        service_account = MagicMock()
        handler = GCSResultHandler(bucket="foo", credentials_secret="TEST_SECRET")
        fake_modules = {
            "google.cloud": MagicMock(),
            "google.oauth2.service_account": service_account,
        }

        with prefect.context(secrets=dict(TEST_SECRET=94611)):
            with set_temporary_config({"cloud.use_local_secrets": True}):
                with patch.dict("sys.modules", fake_modules):
                    handler.initialize_client()

        from_info = service_account.Credentials.from_service_account_info
        assert from_info.call_args[0][0] == 94611
Example #7
0
    def test_gcs_handler_is_pickleable(self, google_client, monkeypatch):
        """Check that a handler survives a cloudpickle dump/load round trip."""

        # NOTE(review): this unpicklable stand-in is defined but never attached
        # to the mocked client within this test -- confirm whether it was meant
        # to be wired in (e.g. as the client's ``bucket`` factory).
        class gcs_bucket:
            def __getstate__(self):
                raise ValueError("I cannot be pickled.")

            def __init__(self, *args, **kwargs):
                pass

        original = GCSResultHandler("foo")
        pickled = cloudpickle.dumps(original)
        restored = cloudpickle.loads(pickled)
        assert isinstance(restored, GCSResultHandler)
Example #8
0
    def test_roundtrip_never_loads_client(self, monkeypatch):
        """Check that a schema dump/load round trip never initializes the client.

        ``initialize_client`` is replaced with a function that raises, so any
        attempt to create a client during the round trip fails the test.
        """
        schema = ResultHandlerSchema()

        def raise_me(*args, **kwargs):
            raise SyntaxError("oops")

        monkeypatch.setattr(GCSResultHandler, "initialize_client", raise_me)

        original = GCSResultHandler(bucket="bucket3", credentials_secret="FOO")
        dumped = schema.dump(original)
        handler = schema.load(dumped)

        assert isinstance(handler, GCSResultHandler)
        assert (handler.bucket, handler.credentials_secret) == ("bucket3", "FOO")
Example #9
0
    def __init__(self,
                 bucket: str,
                 key: str = None,
                 project: str = None) -> None:
        """Record the storage settings and attach a GCS result handler.

        Args:
            bucket: Bucket name; stored on the instance and also used to
                build the ``GCSResultHandler`` passed to the parent class.
            key: Optional value stored as ``self.key``.
            project: Optional value stored as ``self.project``.
        """
        # Both registries start empty; see the type comments for their shapes.
        self.flows = {}  # type: Dict[str, str]
        self._flows = {}  # type: Dict[str, "Flow"]

        self.bucket = bucket
        self.key = key
        self.project = project

        # The parent initializer receives a handler bound to the same bucket.
        super().__init__(result_handler=GCSResultHandler(bucket=bucket))
    def test_gcs_handler_is_pickleable(self, monkeypatch):
        """Check that a handler can be pickled while the Google modules are mocked.

        The mocked storage client exposes a ``bucket`` class whose
        ``__getstate__`` raises, standing in for an unpicklable bucket object.
        """

        class gcs_bucket:
            def __getstate__(self):
                raise ValueError("I cannot be pickled.")

            def __init__(self, *args, **kwargs):
                pass

        fake_client = MagicMock(bucket=gcs_bucket)
        fake_storage = MagicMock(Client=MagicMock(return_value=fake_client))
        fake_modules = {
            "google.cloud": MagicMock(storage=fake_storage),
            "google.oauth2.service_account": MagicMock(),
        }

        with patch.dict("sys.modules", fake_modules):
            original = GCSResultHandler("foo")
            pickled = cloudpickle.dumps(original)
            restored = cloudpickle.loads(pickled)
            assert isinstance(restored, GCSResultHandler)
Example #11
0
 def test_roundtrip(self):
     """Check that a schema dump followed by load rebuilds an equivalent handler."""
     schema = ResultHandlerSchema()
     dumped = schema.dump(GCSResultHandler(bucket="bucket3"))
     restored = schema.load(dumped)
     assert isinstance(restored, GCSResultHandler)
     assert restored.bucket == "bucket3"
Example #12
0
 def test_serialize(self, google_client):
     """Check that the schema dump exposes the handler type and bucket name."""
     dumped = ResultHandlerSchema().dump(GCSResultHandler(bucket="my-bucket"))
     expected_fields = {"type": "GCSResultHandler", "bucket": "my-bucket"}
     for field_name, expected_value in expected_fields.items():
         assert dumped[field_name] == expected_value
                      parameter_defaults=dict(
                          url='http://www.insidethex.co.uk/')),
        ]),
        storage=Docker(
            # TODO: change to your docker registry:
            #  https://docs.prefect.io/cloud/recipes/configuring_storage.html
            registry_url='szelenka',
            # TODO: 'pin' the exact versions you used on your development machine
            python_dependencies=[
                'requests==2.23.0', 'beautifulsoup4==4.8.2',
                'sqlalchemy==1.3.15'
            ],
        ),
        # TODO: specify how you want to handle results
        #  https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers
        result_handler=GCSResultHandler(bucket='prefect_results')) as flow:
    _url = Parameter("url", default='http://www.insidethex.co.uk/')
    _bypass = Parameter("bypass", default=False, required=False)
    _db_file = Parameter("db_file", default='xfiles_db.sqlite', required=False)

    # scrape the website
    _home_page = retrieve_url(_url)
    _episodes = create_episode_list(base_url=_url,
                                    main_html=_home_page,
                                    bypass=_bypass)
    _episode = retrieve_url.map(_episodes)
    _dialogue = scrape_dialogue.map(_episode)

    # insert into SQLite table
    _db = create_db(filename=_db_file)
    _final = insert_episode.map(episode=_dialogue, tbl=unmapped(_db))
 def test_gcs_init(self, google_client):
     """Check that construction stores the bucket name and requests that bucket."""
     result_handler = GCSResultHandler(bucket="bob")
     assert result_handler.bucket == "bob"

     # The mocked client must have been asked for the bucket by name.
     requested_bucket = google_client.return_value.bucket.call_args[0][0]
     assert requested_bucket == "bob"
def test_basic_conversion_gcs_result():
    """Check that a GCS result handler converts to a ``GCSResult`` on the same bucket."""
    converted = ResultHandlerResult.from_result_handler(
        GCSResultHandler(bucket="foo")
    )
    assert isinstance(converted, GCSResult)
    assert converted.bucket == "foo"