Example #1
    def test_find_artifact(self):
        backend = LocalArtifactBackend(config={"path": "./test_storage/"})
        artifact = Artifact(self.stage_config)
        payload = "SHRIM"
        artifact.item = Item(payload=payload)
        backend.save_artifact(artifact)

        loaded_artifact = backend._find_cached_artifact(artifact)

        # Ensure that we found the artifact
        self.assertNotEqual(None, loaded_artifact)

        # Ensure that the item was rehydrated without its payload
        self.assertNotEqual(None, loaded_artifact.item)
        self.assertEqual(None, loaded_artifact.item.payload)

        # Ensure that meta properties are correctly set on artifact
        self.assertEqual(loaded_artifact._specific_hash,
                         artifact._specific_hash)
        self.assertEqual(loaded_artifact._dependency_hash,
                         artifact._dependency_hash)
        self.assertEqual(loaded_artifact._definition_hash,
                         artifact._definition_hash)
        self.assertEqual(loaded_artifact._pipeline_stage,
                         artifact._pipeline_stage)
        self.assertEqual(loaded_artifact.item.type, artifact.item.type)
Example #2
    def test_pipeline_stage_status(self):
        backend = LocalArtifactBackend(config={"path": "./test_storage/"})
        artifact = Artifact(self.stage_config)
        payload = "SHRIM"
        artifact.item = Item(payload=payload)

        status = backend.pipeline_stage_run_status(self.stage_config,
                                                   artifact._dependency_hash)
        self.assertEqual(status, STAGE_DOES_NOT_EXIST)

        backend.save_artifact(artifact)

        status = backend.pipeline_stage_run_status(self.stage_config,
                                                   artifact._dependency_hash)
        self.assertEqual(status, STAGE_IN_PROGRESS)

        backend.log_pipeline_stage_run_complete(self.stage_config,
                                                artifact._dependency_hash)
        status = backend.pipeline_stage_run_status(self.stage_config,
                                                   artifact._dependency_hash)
        self.assertEqual(status, STAGE_COMPLETE)

        meta = backend._get_pipeline_stage_run_meta(self.stage_config,
                                                    artifact._dependency_hash)

        self.assertEqual(len(meta['artifacts']), 1)
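
The test above walks the full run-status lifecycle: STAGE_DOES_NOT_EXIST before any save, STAGE_IN_PROGRESS once an artifact is saved, and STAGE_COMPLETE after the run is logged complete. A caller waiting on another worker could poll the same API; a minimal sketch using only the status constant and backend method shown above (wait_for_stage and its timeout are illustrative, not part of the library):

import time

def wait_for_stage(backend, stage_config, dependency_hash, timeout=30.0):
    # Poll pipeline_stage_run_status until the run completes or we time out.
    deadline = time.time() + timeout
    while time.time() < deadline:
        status = backend.pipeline_stage_run_status(stage_config,
                                                   dependency_hash)
        if status == STAGE_COMPLETE:
            return True
        time.sleep(0.5)
    return False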
Example #3
    def test_metadata_from_dict(self):
        stage_a = PipelineStageConfig('some_name', {
            "A": 1,
            "B": 2,
            "type": "ExecutorPipelineStage"
        })
        art_a = Artifact(stage_a)
        d = {
            "antecedents": {},
            "creation_time": 124566722.3,
            "definition_hash": "dac9630aec642a428cd73f4be0a03569",
            "specific_hash": "bc1687bbb3b97214d46b7c30ab307cc1",
            "dependency_hash": "ecad5fc98abf66565e009155f5e57dda",
            "pipeline_stage": "some_stage",
            "item": {
                "meta": {
                    "loss": 0.2
                },
                "tags": ["my_pipeline_run"],
                "type": "my_item_type"
            }
        }
        art_a.meta_from_dict(d)

        for prop in d:
            if prop == "item":
                for iprop in d['item']:
                    value = getattr(art_a.item, iprop)
                    self.assertEqual(d['item'][iprop], value)
            else:
                value = getattr(art_a, "_" + prop)
                self.assertEqual(d[prop], value)
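
Together with meta_to_dict (Example #6 below), this suggests a serialize/rehydrate round trip. A small sketch of that property check, assuming both methods behave as the examples show (metadata_roundtrips is illustrative):

def metadata_roundtrips(artifact):
    # Serialize metadata, rehydrate it into a fresh Artifact built from
    # the same stage config, and compare the resulting dicts.
    d = artifact.meta_to_dict()
    clone = Artifact(artifact._config)
    clone.meta_from_dict(d)
    return clone.meta_to_dict() == d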
Example #4
    def find_pipeline_stage_run_artifacts(self, stage_config,
                                          dependency_hash):
        """
        Finds all artifacts for a given pipeline run, loading their
        metadata.
        """
        # Build the composite key identifying this stage run
        stage_run_key = {
            'stage_config_hash': stage_config.hash(),
            'dependency_hash': dependency_hash,
        }

        response = self._stage_run_table.get_item(Key=stage_run_key)
        if 'Item' not in response:
            return None
        else:
            res = []
            meta = json.loads(response['Item']['metadata'])
            for obj in meta['artifacts']:
                art = Artifact(stage_config)
                art.item.type = obj['type']
                art._specific_hash = obj['specific_hash']
                art._dependency_hash = dependency_hash
                art._definition_hash = stage_config.hash()
                res.append(self._find_cached_artifact(art))
            return res
Example #5
    def test_load_artifact(self):
        backend = LocalArtifactBackend(config={"path": "./test_storage/"})
        artifact = Artifact(self.stage_config)
        payload = "SHRIM"
        artifact.item = Item(payload=payload)
        backend.save_artifact(artifact)

        loaded_artifact = backend.load_artifact(artifact)
        self.assertEqual(loaded_artifact.item.payload, artifact.item.payload)
Example #6
    def test_generate_metadata(self):
        stage_a = PipelineStageConfig('some_name', {
            "A": 1,
            "B": 2,
            "type": "ExecutorPipelineStage"
        })
        art_a = Artifact(stage_a)
        d = art_a.meta_to_dict()
        for m in art_a._meta_properties:
            if m not in d:
                self.fail("missing meta property: %s" % m)
Example #7
    def test_stage_definition_hash_uniqueness(self):
        stage_a = PipelineStageConfig('some_name', {
            "foo": "bar",
            "type": "ExecutorPipelineStage"
        })
        stage_b = PipelineStageConfig('some_name', {
            "foo": "quux",
            "type": "ExecutorPipelineStage"
        })
        art_a = Artifact(stage_a)
        art_b = Artifact(stage_b)
        self.assertNotEqual(art_a._definition_hash, art_b._definition_hash)
Example #8
    def test_metadata_from_bad_dict(self):
        stage_a = PipelineStageConfig('some_name', {
            "A": 1,
            "B": 2,
            "type": "ExecutorPipelineStage"
        })
        art_a = Artifact(stage_a)
        # An empty dict lacks the required metadata and should raise
        with self.assertRaises(InvalidArtifactMetadataError):
            art_a.meta_from_dict({})
Example #9
    def test_stage_definition_hash_idempotence(self):
        stage_a = PipelineStageConfig('some_name', {
            "A": 1,
            "B": 2,
            "type": "ExecutorPipelineStage"
        })
        stage_b = PipelineStageConfig('some_name', {
            "B": 2,
            "A": 1,
            "type": "ExecutorPipelineStage"
        })
        art_a = Artifact(stage_a)
        art_b = Artifact(stage_b)
        self.assertEqual(art_a._definition_hash, art_b._definition_hash)
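
The idempotence above implies the definition hash is computed over a canonicalized form of the stage config. A minimal sketch of such an order-insensitive digest, assuming the config is JSON-serialized with sorted keys before hashing; md5 matches the 32-character hex digests seen in Example #4, but definition_hash here is illustrative, not the library's function:

import hashlib
import json

def definition_hash(config_dict):
    # Sorting keys makes the digest independent of key insertion order,
    # which is exactly the property the idempotence test checks.
    canonical = json.dumps(config_dict, sort_keys=True)
    return hashlib.md5(canonical.encode("utf-8")).hexdigest()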
Example #10
    def test_pipeline_stage_run_meta(self):
        backend = LocalArtifactBackend(config={"path": "./test_storage/"})
        artifact = Artifact(self.stage_config)
        payload = "SHRIM"
        artifact.item = Item(payload=payload)
        backend.save_artifact(artifact)

        backend.log_pipeline_stage_run_complete(self.stage_config,
                                                artifact._dependency_hash)

        arts = backend.find_pipeline_stage_run_artifacts(
            self.stage_config, artifact._dependency_hash)
        self.assertEqual(len(arts), 1)
        self.assertEqual(arts[0].get_uid(), artifact.get_uid())
Example #11
        async def process_loop(executor, stage, input_artifacts):
            exit_loop = False
            while not exit_loop:
                await asyncio.sleep(2.0)
                for message in executor._task_queue.receive_messages(
                        MessageAttributeNames=[
                            'stage_config_hash', 'dependency_hash'
                        ]):
                    print("Retrieved message")
                    print(message.body)
                    print(message.message_attributes)
                    if message.message_attributes is None:
                        self.fail("Message attributes absent")

                    m_config_hash = message.message_attributes.\
                        get('stage_config_hash').get('StringValue')
                    m_dependency_hash = message.message_attributes.\
                        get('dependency_hash').get('StringValue')
                    config_hash = stage._config.hash()
                    dependency_hash = Artifact.dependency_hash(input_artifacts)

                    self.assertEqual(config_hash, m_config_hash)
                    self.assertEqual(dependency_hash, m_dependency_hash)
                    message.delete()
                    exit_loop = True
            for task in asyncio.Task.all_tasks():
                task.cancel()
            raise CancelledError
Example #12
    def _yield_artifact(self, artifact_name):
        artifact_path = os.path.join(os.getcwd(), self._root, artifact_name)
        if self.read_content:
            with open(artifact_path, 'rb') as f:
                art = Artifact(self._stage_config)
                art.item.payload = f.read()
                return art
        return artifact_path
Example #13
    def find_pipeline_stage_run_artifacts(self, stage_config, dependency_hash):
        """
        Finds all artifacts for a given pipeline run.
        """
        meta = self._get_pipeline_stage_run_meta(stage_config, dependency_hash)

        if 'artifacts' not in meta:
            return []
        else:
            res = []
            for uid in meta['artifacts']:
                art_dict = meta['artifacts'][uid]
                art = Artifact(stage_config)
                art.item.type = art_dict['item_type']
                art._specific_hash = art_dict['specific_hash']
                res.append(self._find_cached_artifact(art))
            return res
Example #14
    def _yield_artifact(self):
        artifact_path = os.path.join(os.getcwd(), self._path)
        with open(artifact_path, 'r') as f:
            content = f.read()

        art = Artifact(self._stage_config)
        art.item.payload = content
        return art
Example #15
    def test_save_missing_payload(self):
        artifact = Artifact(self.stage_config)
        # Saving an artifact with no payload should raise
        with self.assertRaises(ArtifactMissingPayloadError):
            self._default_backend.save_artifact(artifact)
Example #16
    def _sorted_artifacts(self, artifact):
        """
        Returns a sorted list of artifacts, based upon pruning ordering
        """
        item_meta = self._load_item_meta(artifact._pipeline_stage,
                                         artifact.item.type)

        sorted_metadata = sorted(item_meta.values(),
                                 key=lambda x: x["creation_time"])

        sorted_artifacts = []
        for x in sorted_metadata:
            a = Artifact(artifact._config, artifact.item.type)
            a.meta_from_dict(x)
            sorted_artifacts.append(a)

        return sorted_artifacts
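
Because _sorted_artifacts returns artifacts oldest-first by creation_time, a pruning pass can simply drop from the front of the list. A hedged sketch (prune_oldest and the delete_artifact hook are assumptions, not part of the backend shown):

def prune_oldest(backend, artifact, keep=10):
    # Everything except the final `keep` entries is a pruning candidate.
    arts = backend._sorted_artifacts(artifact)
    excess = arts[:-keep] if keep > 0 else arts
    for old in excess:
        backend.delete_artifact(old)  # assumed deletion hook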
Example #17
    def test_save_missing_payload(self):
        backend = LocalArtifactBackend(config={"path": "./test_storage/"})
        artifact = Artifact(self.stage_config)
        # Saving an artifact with no payload should raise
        with self.assertRaises(ArtifactMissingPayloadError):
            backend.save_artifact(artifact)
Example #18
    def test_load_artifact_from_s3(self):
        backend = self._default_backend
        artifact = Artifact(self.stage_config)
        payload = "SHRIM"
        artifact.item = Item(payload=payload)
        backend.save_artifact(artifact)

        # Now we'll delete the local artifact cache, forcing retrieval from S3
        path = backend._localArtifactBackend.path
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
        distutils.dir_util.mkpath(path)

        loaded_artifact = backend.load_artifact(artifact)
        self.assertEqual(loaded_artifact.item.payload.decode('utf-8'), payload)
        self.assertEqual(True, loaded_artifact._loaded_from_s3_cache)

        self.cleanup_test_tables(self._default_backend)
Example #19
    async def _run_job(self, job):
        # Get stage from pipeline
        pf = PipelineStageFactory()
        config = PipelineStageConfig(job['stage_name'], job['stage_config'])
        stage = pf.create_pipeline_stage(config)

        # Load artifact payloads from cache
        loaded_artifacts = []
        for artifact in job['artifacts']:
            art_obj = Artifact(stage._config)
            art_obj.meta_from_dict(artifact)
            loaded = self._backend.load_artifact(art_obj)
            if loaded is None:
                raise Exception("Could not find payload for artifact")
            loaded_artifacts.append(loaded)

        # Execute the task
        exec_task = self._executor.create_task(stage, loaded_artifacts)
        result = await exec_task.generate_artifacts()
        return result
Example #20
    async def _run_job(self, job):
        # Get stage from pipeline
        pf = PipelineStageFactory()
        config = PipelineStageConfig(job['stage_name'], job['stage_config'])
        stage = pf.create_pipeline_stage(config)

        # Load input artifact payloads from cache
        loaded_artifacts = []
        for artifact in job['artifacts']:
            art_obj = Artifact(stage._config)
            art_obj.meta_from_dict(artifact)
            loaded = self._backend.load_artifact(art_obj)
            if loaded is None:
                self._log("Could not find payload for artifact")
                raise Exception("Could not find payload for artifact")
            loaded_artifacts.append(loaded)

        # Execute the task
        exec_task = self._executor.create_task(stage, loaded_artifacts)
        result = await exec_task.generate_artifacts()

        for art in result:
            art._creation_time = float(time.time())
            art._dependency_hash = Artifact.dependency_hash(loaded_artifacts)
            self._backend.save_artifact(art)
        self._backend.log_pipeline_stage_run_complete(
            config, Artifact.dependency_hash(loaded_artifacts))

        return result
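
From the keys read above, the job payload handed to _run_job presumably looks something like the following (values illustrative):

job = {
    "stage_name": "some_name",
    "stage_config": {"A": 1, "B": 2, "type": "ExecutorPipelineStage"},
    # One metadata dict per input artifact, as produced by meta_to_dict()
    "artifacts": [input_artifact.meta_to_dict()],
}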
Example #21
    def test_create_tasks(self):
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        executor = RemoteSQSExecutor(
            aws_profile="testing",
            task_queue_name=self.test_queue_task_name,
            result_queue_name=self.test_queue_result_name,
            loop=loop)

        pf = PipelineStageFactory()
        stage = pf.create_pipeline_stage(self.stage_config)
        input_artifacts = [Artifact(self.stage_config)]
        executor.create_task(stage, input_artifacts)

        async def process_loop(executor, stage, input_artifacts):
            exit_loop = False
            while not exit_loop:
                await asyncio.sleep(2.0)
                for message in executor._task_queue.receive_messages(
                        MessageAttributeNames=[
                            'stage_config_hash', 'dependency_hash'
                        ]):
                    print("Retrieved message")
                    print(message.body)
                    print(message.message_attributes)
                    if message.message_attributes is None:
                        self.fail("Message attributes absent")

                    m_config_hash = message.message_attributes.\
                        get('stage_config_hash').get('StringValue')
                    m_dependency_hash = message.message_attributes.\
                        get('dependency_hash').get('StringValue')
                    config_hash = stage._config.hash()
                    dependency_hash = Artifact.dependency_hash(input_artifacts)

                    self.assertEqual(config_hash, m_config_hash)
                    self.assertEqual(dependency_hash, m_dependency_hash)
                    message.delete()
                    exit_loop = True
            for task in asyncio.Task.all_tasks():
                task.cancel()
            raise CancelledError

        try:
            loop.run_until_complete(
                asyncio.wait([
                    executor._process_queue(),
                    process_loop(executor, stage, input_artifacts)
                ]))
        except CancelledError:
            print('CancelledError raised: closing event loop.')
Example #22
    async def _process_queue(self):
        while True:
            task = await self._queue.get()
            self._log('Acquired Task: %s with %d inputs' %
                      (task._stage._config.name, len(task._input_artifacts)))

            # Push task to queue
            config_hash = task._stage._config.hash()
            dependency_hash = Artifact.dependency_hash(task._input_artifacts)
            self._queue_push(task, config_hash, dependency_hash)

            # Wait until task is complete
            message = await self._await_result(config_hash, dependency_hash)
            result = message.body
            message.delete()
            self._complete_task(task, config_hash, dependency_hash)
Example #23
    def _get_cached_artifacts(self, stage_name, input_artifacts, backend):
        """
        Attempts to retrieve cached artifacts for the stage run,
        identified uniquely by its definition and the hash of its
        input artifacts.
        """
        stage = self._stages[stage_name]
        dependency_hash = Artifact.dependency_hash(input_artifacts)
        status = backend.pipeline_stage_run_status(stage, dependency_hash)
        if status == STAGE_COMPLETE or status == STAGE_IN_PROGRESS:
            cached_arts = backend.find_pipeline_stage_run_artifacts(
                stage._config, dependency_hash)
            self._log("Loaded %d cached artifacts for stage %s" %
                      (len(cached_arts), stage_name))
            loaded_arts = []
            for art in cached_arts:
                loaded = backend.load_artifact(art)
                loaded._loaded_from_cache = True
                loaded_arts.append(loaded)
            return loaded_arts
        else:
            return None
Example #24
    async def _run_stage(self, stage_name, input_artifacts, executor, backend):
        """
        Run a stage once we've acquired the input artifacts
        """
        # Check if the stage has already been run with the given
        # input artifacts and pipeline definition. If so,
        # return the cached run.
        cached_arts = self._get_cached_artifacts(stage_name, input_artifacts,
                                                 backend)
        if cached_arts is not None:
            self._log("Found %d cached artifacts for stage %s" %
                      (len(cached_arts), stage_name))
            return cached_arts

        # We need to generate fresh artifacts.
        # We'll feed the input artifacts to the executor,
        # returning generated artifacts
        stage = self._stages[stage_name]
        result = []
        dependency_hash = Artifact.dependency_hash(input_artifacts)

        task = executor.create_task(stage, input_artifacts)
        artifacts = await task.generate_artifacts()

        for art in artifacts:
            if hasattr(art, "_remotely_produced"):
                self._log("Remotely produced artifact for %s" % stage_name)
                result.append(art)
            else:
                self._log("Yielding fresh artifact for stage %s" % stage_name)
                self._log("\tPayload: %s " % str(art.item.payload)[0:50])
                art = self._ensure_artifact_meta(art, dependency_hash)
                backend.save_artifact(art)
                result.append(art)

        self._log("Done generating stage %s" % stage_name)
        backend.log_pipeline_stage_run_complete(stage, dependency_hash)
        return result
Example #25
    def test_save_artifact(self):
        backend = LocalArtifactBackend(config={"path": "./test_storage/"})
        artifact = Artifact(self.stage_config)
        artifact.item = Item(payload="SHRIM")
        backend.save_artifact(artifact)
Example #26
    def yield_artifacts(self, input_artifacts):
        for artifact in input_artifacts:
            new_artifact = Artifact(self._config, artifact.item)
            new_artifact._specific_hash = artifact._specific_hash
            yield new_artifact
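
The stage above passes each input item through unchanged, preserving its _specific_hash. A stage that transforms payloads would differ only in building a new Item; a sketch using the same Artifact(config, item) constructor shown above (the doubling transform is illustrative):

    def yield_artifacts(self, input_artifacts):
        for artifact in input_artifacts:
            # Illustrative transform: derive a new payload from the input's.
            item = Item(payload=artifact.item.payload * 2)
            yield Artifact(self._config, item)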
Example #27
    def test_executor_server_integration(self):
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        executor = RemoteSQSExecutor(
            aws_profile=self.test_profile,
            task_queue_name=self.test_queue_task_name,
            result_queue_name=self.test_queue_result_name,
            s3_bucket_name=self.test_bucket_name,
            dynamodb_artifact_table_name=(
                self.test_dynamodb_artifact_table_name),
            dynamodb_stage_run_table_name=self.test_dynamodb_stage_run_name,
            loop=loop)

        server = RemoteSQSServer(
            aws_profile=self.test_profile,
            aws_region=self.test_region,
            s3_bucket_name=self.test_bucket_name,
            task_queue_name=self.test_queue_task_name,
            result_queue_name=self.test_queue_result_name,
            dynamodb_artifact_table_name=(
                self.test_dynamodb_artifact_table_name),
            dynamodb_stage_run_table_name=self.test_dynamodb_stage_run_name,
            loop=loop)

        # Create a task whose inputs are the stage's own yielded artifacts.
        pf = PipelineStageFactory()
        stage = pf.create_pipeline_stage(self.stage_config)
        input_artifacts = list(stage.yield_artifacts())
        executor.create_task(stage, input_artifacts)

        # Save input artifacts so they're available for the remote server
        executor._backend.save_artifact(input_artifacts[0])

        # Run our local RemoteExecutor and the remote RemoteSQSServer
        # for 10 seconds.
        async def timeout():
            await asyncio.sleep(10.0)
            for task in asyncio.Task.all_tasks():
                task.cancel()
            raise CancelledError

        try:
            loop.run_until_complete(
                asyncio.wait([
                    executor._process_queue(),
                    server._process_tasks(),
                    server._executor_server._listen_to_queue(),
                    timeout()
                ]))
        except CancelledError:
            print('CancelledError raised: closing event loop.')

        # Load our remotely generated artifact(s) and ensure they
        # have the correct payload.
        arts = executor._backend.find_pipeline_stage_run_artifacts(
            self.stage_config, Artifact.dependency_hash(input_artifacts))

        loaded = []
        for art in arts:
            loaded.append(executor._backend.load_artifact(art))

        self.assertEqual(1, len(loaded))
        self.assertEqual(loaded[0].item.payload['param_a'],
                         "string parameter value")
Example #28
    def _yield_artifact(self):
        art = Artifact(self._stage_config)
        art.item.payload = self._parameters
        return art
Example #29
    def test_save_artifact(self):
        s3_backend = self._default_backend
        artifact = Artifact(self.stage_config)
        artifact.item.payload = "foobs"
        s3_backend.save_artifact(artifact)
        self.cleanup_test_tables(self._default_backend)