def test_stage_definition_hash_uniqueness(self):
    stage_a = PipelineStageConfig('some_name', {
        "foo": "bar",
        "type": "ExecutorPipelineStage"
    })
    stage_b = PipelineStageConfig('some_name', {
        "foo": "quux",
        "type": "ExecutorPipelineStage"
    })
    art_a = Artifact(stage_a)
    art_b = Artifact(stage_b)
    self.assertNotEqual(art_a._definition_hash, art_b._definition_hash)
def test_stage_definition_hash_idempotence(self):
    stage_a = PipelineStageConfig('some_name', {
        "A": 1,
        "B": 2,
        "type": "ExecutorPipelineStage"
    })
    stage_b = PipelineStageConfig('some_name', {
        "B": 2,
        "A": 1,
        "type": "ExecutorPipelineStage"
    })
    art_a = Artifact(stage_a)
    art_b = Artifact(stage_b)
    self.assertEqual(art_a._definition_hash, art_b._definition_hash)
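# The idempotence test above only passes if the definition hash is computed
# over a canonical, key-order-independent serialization of the stage config.
# A minimal sketch of such a hash follows. This is a hypothetical helper,
# not the actual Artifact implementation, though the 32-hex-digit hashes in
# the tests suggest an MD5 digest.
import hashlib
import json


def definition_hash_sketch(config_dict):
    # sort_keys=True makes {"A": 1, "B": 2} and {"B": 2, "A": 1}
    # serialize identically, so the digest is key-order-independent.
    canonical = json.dumps(config_dict, sort_keys=True)
    return hashlib.md5(canonical.encode('utf-8')).hexdigest()


# definition_hash_sketch({"A": 1, "B": 2}) ==
#     definition_hash_sketch({"B": 2, "A": 1})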
async def _run_job(self, job):
    # Get stage from pipeline
    pf = PipelineStageFactory()
    config = PipelineStageConfig(job['stage_name'], job['stage_config'])
    stage = pf.create_pipeline_stage(config)

    # Load input artifact payloads from cache
    loaded_artifacts = []
    for artifact in job['artifacts']:
        art_obj = Artifact(stage._config)
        art_obj.meta_from_dict(artifact)
        loaded = self._backend.load_artifact(art_obj)
        if loaded is None:
            self._log("Could not find payload for artifact")
            raise Exception("Could not find payload for artifact")
        loaded_artifacts.append(loaded)

    # Execute the task
    exec_task = self._executor.create_task(stage, loaded_artifacts)
    result = await exec_task.generate_artifacts()

    # Stamp and persist the resulting artifacts, then record the run.
    dependency_hash = Artifact.dependency_hash(loaded_artifacts)
    for art in result:
        art._creation_time = float(time.time())
        art._dependency_hash = dependency_hash
        self._backend.save_artifact(art)
    self._backend.log_pipeline_stage_run_complete(config, dependency_hash)
    return result
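# A worker job, as consumed by _run_job above, bundles a stage name, its
# config dict, and the metadata dicts of its input artifacts. A minimal
# sketch of the expected shape follows; the field values are illustrative,
# inferred from the tests in this section, and the exact payload produced
# by the arbiter may differ.
example_job = {
    "stage_name": "StageA",
    "stage_config": {
        "inputs": [],
        "execute": "tests.functional.module.executor_function.function",
        "type": "ExecutorPipelineStage",
    },
    "artifacts": [
        # Each entry mirrors the dict accepted by Artifact.meta_from_dict().
        {
            "antecedents": {},
            "creation_time": 124566722.3,
            "definition_hash": "dac9630aec642a428cd73f4be0a03569",
            "specific_hash": "bc1687bbb3b97214d46b7c30ab307cc1",
            "dependency_hash": "ecad5fc98abf66565e009155f5e57dda",
            "pipeline_stage": "StageA",
            "item": {"meta": {}, "tags": [], "type": "my_item_type"},
        },
    ],
}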
def test_metadata_from_dict(self):
    stage_a = PipelineStageConfig('some_name', {
        "A": 1,
        "B": 2,
        "type": "ExecutorPipelineStage"
    })
    art_a = Artifact(stage_a)
    d = {
        "antecedents": {},
        "creation_time": 124566722.3,
        "definition_hash": "dac9630aec642a428cd73f4be0a03569",
        "specific_hash": "bc1687bbb3b97214d46b7c30ab307cc1",
        "dependency_hash": "ecad5fc98abf66565e009155f5e57dda",
        "pipeline_stage": "some_stage",
        "item": {
            "meta": {"loss": 0.2},
            "tags": ["my_pipeline_run"],
            "type": "my_item_type"
        }
    }
    art_a.meta_from_dict(d)
    for prop in d:
        if prop == "item":
            for iprop in d['item']:
                value = getattr(art_a.item, iprop)
                self.assertEqual(d['item'][iprop], value)
        else:
            value = getattr(art_a, "_" + prop)
            self.assertEqual(d[prop], value)
def setUp(self):
    self.data = {
        "inputs": [],
        "execute": "tests.functional.module.executor_function.function",
        "type": "ExecutorPipelineStage"
    }
    self.config = PipelineStageConfig('WriteBytes', self.data)
    self.factory = PipelineStageFactory()
def pregenerate_artifacts(self, backend):
    pf = PipelineStageFactory()
    config = PipelineStageConfig(
        "StageA", self.generate_pipeline_config()["StageA"])
    stage = pf.create_pipeline_stage(config)
    arts = []
    for art in stage.yield_artifacts():
        backend.save_artifact(art)
        arts.append(art)
    return arts
def setUp(self):
    self.dirname = 'foo'
    self.filename = ['foo.bar', 'foo.baz']
    self.filedatas = ['foo bar baz', 'helloworld']
    self.fs = isolated_filesystem()
    self.fs.__enter__()
    self.stage_config = PipelineStageConfig(
        "test_stage_name", {"type": "ParameterPipelineStage"})

    # Build directory structure
    os.makedirs(self.dirname)
    for name, data in zip(self.filename, self.filedatas):
        with open(os.path.join(os.getcwd(), self.dirname, name), 'w') as f:
            f.write(data)
def test_generate_metadata(self):
    stage_a = PipelineStageConfig('some_name', {
        "A": 1,
        "B": 2,
        "type": "ExecutorPipelineStage"
    })
    art_a = Artifact(stage_a)
    d = art_a.meta_to_dict()
    # Every declared meta property must appear in the serialized dict.
    for m in art_a._meta_properties:
        self.assertIn(m, d)
def test_metadata_from_bad_dict(self):
    stage_a = PipelineStageConfig('some_name', {
        "A": 1,
        "B": 2,
        "type": "ExecutorPipelineStage"
    })
    art_a = Artifact(stage_a)
    with self.assertRaises(InvalidArtifactMetadataError):
        art_a.meta_from_dict({})
def generate_pipeline_from_dict(self, config_data):
    if not isinstance(config_data, OrderedDict):
        raise TypeError('generate_pipeline_from_dict requires an '
                        'OrderedDict to preserve the loading order '
                        'of the pipeline stages. Found %s instead.'
                        % type(config_data))
    configs = []
    for name, data in config_data.items():
        config = PipelineStageConfig(name, data)
        configs.append(config)
    stages = self._generate_stages(configs)
    return Pipeline(stages)
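# Usage sketch for generate_pipeline_from_dict: stages must arrive in an
# OrderedDict so that later stages can reference earlier ones by name in
# their "inputs". The stage names, the "mymodule.my_function" path, and
# the `generator` object below are illustrative, not part of the source.
from collections import OrderedDict

config_data = OrderedDict([
    ("StageA", {"type": "ParameterPipelineStage",
                "param_a": "value"}),
    ("StageB", {"type": "ExecutorPipelineStage",
                "inputs": ["StageA"],
                "execute": "mymodule.my_function"}),
])
pipeline = generator.generate_pipeline_from_dict(config_data)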
def setUp(self):
    # File system configuration
    self.filename = ['foo.bar', 'foo.baz']
    self.filedatas = ['foo bar baz', 'hello, world']
    self.fs = isolated_filesystem()
    self.fs.__enter__()
    for name, data in zip(self.filename, self.filedatas):
        with open(os.path.join(os.getcwd(), name), 'w') as f:
            f.write(data)

    # Setup stage config
    self.stage_config = PipelineStageConfig(
        "test_stage_name", {
            "type": "ParameterPipelineStage",
            "param_a": "string parameter value"
        })
async def _run_job(self, job):
    # Get stage from pipeline
    pf = PipelineStageFactory()
    config = PipelineStageConfig(job['stage_name'], job['stage_config'])
    stage = pf.create_pipeline_stage(config)

    # Load artifact payloads from cache
    loaded_artifacts = []
    for artifact in job['artifacts']:
        art_obj = Artifact(stage._config)
        art_obj.meta_from_dict(artifact)
        loaded = self._backend.load_artifact(art_obj)
        if loaded is None:
            raise Exception("Could not find payload for artifact")
        loaded_artifacts.append(loaded)

    # Execute the task
    exec_task = self._executor.create_task(stage, loaded_artifacts)
    result = await exec_task.generate_artifacts()
    return result
def setUp(self):
    # File system configuration
    self.filename = ['foo.bar', 'foo.baz']
    self.filedatas = ['foo bar baz', 'hello, world']
    self.fs = isolated_filesystem()
    self.fs.__enter__()
    for name, data in zip(self.filename, self.filedatas):
        with open(os.path.join(os.getcwd(), name), 'w') as f:
            f.write(data)

    # Setup settings for local arbiter
    self.stage_config = PipelineStageConfig("test_stage_name", {
        "type": "ParameterPipelineStage"
    })
    self.config_filename = 'pipetree.json'
    with open(os.path.join(".", self.config_filename), 'w') as f:
        json.dump(self.generate_pipeline_config(), f)

    # Cleanup before each test is run
    self.cleanup_test_tables(self._default_backend)
def setUp(self):
    self.stage_config = PipelineStageConfig(
        "test_stage_name", {"type": "ParameterPipelineStage"})
    self.test_parameters = {"int_param": 200, "str_param": "str"}
def load_file(self, path):
    data = self._file_loader.load_file(path)
    # Yield one PipelineStageConfig per stage entry in the loaded file.
    for name, pipeline_stage in data.items():
        config = PipelineStageConfig(name, pipeline_stage)
        yield config
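# Usage sketch for load_file: it is a generator, so configs are produced
# lazily, one per stage entry in the loaded file. The `loader` object is
# illustrative; the 'pipetree.json' file name matches the config file the
# arbiter tests write out.
configs = list(loader.load_file('pipetree.json'))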