Example 1
 def test_resolve_missing_condition_definition(self):
     exported = DataFlow.auto(add).export(linked=True)
     del exported["definitions"]["is_add"]
     with self.assertRaisesRegex(
         DefinitionMissing, "add.conditions.*is_add"
     ):
         DataFlow._fromdict(**exported)
Example 2
 async def test_run(self):
     stdout = io.BytesIO()
     with unittest.mock.patch("sys.stdout.buffer.write", new=stdout.write):
         await Export(export="tests.test_df:DATAFLOW",
                      not_linked=False).run()
     exported = json.loads(stdout.getvalue())
     DataFlow._fromdict(**exported)
Example 3
 async def test_dataflow_usage_example(self):
     # Write out shouldi dataflow
     orig = self.mktempfile() + ".json"
     pathlib.Path(orig).write_text(json.dumps(self.DATAFLOW.export()))
     # Import from feature/git
     transform_to_repo = Operation.load("dffml.mapping.create")
     lines_of_code_by_language, lines_of_code_to_comments = list(
         load(
             "dffml_feature_git.feature.operations:lines_of_code_by_language",
             "dffml_feature_git.feature.operations:lines_of_code_to_comments",
             relative=relative_path("..", "..", "feature", "git"),
         ))
     # Create new dataflow
     override = DataFlow.auto(
         transform_to_repo,
         lines_of_code_by_language,
         lines_of_code_to_comments,
     )
     # TODO Modify and compare against yaml in docs example
     # Write out override dataflow
     created = self.mktempfile() + ".json"
     pathlib.Path(created).write_text(json.dumps(override.export()))
     # Merge the two
     with contextlib.redirect_stdout(self.stdout):
         await CLI.cli("dataflow", "merge", orig, created)
     DataFlow._fromdict(**json.loads(self.stdout.getvalue()))
Example 4
    async def setUp(self):
        await super().setUp()
        self.stdout = io.StringIO()
        InputDataflow = DataFlow(
            operations={
                "AcceptUserInput": AcceptUserInput.op,
                "get_single": GetSingle.imp.op,
            },
            seed=[
                Input(
                    value=[AcceptUserInput.op.outputs["InputData"].name],
                    definition=GetSingle.op.inputs["spec"],
                )
            ],
            implementations={AcceptUserInput.op.name: AcceptUserInput},
        )

        OutputDataflow = DataFlow(
            operations={
                "print_output": print_output.op,
                "get_single": GetSingle.imp.op,
            },
            implementations={print_output.op.name: print_output.imp},
        )

        self.InputDataflow = InputDataflow
        self.OutputDataflow = OutputDataflow
Example 5
 async def test_export(self):
     self.required_plugins("shouldi")
     stdout = io.StringIO()
     # Use shouldi's dataflow for tests
     with relative_chdir("..", "..", "examples", "shouldi"):
         with unittest.mock.patch("sys.stdout.buffer.write") as write:
             await Develop.cli("export", "shouldi.cli:DATAFLOW")
         DataFlow._fromdict(**json.loads(write.call_args[0][0]))
Example 6
 async def test_pos_tagger(self):
     input_sentence = (
         "The end is the beginning , and the beginning is the end"
     )
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(pos_tagger, GetSingle),
         [
             Input(
                 value=[pos_tagger.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence,
                 definition=pos_tagger.op.inputs["text"],
             ),
             Input(
                 value="en_core_web_sm",
                 definition=pos_tagger.op.inputs["spacy_model"],
             ),
         ],
     ):
         pos_tags = results[pos_tagger.op.outputs["result"].name]
         words = input_sentence.split()
         for i, _ in enumerate(words):
             self.assertEqual(pos_tags[i][0], words[i])
             self.assertIn(pos_tags[i][1], ["DT", "NN", "VBZ", "CC", ","])
Example 7
 async def test_get_similarity(self):
     input_sentence1 = (
         "The end is the beginning , and the beginning is the end"
     )
     input_sentence2 = (
         "The end was the beginning , and the beginning was the end"
     )
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(get_similarity, GetSingle),
         [
             Input(
                 value=[get_similarity.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence1,
                 definition=get_similarity.op.inputs["text_1"],
             ),
             Input(
                 value=input_sentence2,
                 definition=get_similarity.op.inputs["text_2"],
             ),
             Input(
                 value="en_core_web_sm",
                 definition=get_similarity.op.inputs["spacy_model"],
             ),
         ],
     ):
         similarity_score = results[
             get_similarity.op.outputs["result"].name
         ]
         self.assertGreater(similarity_score, 0.9)
Example 8
    async def test_associatedefinition(self):
        feed_def = Definition(name="feed", primitive="string")
        dead_def = Definition(name="dead", primitive="string")
        output = Definition(name="output", primitive="string")

        feed_input = Input(value="my favorite value", definition=feed_def)
        face_input = Input(
            value="face", definition=output, parents=[feed_input]
        )

        dead_input = Input(
            value="my second favorite value", definition=dead_def
        )
        beef_input = Input(
            value="beef", definition=output, parents=[dead_input]
        )

        test_result = {"feed": "face", "dead": "beef"}
        for test_value in test_result.keys():
            async for ctx, results in MemoryOrchestrator.run(
                DataFlow.auto(AssociateDefinition),
                [
                    feed_input,
                    face_input,
                    dead_input,
                    beef_input,
                    Input(
                        value={test_value: "output"},
                        definition=AssociateDefinition.op.inputs["spec"],
                    ),
                ],
            ):
                self.assertEqual(
                    results, {test_value: test_result[test_value]}
                )
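Judging from the assertions above, the spec given to AssociateDefinition maps a definition name to the name of the definition whose value should be reported for it: with {"feed": "output"}, the "output"-definition input whose parent chain leads back to a "feed" input is selected, yielding {"feed": "face"}.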
Example 9
 async def setUp(self):
     self.dataflow = DataFlow.auto(*OPIMPS)
     self.dataflow.seed.append(
         Input(
             value=[
                 restart_running_containers.op.outputs["containers"].name
             ],
             definition=GetSingle.op.inputs["spec"],
         ))
     self.test_inputs = {
         "TestRun": [
             Input(
                 value={
                     "ref": "refs/master",
                     "repository": {
                         "clone_url":
                         f"https://github.com/{USER}/{REPO}.git",
                         "default_branch": "master",
                         "html_url": f"https://github.com/{USER}/{REPO}",
                     },
                 },
                 definition=get_url_from_payload.op.inputs["payload"],
             )
         ]
     }
     self.containers_to_remove = []
Example 10
 async def test_get_embedding(self):
     input_sentence = (
         "The end is the beginning , and the beginning is the end")
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(get_embedding, GetSingle),
         [
             Input(
                 value=[get_embedding.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_sentence,
                 definition=get_embedding.op.inputs["text"],
             ),
             Input(
                 value="en_core_web_sm",
                 definition=get_embedding.op.inputs["spacy_model"],
             ),
         ],
     ):
         embeddings = results[get_embedding.op.outputs["result"].name]
         self.assertEqual(len(input_sentence.split()), len(embeddings))
         # Embeddings at two random word positions should share a shape
         self.assertEqual(
             embeddings[randint(0, len(input_sentence.split()) - 1)].shape,
             embeddings[randint(0, len(input_sentence.split()) - 1)].shape,
         )
Example 11
 async def test_calcHist(self):
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(calcHist, GetSingle),
         [
             Input(
                 value=[
                     calcHist.op.outputs["result"].name,
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=self.INPUT_ARRAY,
                 definition=calcHist.op.inputs["images"],
             ),
             Input(
                 value=None,
                 definition=calcHist.op.inputs["mask"],
             ),
             Input(
                 value=[0, 1],
                 definition=calcHist.op.inputs["channels"],
             ),
             Input(
                 value=[32, 32],
                 definition=calcHist.op.inputs["histSize"],
             ),
             Input(
                 value=[0, 256, 0, 256],
                 definition=calcHist.op.inputs["ranges"],
             ),
         ],
     ):
         self.assertEqual(
             results[calcHist.op.outputs["result"].name].shape, (32, 32)
         )
Example 12
 async def test_convert_color(self):
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(convert_color, GetSingle),
         [
             Input(
                 value=[
                     convert_color.op.outputs["result"].name,
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=self.INPUT_ARRAY,
                 definition=convert_color.op.inputs["src"],
             ),
             Input(
                 value="BGR2RGB",
                 definition=convert_color.op.inputs["code"],
             ),
         ],
     ):
         self.assertEqual(
             cv2.cvtColor(
                 results[convert_color.op.outputs["result"].name],
                 cv2.COLOR_RGB2BGR,
             ).flatten().tolist(),
             self.INPUT_ARRAY.flatten().tolist(),
         )
Example 13
    async def test_run(self):
        dataflow = DataFlow.auto(convert_to_gif, GetSingle)
        dataflow.seed.append(
            Input(
                value=[convert_to_gif.op.outputs["output_file"].name],
                definition=GetSingle.op.inputs["spec"],
            )
        )

        input_file_path = self.parent_path / "input.mp4"

        with open(input_file_path, "rb") as f:
            input_file = f.read()

        test_inputs = {
            "Test": [
                Input(
                    value=input_file,
                    definition=convert_to_gif.op.inputs["input_file"],
                ),
                Input(
                    value=240,
                    definition=convert_to_gif.op.inputs["resolution"],
                ),
            ]
        }

        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(dataflow) as octx:
                async for ctx, results in octx.run(test_inputs):
                    self.assertIn("output_file", results)
                    output = results["output_file"]
                    self.assertGreater(len(output), 100000)
Example 14
 async def test_validation_by_op(self):
     test_dataflow = DataFlow(
         operations={
             "validate_shout_instance": validate_shouts.op,
             "echo_shout": echo_shout.op,
             "get_single": GetSingle.imp.op,
         },
         seed=[
             Input(
                 value=[echo_shout.op.outputs["shout_out"].name],
                 definition=GetSingle.op.inputs["spec"],
             )
         ],
         implementations={
             validate_shouts.op.name: validate_shouts.imp,
             echo_shout.op.name: echo_shout.imp,
         },
     )
      test_inputs = {
          "TestShoutOut": [
              Input(value="validation_status:", definition=SHOUTIN)
          ]
      }
     async with MemoryOrchestrator.withconfig({}) as orchestrator:
         async with orchestrator(test_dataflow) as octx:
             async for ctx_str, results in octx.run(test_inputs):
                 self.assertIn("shout_out", results)
                 self.assertEqual(results["shout_out"],
                                  "validation_status:_validated")
Example 15
    async def setUp(self):
        self.dataflow = DataFlow.auto(*OPIMPS)
        self.dataflow.seed += [
            Input(
                value=[
                    restart_running_containers.op.outputs["containers"].name
                ],
                definition=GetSingle.op.inputs["spec"],
            ),
            Input(value=True, definition=clone_git_repo.op.conditions[0]),
        ]

        test_data = {
            "ref": "refs/main",
            "repository": {
                "clone_url": f"https://github.com/{USER}/{REPO}.git",
                "default_branch": "main",
                "html_url": f"https://github.com/{USER}/{REPO}",
            },
        }

        self.test_inputs = {
            "TestRun": [
                Input(
                    value=test_data,
                    definition=check_secret_match.op.outputs["git_payload"],
                )
            ]
        }
        self.containers_to_remove = []
Example 16
 async def test_simple_imputer(self):
     input_data = [[np.nan, 2], [6, np.nan], [7, 6]]
     output_data = [[6.5, 2], [6, 4], [7, 6]]
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(simple_imputer, GetSingle),
         [
             Input(
                 value=[simple_imputer.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=simple_imputer.op.inputs["data"],
             ),
             Input(
                 value=np.nan,
                 definition=simple_imputer.op.inputs["missing_values"],
             ),
             Input(
                 value="mean",
                 definition=simple_imputer.op.inputs["strategy"],
             ),
         ],
     ):
         imputed = results[simple_imputer.op.outputs["result"].name]
         self.assertTrue((imputed == output_data).all())
Example 17
 def test_export(self):
     exported = DataFlow.auto(add).export(linked=True)
      # Operations
      self.assertIn("operations", exported)
      self.assertIn("tests.test_df:add", exported["operations"])
      exported_add = exported["operations"]["tests.test_df:add"]
      self.assertIn("inputs", exported_add)
      self.assertIn("outputs", exported_add)
      self.assertIn("conditions", exported_add)
      self.assertIn("is_add", exported_add["conditions"])
      self.assertIn("numbers", exported_add["inputs"])
      self.assertEqual("numbers", exported_add["inputs"]["numbers"])
      self.assertIn("sum", exported_add["outputs"])
      self.assertEqual("result", exported_add["outputs"]["sum"])
     # Definitions
     self.assertIn("definitions", exported)
     self.assertIn("numbers", exported["definitions"])
     self.assertIn("primitive", exported["definitions"]["numbers"])
     self.assertEqual("List[int]",
                      exported["definitions"]["numbers"]["primitive"])
     self.assertIn("result", exported["definitions"])
     self.assertIn("primitive", exported["definitions"]["result"])
     self.assertEqual("int", exported["definitions"]["result"]["primitive"])
Example 18
 async def test_principal_component_analysis(self):
     input_data, _ = make_classification(
         n_samples=10,
         n_features=10,
         n_informative=8,
         n_redundant=2,
         random_state=7,
     )
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(principal_component_analysis, GetSingle),
         [
             Input(
                 value=[
                     principal_component_analysis.op.outputs["result"].name
                 ],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=principal_component_analysis.op.inputs["data"],
             ),
             Input(
                 value=8,
                 definition=principal_component_analysis.op.inputs[
                     "n_components"
                 ],
             ),
         ],
     ):
         self.assertEqual(
             results[
                 principal_component_analysis.op.outputs["result"].name
             ].shape,
             (10, 8),
         )
Example 19
 async def test_run(self):
     self.required_plugins("dffml-config-yaml", "dffml-model-scratch")
     # Load get_single and model_predict
     get_single = Operation.load("get_single")
     model_predict = list(load("dffml.operation.model:model_predict"))[0]
     # Create new dataflow from operations
     dataflow = DataFlow.auto(get_single, model_predict)
     # Add the seed inputs
     dataflow.seed.append(
         Input(
             value=[
                 definition.name
                 for definition in model_predict.op.outputs.values()
             ],
             definition=get_single.inputs["spec"],
         ))
     # Write out the dataflow
     dataflow_yaml = pathlib.Path(self.mktempfile() + ".yaml")
     async with BaseConfigLoader.load("yaml").withconfig(
         {}) as configloader:
         async with configloader() as loader:
             dataflow_yaml.write_bytes(await loader.dumpb(
                 dataflow.export(linked=True)))
     # TODO Figure out how nested model config options will work
     # print(dataflow_yaml.read_text())
     return
Example 20
async def operation_db():
    """
    Create the database and table (myTable) for the db operations
    """
    sdb = SqliteDatabase(SqliteDatabaseConfig(filename="examples.db"))

    dataflow = DataFlow(
        operations={"db_query_create": db_query_create_table.op},
        configs={"db_query_create": DatabaseQueryConfig(database=sdb)},
        seed=[],
    )

    inputs = [
        Input(
            value="myTable",
            definition=db_query_create_table.op.inputs["table_name"],
        ),
        Input(
            value={
                "key": "INTEGER NOT NULL PRIMARY KEY",
                "firstName": "text",
                "lastName": "text",
                "age": "int",
            },
            definition=db_query_create_table.op.inputs["cols"],
        ),
    ]

    async for ctx, result in MemoryOrchestrator.run(dataflow, inputs):
        pass
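Since operation_db is a coroutine, a synchronous caller would drive it with asyncio, for example:

    import asyncio

    # Build the dataflow above and create the myTable table in examples.db
    asyncio.run(operation_db())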
Example 21
    async def test_run(self):
        dataflow = DataFlow.auto(*OPIMPS)
        passwords = [str(random.random()) for _ in range(0, 20)]

        # Orchestrate the running of these operations
        async with MemoryOrchestrator.withconfig({}) as orchestrator:

            definitions = Operation.definitions(*OPERATIONS)

            passwords = [
                Input(
                    value=password,
                    definition=definitions["UnhashedPassword"],
                    parents=None,
                ) for password in passwords
            ]

            output_spec = Input(
                value=["ScryptPassword"],
                definition=definitions["get_single_spec"],
                parents=None,
            )

            async with orchestrator(dataflow) as octx:
                async for _ctx, results in octx.run(
                    {
                        password.value: [password, output_spec]
                        for password in passwords
                    }
                ):
                    self.assertTrue(results)
Example 22
 def _create_dataflow_with_op(self, query_op, seed=None):
     # Use None instead of a mutable default argument; DataFlow may
     # append to the seed list it is given
     return DataFlow(
         operations={
             "db_query": query_op.op,
             "get_single": GetSingle.imp.op,
         },
         configs={"db_query": DatabaseQueryConfig(database=self.sdb)},
         seed=seed if seed is not None else [],
         implementations={query_op.op.name: query_op.imp},
     )
Example 23
def create_dataflow(operation, seed):
    dataflow = DataFlow(
        operations={operation.op.name: operation.op},
        # seed is a list of Inputs (a set would not preserve ordering)
        seed=[
            Input(value=val, definition=operation.op.inputs[input_name])
            for input_name, val in seed.items()
        ],
        implementations={operation.op.name: operation.imp},
    )
    return dataflow
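Hypothetical usage of this helper, assuming an operation my_op that takes a single "text" input:

    # my_op is a stand-in name; seed maps the operation's input names to values
    dataflow = create_dataflow(my_op, {"text": "hello"})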
Example 24
    async def test_condition_does_not_run_auto_start(self):
        ran = []

        @op(conditions=[CONDITION])
        async def condition_test():
            ran.append(True)  # pragma: no cover

        async with MemoryOrchestrator() as orchestrator:
            async with orchestrator(DataFlow(condition_test)) as octx:
                async for _ in octx.run([]):
                    pass

        self.assertFalse(ran)
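CONDITION is defined elsewhere in the test module; by analogy with the boolean condition input seeded in Example 15, a plausible shape would be:

    # Assumption only: a boolean condition definition, which would be
    # satisfied by seeding Input(value=True, definition=CONDITION)
    CONDITION = Definition(name="example_condition", primitive="bool")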
Example 25
 async def test_dataflow_run_cli_example(self):
     # Write out override dataflow
     created = self.mktempfile() + ".yaml"
     with open(created, "w") as fileobj:
         with contextlib.redirect_stdout(fileobj):
             await CLI.cli(
                 "dataflow",
                 "create",
                 "dffml.mapping.create",
                 "print_output",
                 "-configloader",
                 "yaml",
             )
     # Load the generated dataflow
     async with ConfigLoaders() as cfgl:
         _, exported = await cfgl.load_file(created)
         dataflow = DataFlow._fromdict(**exported)
     # Modify the dataflow
     dataflow.flow["print_output"].inputs["data"] = [{
         "dffml.mapping.create":
         "mapping"
     }]
     # Write back modified dataflow
     async with BaseConfigLoader.load("yaml").withconfig(
         {}) as configloader:
         async with configloader() as loader:
             with open(created, "wb") as fileobj:
                 fileobj.write(await
                               loader.dumpb(dataflow.export(linked=True)))
     # Run the dataflow
     with contextlib.redirect_stdout(self.stdout):
         await CLI.cli(
             "dataflow",
             "run",
             "records",
             "all",
             "-no-echo",
             "-record-def",
             "value",
             "-inputs",
             "hello=key",
             "-dataflow",
             created,
             "-sources",
             "m=memory",
             "-source-records",
             "world",
             "user",
         )
     self.assertEqual(self.stdout.getvalue(),
                      "{'hello': 'world'}\n{'hello': 'user'}\n")
Example 26
 async def setUp(self):
     self.dataflow = DataFlow(
         operations={
             "get_circle": get_circle.op,
             "get_single": GetSingle.imp.op,
         },
         seed=[
             Input(
                 value=[get_circle.op.outputs["shape"].name],
                 definition=GetSingle.op.inputs["spec"],
             )
         ],
         implementations={"get_circle": get_circle.imp},
     )
Example 27
    async def setUp(self):
        dataflow = DataFlow(
            operations={
                "announce": announce.op,
                "get_single": GetSingle.imp.op,
            },
            seed=[
                Input(
                    value=[announce.op.outputs["string_out"].name],
                    definition=GetSingle.op.inputs["spec"],
                )
            ],
            implementations={announce.op.name: announce.imp},
        )

        self.dataflow = dataflow
Example 28
    async def __aenter__(self) -> "DataFlowSourceContext":
        self.sctx = await self.parent.source().__aenter__()

        if isinstance(self.parent.config.dataflow, str):
            dataflow_path = pathlib.Path(self.parent.config.dataflow)
            config_type = dataflow_path.suffix.replace(".", "")
            config_cls = BaseConfigLoader.load(config_type)
            async with config_cls.withconfig({}) as configloader:
                async with configloader() as loader:
                    exported = await loader.loadb(dataflow_path.read_bytes())
                self.parent.config.dataflow = DataFlow._fromdict(**exported)

        self.octx = await self.parent.orchestrator(
            self.parent.config.dataflow
        ).__aenter__()

        return self
Example 29
    async def test_gen_with_input(self):
        test_dataflow = DataFlow.auto(GetMulti, counter, echo_num)
        test_dataflow.seed.append(
            Input(
                value=[echo_num.op.outputs["number_out"].name],
                definition=GetMulti.op.inputs["spec"],
            ))
        test_dataflow.implementations[counter.op.name] = counter.imp
        test_dataflow.implementations[echo_num.op.name] = echo_num.imp

        test_inputs = {"TestCount": [Input(value=1, definition=CountStart)]}
        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(test_dataflow) as octx:
                async for ctx_str, results in octx.run(test_inputs):
                    self.assertIn("number", results)
                    self.assertEqual(
                        {1, 2, 3, 4, 5}, set(results["number"])
                    )
Example 30
 async def test_standard_scaler(self):
     input_data = [[0, 0], [0, 0], [1, 1], [1, 1]]
     output_data = [[-1, -1], [-1, -1], [1, 1], [1, 1]]
     async for ctx, results in MemoryOrchestrator.run(
         DataFlow.auto(standard_scaler, GetSingle),
         [
             Input(
                 value=[standard_scaler.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
             Input(
                 value=input_data,
                 definition=standard_scaler.op.inputs["data"],
             ),
         ],
     ):
         self.assertEqual(
             results[standard_scaler.op.outputs["result"].name], output_data
         )