Beispiel #1
0
 async def test_vaildation_by_op(self):
     """
     Run a DataFlow wiring validate_shouts -> echo_shout -> GetSingle and
     assert the echoed result carries the "_validated" suffix.

     NOTE(review): "vaildation" in the method name looks like a typo for
     "validation"; left unchanged to avoid breaking external references.
     """
     # Operations and implementations are wired explicitly (instead of
     # DataFlow.auto) so the validate/echo implementations can be
     # registered by name below.
     test_dataflow = DataFlow(
         operations={
             "validate_shout_instance": validate_shouts.op,
             "echo_shout": echo_shout.op,
             "get_single": GetSingle.imp.op,
         },
         # Seed spec: ask GetSingle to extract echo_shout's output.
         seed=[
             Input(
                 value=[echo_shout.op.outputs["shout_out"].name],
                 definition=GetSingle.op.inputs["spec"],
             )
         ],
         implementations={
             validate_shouts.op.name: validate_shouts.imp,
             echo_shout.op.name: echo_shout.imp,
         },
     )
     # Single context with one SHOUTIN input; validation is expected to
     # append "_validated" (see assertion below).
     test_inputs = {
         "TestShoutOut":
         [Input(value="validation_status:", definition=SHOUTIN)]
     }
     async with MemoryOrchestrator.withconfig({}) as orchestrator:
         async with orchestrator(test_dataflow) as octx:
             async for ctx_str, results in octx.run(test_inputs):
                 self.assertIn("shout_out", results)
                 self.assertEqual(results["shout_out"],
                                  "validation_status:_validated")
Beispiel #2
0
class DataFlowSourceConfig:
    """Configuration for a source wrapper that preprocesses each record
    through a DataFlow before exposing it.

    NOTE(review): ``default=Features()`` and the class-level
    ``MemoryOrchestrator.withconfig({})`` are evaluated once at class
    definition time, so instances may share those objects — confirm that
    ``field()`` copies defaults if per-instance state is expected.
    """

    # Underlying source whose records are fed through ``dataflow``.
    source: BaseSource = field("Source to wrap")
    dataflow: DataFlow = field("DataFlow to use for preprocessing")
    features: Features = field(
        "Features to pass as definitions to each context from each "
        "record to be preprocessed",
        default=Features(),
    )
    inputs: List[str] = field(
        "Other inputs to add under each ctx (record's key will " +
        "be used as the context)",
        action=ParseInputsAction,
        default_factory=lambda: [],
    )
    record_def: str = field(
        "Definition to be used for record.key."
        "If set, record.key will be added to the set of inputs "
        "under each context (which is also the record's key)",
        default=None,
    )
    length: str = field("Definition name to add as source length",
                        default=None)
    all_for_single: bool = field(
        "Run all records through dataflow before grabing "
        "results of desired record on a call to record()",
        default=False,
    )
    no_strict: bool = field(
        "Do not exit on operation exceptions, just log errors",
        default=False,
    )
    # Orchestrator created at class-definition time and shared by every
    # instance that does not override it.
    orchestrator: BaseOrchestrator = MemoryOrchestrator.withconfig({})
Beispiel #3
0
    async def test_run(self):
        """
        Convert a sample mp4 to a gif through the dataflow and assert the
        gif output is non-trivially sized.
        """
        dataflow = DataFlow.auto(convert_to_gif, GetSingle)
        # Seed spec: ask GetSingle for convert_to_gif's output_file result.
        dataflow.seed.append(
            Input(
                value=[convert_to_gif.op.outputs["output_file"].name],
                definition=GetSingle.op.inputs["spec"],
            )
        )

        input_file_path = self.parent_path / "input.mp4"

        # read() with no argument reads the whole file — clearer than the
        # previous read(-1), which does the same thing.
        with open(input_file_path, "rb") as f:
            input_file = f.read()

        test_inputs = {
            "Test": [
                Input(
                    value=input_file,
                    definition=convert_to_gif.op.inputs["input_file"],
                ),
                Input(
                    value=240,
                    definition=convert_to_gif.op.inputs["resolution"],
                ),
            ]
        }

        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(dataflow) as octx:
                async for ctx, results in octx.run(test_inputs):
                    self.assertIn("output_file", results)
                    output = results["output_file"]
                    # Sanity threshold: a real gif should exceed 100kB.
                    self.assertGreater(len(output), 100000)
Beispiel #4
0
    async def test_2_lookup(self):
        """
        Run db_query_lookup over the whole table (no cols/conditions) and
        assert the rows returned match ``self.data_dicts``.
        """
        # Seed spec: extract db_query_lookup's "lookups" output.
        seed = [
            Input(
                value=[db_query_lookup.op.outputs["lookups"].name],
                definition=GetSingle.op.inputs["spec"],
            )
        ]
        df = self._create_dataflow_with_op(db_query_lookup, seed=seed)
        # Empty cols/conditions => select every column of every row.
        test_inputs = {
            "lookup": {
                "table_name": self.table_name,
                "cols": [],
                "conditions": [],
            }
        }

        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(df) as octx:
                # Expand test_inputs into one Input per operation input,
                # keyed by context name ("lookup").
                async for _ctx, results in octx.run({
                        test_ctx: [
                            Input(
                                value=val,
                                definition=db_query_lookup.op.inputs[key],
                            ) for key, val in test_val.items()
                        ]
                        for test_ctx, test_val in test_inputs.items()
                }):
                    self.assertIn("query_lookups", results)
                    results = results["query_lookups"]
                    self.assertEqual(self.data_dicts, results)
Beispiel #5
0
    async def test_0_create(self):
        """
        Create the test table via db_query_create_table, then verify
        through sqlite_master that exactly one table by that name exists.
        """

        df = self._create_dataflow_with_op(db_query_create_table)
        test_inputs = {
            "create": {
                "table_name": self.table_name,
                "cols": self.cols
            }
        }

        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(df) as octx:
                # No outputs to check; running the dataflow performs the
                # CREATE TABLE side effect.
                async for _ctx, results in octx.run({
                        test_ctx:
                    [
                        Input(
                            value=val,
                            definition=db_query_create_table.op.inputs[key],
                        ) for key, val in test_val.items()
                    ]
                        for test_ctx, test_val in test_inputs.items()
                }):
                    pass

            async with self.sdb as db:
                async with db() as db_ctx:
                    # table_name is test-controlled, so the f-string query
                    # is not an injection risk here.
                    query = (
                        "SELECT count(name) FROM sqlite_master " +
                        f" WHERE type='table' and name='{self.table_name}' ")
                    db_ctx.parent.cursor.execute(query)
                    results = db_ctx.parent.cursor.fetchone()
                    self.assertEqual(results["count(name)"], 1)
Beispiel #6
0
    async def test_1_insert(self):
        """
        Insert each dict in self.data_dicts via db_query_insert, then read
        the table back directly and compare row-for-row.
        """

        df = self._create_dataflow_with_op(db_query_insert)
        # NOTE(review): a fresh orchestrator is created per record; looks
        # intentional (isolated runs), but could be hoisted out of the
        # loop — confirm before changing.
        for _data in self.data_dicts:
            test_inputs = {
                "insert": {
                    "table_name": self.table_name,
                    "data": _data
                }
            }

            async with MemoryOrchestrator.withconfig({}) as orchestrator:
                async with orchestrator(df) as octx:
                    # Insert is a side effect; results are not inspected.
                    async for _ctx, results in octx.run({
                            test_ctx: [
                                Input(
                                    value=val,
                                    definition=db_query_insert.op.inputs[key],
                                ) for key, val in test_val.items()
                            ]
                            for test_ctx, test_val in test_inputs.items()
                    }):
                        continue

        async with self.sdb as db:
            async with db() as db_ctx:
                query = f"SELECT * FROM {self.table_name}"
                db_ctx.parent.cursor.execute(query)
                rows = db_ctx.parent.cursor.fetchall()
                self.assertEqual(self.data_dicts, list(map(dict, rows)))
Beispiel #7
0
 async def test_auto_start(self):
     """Auto-start operations should run with no explicit inputs.

     An empty input list under the "testStart" context is enough to
     trigger the dataflow; the results must contain "EXISTS" under the
     "string" key.
     """
     no_inputs = {"testStart": []}
     async with MemoryOrchestrator.withconfig({}) as orch:
         async with orch(self.dataflow) as run_ctx:
             async for _ctx, outputs in run_ctx.run(no_inputs):
                 self.assertIn("string", outputs)
                 self.assertEqual("EXISTS", outputs["string"])
Beispiel #8
0
    async def test_run(self):
        """
        Hash 20 random plain-text passwords through the dataflow built
        from OPIMPS and assert each context yields a (truthy) result.
        """
        dataflow = DataFlow.auto(*OPIMPS)
        # Distinct name for the raw strings; the previous version rebound
        # ``passwords`` from strings to Input objects, shadowing itself.
        plaintexts = [str(random.random()) for _ in range(0, 20)]

        # Orchestrate the running of these operations
        async with MemoryOrchestrator.withconfig({}) as orchestrator:

            definitions = Operation.definitions(*OPERATIONS)

            password_inputs = [
                Input(
                    value=password,
                    definition=definitions["UnhashedPassword"],
                    parents=None,
                ) for password in plaintexts
            ]

            # Shared output spec: grab the ScryptPassword result.
            output_spec = Input(
                value=["ScryptPassword"],
                definition=definitions["get_single_spec"],
                parents=None,
            )

            async with orchestrator(dataflow) as octx:
                # One context per password, keyed by its plain-text value.
                # The no-op ``except AttributeError: raise`` wrapper that
                # used to surround this loop has been removed.
                async for _ctx, results in octx.run({
                        password.value: [password, output_spec]
                        for password in password_inputs
                }):
                    self.assertTrue(results)
 async def test_0_start_container(self):
     """
     Run the dataflow with the git-clone operation faked out and assert
     that a container for the image tag was (re)started.

     Side effects: talks to the local docker daemon via ``docker ps``.
     """
     # Replace the real clone implementation so no actual clone happens.
     with mock.patch.object(
         clone_git_repo.imp, "CONTEXT", new=FakeCloneRepoImp
     ):
         tag = f"{USER}/{REPO}"
         # Snapshot of running containers for this image before the run.
         before = await check_output(
             "docker",
             "ps",
             "--filter",
             f"ancestor={tag}",
             "--format",
             "{{.ID}} {{.RunningFor}}",
         )
         async with MemoryOrchestrator.withconfig({}) as orchestrator:
             async with orchestrator(self.dataflow) as octx:
                 async for ctx, results in octx.run(self.test_inputs):
                     after = await check_output(
                         "docker",
                         "ps",
                         "--filter",
                         f"ancestor={tag}",
                         "--format",
                         "{{.ID}} {{.RunningFor}}",
                     )
                     # Changed ``docker ps`` output means the dataflow
                     # (re)started a container.
                     self.assertNotEqual(before, after)
                     self.assertIn("docker_restarted_containers", results)
                     # Remember containers so teardown can remove them.
                     self.containers_to_remove = results[
                         "docker_restarted_containers"
                     ]
Beispiel #10
0
 async def test_AcceptUserInput(self):
     """AcceptUserInput should surface the value typed at the prompt.

     ``builtins.input`` is patched so the test never blocks on stdin.
     """
     inputs = {"testInput": []}
     fake_stdin = mock.patch("builtins.input",
                             return_value="Testing AcceptUserInput")
     async with MemoryOrchestrator.withconfig({}) as orch:
         async with orch(self.InputDataflow) as run_ctx:
             with fake_stdin:
                 async for _ctx, outputs in run_ctx.run(inputs):
                     self.assertIn("UserInput", outputs)
                     self.assertEqual("Testing AcceptUserInput",
                                      outputs["UserInput"])
Beispiel #11
0
 async def test_print_output(self):
     """print_output should write the given value to stdout."""
     message = "Testing print_output"
     inputs = [
         Input(
             value=message,
             definition=self.OutputDataflow.definitions["DataToPrint"],
             parents=None,
         )
     ]
     async with MemoryOrchestrator.withconfig({}) as orch:
         async with orch(self.OutputDataflow) as run_ctx:
             # Capture stdout so the printed value can be asserted on.
             with contextlib.redirect_stdout(self.stdout):
                 async for _ctx, _results in run_ctx.run(inputs):
                     self.assertIn(message, self.stdout.getvalue())
Beispiel #12
0
 async def test_validate(self):
     """Validated shape inputs should produce the expected mapping."""
     inputs = {
         "area": [
             Input(value="unitcircle", definition=ShapeName),
             Input(value=1, definition=Radius),
             Input(value=3.14, definition=Pie),
         ]
     }
     async with MemoryOrchestrator.withconfig({}) as orch:
         async with orch(self.dataflow) as run_ctx:
             async for _ctx, outputs in run_ctx.run(inputs):
                 self.assertIn("mapping", outputs)
                 mapping = outputs["mapping"]
                 self.assertEqual(mapping["name"], "UNITCIRCLE")
                 self.assertEqual(mapping["area"], 3.14)
                 self.assertEqual(mapping["radius"], 1)
    async def test_gen_with_input(self):
        """
        counter started at CountStart=1 should emit numbers echoed by
        echo_num, and GetMulti should collect exactly {1..5}.
        """
        test_dataflow = DataFlow.auto(GetMulti, counter, echo_num)
        # Seed spec: ask GetMulti to collect every echoed number.
        test_dataflow.seed.append(
            Input(
                value=[echo_num.op.outputs["number_out"].name],
                definition=GetMulti.op.inputs["spec"],
            ))
        test_dataflow.implementations[counter.op.name] = counter.imp
        test_dataflow.implementations[echo_num.op.name] = echo_num.imp

        test_inputs = {"TestCount": [Input(value=1, definition=CountStart)]}
        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(test_dataflow) as octx:
                async for ctx_str, results in octx.run(test_inputs):
                    self.assertIn("number", results)
                    # Order is not guaranteed, so compare as sets; the
                    # set literal replaces the dated set([...]) form.
                    self.assertEqual({1, 2, 3, 4, 5},
                                     set(results["number"]))
Beispiel #14
0
    async def test_run_custom(self):
        """
        Wrap DATAFLOW as a subflow behind run_dataflow with remapped
        inputs/outputs, then check each calc line yields its expected sum.
        """
        output_definition = add.op.outputs["sum"]

        # Spec telling GetSingle to pull the sum out of the results.
        get_single_spec_input = Input(
            value=[output_definition.name],
            definition=GetSingle.op.inputs["spec"],
        )

        # Deep copy so the module-level DATAFLOW is not mutated.
        subflow = copy.deepcopy(DATAFLOW)
        subflow.seed.append(get_single_spec_input)

        test_dataflow = DataFlow(
            operations={
                # run_dataflow re-exposed with parse_line's inputs and the
                # sum definition as its sole output.
                "run_dataflow":
                run_dataflow.op._replace(
                    inputs=parse_line.op.inputs,
                    outputs={output_definition.name: output_definition},
                ),
                "get_single":
                GetSingle.imp.op,
            },
            configs={"run_dataflow": RunDataFlowConfig(dataflow=subflow)},
            seed=[get_single_spec_input],
        )

        test_outputs = {"add 40 and 2": 42, "multiply 42 and 10": 420}

        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(test_dataflow) as octx:
                # One context per calc line; the line doubles as the
                # context key, matched back via ctx_str below.
                async for _ctx, results in octx.run({
                        input_line: [
                            Input(
                                value=input_line,
                                definition=parse_line.op.inputs["line"],
                            )
                        ]
                        for input_line in test_outputs
                }):
                    ctx_str = (await _ctx.handle()).as_string()
                    results = results[output_definition.name]
                    expected_results = test_outputs[ctx_str]
                    self.assertEqual(expected_results, results)
Beispiel #15
0
 async def run(self):
     """
     Evaluate each package in ``self.packages`` and print whether it is
     okay to install, based on its safety issue count and bandit report.
     """
     # Create an Orchestrator which will manage the running of our operations
     async with MemoryOrchestrator.withconfig({}) as orchestrator:
         # Create a orchestrator context, everything in DFFML follows this
         # one-two context entry pattern
         async with orchestrator(DATAFLOW) as octx:
             # Run all the operations, Each iteration of this loop happens
             # when all inputs are exhausted for a context, the output
             # operations are then run and their results are yielded
             async for package_name, results in octx.run({
                     # For each package add a new input set to the input network
                     # The context operations execute under is the package name
                     # to evaluate. Contexts ensure that data pertaining to
                     # package A doesn't mingle with data pertaining to package B
                     package_name:
                 [
                     # The only input to the operations is the package name.
                     Input(
                         value=package_name,
                         definition=pypi_package_json.op.inputs["package"],
                     )
                 ]
                     for package_name in self.packages
             }):
                 # Grab the number of safety issues and the bandit report
                 # from the results dict
                 safety_issues = results[
                     safety_check.op.outputs["issues"].name]
                 bandit_report = results[
                     run_bandit.op.outputs["report"].name]
                 # Decide if those numbers mean we should stop ship or not
                 if (safety_issues > 0 or
                         bandit_report["CONFIDENCE.HIGH_AND_SEVERITY.HIGH"]
                         > 5):
                     print(f"Do not install {package_name}!")
                     for definition_name, result in results.items():
                         print(f"    {definition_name}: {result}")
                 else:
                     print(f"{package_name} is okay to install")
Beispiel #16
0
 async def test_run(self):
     """Each calc string should evaluate to its expected numeric result."""
     dataflow = DataFlow.auto(*OPIMPS)
     expected = {"add 40 and 2": 42, "multiply 42 and 10": 420}
     # One context per calc string: the line itself plus a GetSingle
     # spec selecting the "sum" output.
     per_context = {
         line: [
             Input(
                 value=line,
                 definition=calc_parse_line.op.inputs["line"],
             ),
             Input(
                 value=[calc_add.op.outputs["sum"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
         ]
         for line in expected
     }
     async with MemoryOrchestrator.withconfig({}) as orch:
         async with orch(dataflow) as run_ctx:
             async for ctx, results in run_ctx.run(per_context):
                 # Match each result back to its context's calc string.
                 ctx_str = (await ctx.handle()).as_string()
                 self.assertEqual(
                     expected[ctx_str],
                     results[calc_add.op.outputs["sum"].name],
                 )
Beispiel #17
0
    async def test_run(self):
        """
        Download several real package archives (txz/rpm), extract their
        contents, and check contained binaries for PIE; assert each URL
        context produces results.

        Side effects: network downloads from the listed mirrors.
        """
        repos = [
            "http://pkg.freebsd.org/FreeBSD:13:amd64/latest/All/ImageMagick7-7.0.8.48.txz",
            "https://download.clearlinux.org/releases/10540/clear/x86_64/os/Packages/sudo-setuid-1.8.17p1-34.x86_64.rpm",
            "https://rpmfind.net/linux/fedora/linux/updates/29/Everything/x86_64/Packages/g/gzip-1.9-9.fc29.x86_64.rpm",
            "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Everything/x86_64/os/Packages/c/curl-7.32.0-3.fc20.x86_64.rpm",
        ]

        dataflow = DataFlow.auto(
            URLToURLBytes,
            files_in_rpm,
            urlbytes_to_rpmfile,
            urlbytes_to_tarfile,
            is_binary_pie,
            Associate,
            cleanup_rpm,
        )
        async with MemoryOrchestrator.withconfig({}) as orchestrator:

            definitions = Operation.definitions(*OPERATIONS)

            async with orchestrator(dataflow) as octx:
                # strict=True: operation exceptions abort the run instead
                # of being logged and skipped.
                async for ctx, results in octx.run(
                    {
                        URL: [
                            Input(value=URL, definition=definitions["URL"]),
                            # Associate pairs each rpm filename with its
                            # PIE check result.
                            Input(
                                value=["rpm_filename", "binary_is_PIE"],
                                definition=definitions["associate_spec"],
                            ),
                        ]
                        for URL in repos
                    },
                    strict=True,
                ):
                    self.assertTrue(results)
Beispiel #18
0
 async def create_octx(self):
     """Async generator yielding an orchestrator context for DATAFLOW.

     The orchestrator and its context remain open for the lifetime of
     the generator; callers must close it to release them.
     """
     async with MemoryOrchestrator.withconfig({}) as orch:
         async with orch(DATAFLOW) as run_ctx:
             yield run_ctx
Beispiel #19
0
 async def test_run(self):
     """
     Exercise octx.run with each supported input call style (dict,
     list of MemoryInputSets, bare lists = unnamed contexts) and verify
     the calc results under every style.
     """
     calc_strings_check = {"add 40 and 2": 42, "multiply 42 and 10": 420}
     # TODO(p0) Implement and test asyncgenerator
     # Styles whose inputs are passed as one argument instead of being
     # expanded with *.
     callstyles_no_expand = ["asyncgenerator", "dict"]
     callstyles = {
         # Context name -> list of Inputs.
         "dict": {
             to_calc: [
                 Input(value=to_calc,
                       definition=parse_line.op.inputs["line"]),
                 Input(
                     value=[add.op.outputs["sum"].name],
                     definition=GetSingle.op.inputs["spec"],
                 ),
             ]
             for to_calc in calc_strings_check.keys()
         },
         # Pre-built input sets with explicit string contexts.
         "list_input_sets": [
             MemoryInputSet(
                 MemoryInputSetConfig(
                     ctx=StringInputSetContext(to_calc),
                     inputs=[
                         Input(
                             value=to_calc,
                             definition=parse_line.op.inputs["line"],
                         ),
                         Input(
                             value=[add.op.outputs["sum"].name],
                             definition=GetSingle.op.inputs["spec"],
                         ),
                     ],
                 )) for to_calc in calc_strings_check.keys()
         ],
         # Bare lists of Inputs: contexts are unnamed.
         "uctx": [[
             Input(value=to_calc, definition=parse_line.op.inputs["line"]),
             Input(
                 value=[add.op.outputs["sum"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
         ] for to_calc in calc_strings_check.keys()],
     }
     async with MemoryOrchestrator.withconfig({}) as orchestrator:
         async with orchestrator(DATAFLOW) as octx:
             for callstyle, inputs in callstyles.items():
                 with self.subTest(callstyle=callstyle):
                     if callstyle in callstyles_no_expand:
                         run_coro = octx.run(inputs)
                     else:
                         run_coro = octx.run(*inputs)
                     async for ctx, results in run_coro:
                         ctx_str = (await ctx.handle()).as_string()
                         if callstyle == "uctx":
                             # Unnamed contexts can't be keyed by ctx_str,
                             # so only check the sum is one of the
                             # expected values.
                             self.assertIn(
                                 results[add.op.outputs["sum"].name],
                                 dict(
                                     zip(
                                         calc_strings_check.values(),
                                         calc_strings_check.keys(),
                                     )),
                             )
                         else:
                             self.assertEqual(
                                 calc_strings_check[ctx_str],
                                 results[add.op.outputs["sum"].name],
                             )
Beispiel #20
0
    async def test_run(self):
        """
        Drive run_dataflow with dict-serialized inputs (values plus
        definition *names*) and verify each context's nested result.
        """
        test_dataflow = DataFlow(
            operations={
                "run_dataflow": run_dataflow.op,
                "get_single": GetSingle.imp.op,
            },
            configs={"run_dataflow": RunDataFlowConfig(dataflow=DATAFLOW)},
            # Seed spec: extract run_dataflow's "results" output.
            seed=[
                Input(
                    value=[run_dataflow.op.outputs["results"].name],
                    definition=GetSingle.op.inputs["spec"],
                )
            ],
        )

        # Inputs in serialized form: definitions referenced by name
        # rather than by object.
        test_inputs = [
            {
                "add_op": [
                    {
                        "value": "add 40 and 2",
                        "definition": parse_line.op.inputs["line"].name,
                    },
                    {
                        "value": [add.op.outputs["sum"].name],
                        "definition": GetSingle.op.inputs["spec"].name,
                    },
                ]
            },
            {
                "mult_op": [
                    {
                        "value": "multiply 42 and 10",
                        "definition": parse_line.op.inputs["line"].name,
                    },
                    {
                        "value": [mult.op.outputs["product"].name],
                        "definition": GetSingle.op.inputs["spec"].name,
                    },
                ]
            },
        ]
        test_outputs = {"add_op": 42, "mult_op": 420}

        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(test_dataflow) as octx:
                # Outer context key is the single key of each test_input
                # dict ("add_op" / "mult_op").
                async for _ctx, results in octx.run({
                        list(test_input.keys())[0]: [
                            Input(
                                value=test_input,
                                definition=run_dataflow.op.inputs["inputs"],
                            )
                        ]
                        for test_input in test_inputs
                }):
                    ctx_str = (await _ctx.handle()).as_string()
                    self.assertIn("flow_results", results)

                    # flow_results maps subflow contexts to their outputs;
                    # drill down to the single "result" value.
                    results = results["flow_results"]
                    self.assertIn(ctx_str, map(str, results.keys()))
                    self.assertIn(ctx_str, test_outputs)

                    results = results[list(results.keys())[0]]
                    self.assertIn("result", results)

                    results = results["result"]
                    expected_results = test_outputs[ctx_str]
                    self.assertEqual(expected_results, results)
Beispiel #21
0
class DataFlowSourceConfig:
    """Minimal config: source, dataflow, and features for preprocessing.

    NOTE(review): the orchestrator default is created once at class
    definition time and shared by all instances that do not override it.
    """

    # Underlying source whose records are preprocessed.
    source: BaseSource
    # DataFlow each record is run through.
    dataflow: DataFlow
    # Features passed as definitions to each context.
    features: Features
    orchestrator: BaseOrchestrator = MemoryOrchestrator.withconfig({})