async def test_vaildation_by_op(self):
    """Shout inputs are validated by an operation before being echoed."""
    # Build the flow explicitly so implementations can be wired in by name.
    dataflow = DataFlow(
        operations={
            "validate_shout_instance": validate_shouts.op,
            "echo_shout": echo_shout.op,
            "get_single": GetSingle.imp.op,
        },
        seed=[
            Input(
                value=[echo_shout.op.outputs["shout_out"].name],
                definition=GetSingle.op.inputs["spec"],
            )
        ],
        implementations={
            validate_shouts.op.name: validate_shouts.imp,
            echo_shout.op.name: echo_shout.imp,
        },
    )
    run_inputs = {
        "TestShoutOut": [Input(value="validation_status:", definition=SHOUTIN)]
    }
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(dataflow) as octx:
            async for _ctx, results in octx.run(run_inputs):
                self.assertIn("shout_out", results)
                # The validator appends "_validated" to the shouted value
                self.assertEqual(
                    results["shout_out"], "validation_status:_validated"
                )
class DataFlowSourceConfig:
    """Configuration for a source that preprocesses records via a DataFlow."""

    # Underlying source whose records are fed through the dataflow
    source: BaseSource = field("Source to wrap")
    # Dataflow each record is run through before being returned
    dataflow: DataFlow = field("DataFlow to use for preprocessing")
    # BUG FIX: ``default=Features()`` created one shared mutable Features
    # instance for every config; use default_factory (as the ``inputs``
    # field below already does) so each config gets its own instance.
    features: Features = field(
        "Features to pass as definitions to each context from each "
        "record to be preprocessed",
        default_factory=lambda: Features(),
    )
    inputs: List[str] = field(
        "Other inputs to add under each ctx (record's key will "
        + "be used as the context)",
        action=ParseInputsAction,
        default_factory=lambda: [],
    )
    record_def: str = field(
        "Definition to be used for record.key."
        "If set, record.key will be added to the set of inputs "
        "under each context (which is also the record's key)",
        default=None,
    )
    length: str = field("Definition name to add as source length", default=None)
    all_for_single: bool = field(
        "Run all records through dataflow before grabing "
        "results of desired record on a call to record()",
        default=False,
    )
    no_strict: bool = field(
        "Do not exit on operation exceptions, just log errors",
        default=False,
    )
    # NOTE(review): evaluated once at class-definition time, so every config
    # instance shares this default orchestrator — confirm that is intended.
    orchestrator: BaseOrchestrator = MemoryOrchestrator.withconfig({})
async def test_run(self):
    """Convert a bundled mp4 to a gif and sanity-check the output size."""
    dataflow = DataFlow.auto(convert_to_gif, GetSingle)
    dataflow.seed.append(
        Input(
            value=[convert_to_gif.op.outputs["output_file"].name],
            definition=GetSingle.op.inputs["spec"],
        )
    )
    # Load the entire test video into memory
    video_bytes = (self.parent_path / "input.mp4").read_bytes()
    run_inputs = {
        "Test": [
            Input(
                value=video_bytes,
                definition=convert_to_gif.op.inputs["input_file"],
            ),
            Input(
                value=240,
                definition=convert_to_gif.op.inputs["resolution"],
            ),
        ]
    }
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(dataflow) as octx:
            async for _ctx, results in octx.run(run_inputs):
                self.assertIn("output_file", results)
                # A real gif of this video is well over 100 KB
                self.assertGreater(len(results["output_file"]), 100000)
async def test_2_lookup(self):
    """Look up all rows and compare them against the fixture data."""
    seed = [
        Input(
            value=[db_query_lookup.op.outputs["lookups"].name],
            definition=GetSingle.op.inputs["spec"],
        )
    ]
    dataflow = self._create_dataflow_with_op(db_query_lookup, seed=seed)
    # Empty cols/conditions selects every column of every row
    lookup_args = {
        "table_name": self.table_name,
        "cols": [],
        "conditions": [],
    }
    run_inputs = {
        "lookup": [
            Input(
                value=value,
                definition=db_query_lookup.op.inputs[name],
            )
            for name, value in lookup_args.items()
        ]
    }
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(dataflow) as octx:
            async for _ctx, results in octx.run(run_inputs):
                self.assertIn("query_lookups", results)
                self.assertEqual(self.data_dicts, results["query_lookups"])
async def test_0_create(self):
    """Create the table, then verify it exists via sqlite_master."""
    dataflow = self._create_dataflow_with_op(db_query_create_table)
    create_args = {"table_name": self.table_name, "cols": self.cols}
    run_inputs = {
        "create": [
            Input(
                value=value,
                definition=db_query_create_table.op.inputs[name],
            )
            for name, value in create_args.items()
        ]
    }
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(dataflow) as octx:
            # Drain the run; the create op produces nothing we assert on here
            async for _ctx, _results in octx.run(run_inputs):
                pass
    async with self.sdb as db:
        async with db() as db_ctx:
            query = (
                "SELECT count(name) FROM sqlite_master "
                + f" WHERE type='table' and name='{self.table_name}' "
            )
            db_ctx.parent.cursor.execute(query)
            results = db_ctx.parent.cursor.fetchone()
            self.assertEqual(results["count(name)"], 1)
async def test_1_insert(self):
    """Insert each fixture row, then verify the table contents match."""
    dataflow = self._create_dataflow_with_op(db_query_insert)
    for row in self.data_dicts:
        insert_args = {"table_name": self.table_name, "data": row}
        run_inputs = {
            "insert": [
                Input(
                    value=value,
                    definition=db_query_insert.op.inputs[name],
                )
                for name, value in insert_args.items()
            ]
        }
        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(dataflow) as octx:
                async for _ctx, _results in octx.run(run_inputs):
                    continue
    async with self.sdb as db:
        async with db() as db_ctx:
            db_ctx.parent.cursor.execute(f"SELECT * FROM {self.table_name}")
            rows = db_ctx.parent.cursor.fetchall()
            # sqlite rows compare equal to the fixture once coerced to dicts
            self.assertEqual(self.data_dicts, [dict(r) for r in rows])
async def test_auto_start(self):
    """Auto-start operations should run even with no explicit inputs."""
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(self.dataflow) as octx:
            # Empty input list: only auto-started ops can produce results
            async for _ctx, results in octx.run({"testStart": []}):
                self.assertIn("string", results)
                self.assertEqual("EXISTS", results["string"])
async def test_run(self):
    """Hash a batch of random passwords with scrypt via the dataflow."""
    dataflow = DataFlow.auto(*OPIMPS)
    plaintexts = [str(random.random()) for _ in range(0, 20)]
    # Orchestrate the running of these operations
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        definitions = Operation.definitions(*OPERATIONS)
        password_inputs = [
            Input(
                value=password,
                definition=definitions["UnhashedPassword"],
                parents=None,
            )
            for password in plaintexts
        ]
        output_spec = Input(
            value=["ScryptPassword"],
            definition=definitions["get_single_spec"],
            parents=None,
        )
        async with orchestrator(dataflow) as octx:
            # FIX: removed a no-op ``except AttributeError as error: raise``
            # which caught the exception only to re-raise it unchanged.
            async for _ctx, results in octx.run({
                password.value: [password, output_spec]
                for password in password_inputs
            }):
                self.assertTrue(results)
async def test_0_start_container(self):
    """Restarting a repo's container should change ``docker ps`` output."""
    with mock.patch.object(
        clone_git_repo.imp, "CONTEXT", new=FakeCloneRepoImp
    ):
        tag = f"{USER}/{REPO}"
        # Same docker ps invocation is used before and after the run
        ps_args = (
            "docker",
            "ps",
            "--filter",
            f"ancestor={tag}",
            "--format",
            "{{.ID}} {{.RunningFor}}",
        )
        before = await check_output(*ps_args)
        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(self.dataflow) as octx:
                async for _ctx, results in octx.run(self.test_inputs):
                    after = await check_output(*ps_args)
                    # A restart shows up as new container IDs / uptimes
                    self.assertNotEqual(before, after)
                    self.assertIn("docker_restarted_containers", results)
                    # Remember containers so teardown can remove them
                    self.containers_to_remove = results[
                        "docker_restarted_containers"
                    ]
async def test_AcceptUserInput(self):
    """Stdin text collected by AcceptUserInput ends up in the results."""
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(self.InputDataflow) as octx:
            patched_input = mock.patch(
                "builtins.input", return_value="Testing AcceptUserInput"
            )
            with patched_input:
                async for _ctx, results in octx.run({"testInput": []}):
                    self.assertIn("UserInput", results)
                    self.assertEqual(
                        "Testing AcceptUserInput", results["UserInput"]
                    )
async def test_print_output(self):
    """print_output should write its input value to stdout."""
    run_inputs = [
        Input(
            value="Testing print_output",
            definition=self.OutputDataflow.definitions["DataToPrint"],
            parents=None,
        )
    ]
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(self.OutputDataflow) as octx:
            # Capture stdout so the printed value can be asserted on
            with contextlib.redirect_stdout(self.stdout):
                async for _ctx, _results in octx.run(run_inputs):
                    printed = self.stdout.getvalue()
                    self.assertIn("Testing print_output", printed)
async def test_validate(self):
    """Inputs are validated/normalized before reaching the mapping output."""
    run_inputs = {
        "area": [
            Input(value="unitcircle", definition=ShapeName),
            Input(value=1, definition=Radius),
            Input(value=3.14, definition=Pie),
        ]
    }
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(self.dataflow) as octx:
            async for _ctx, results in octx.run(run_inputs):
                self.assertIn("mapping", results)
                mapping = results["mapping"]
                # The name validator upper-cases the shape name
                self.assertEqual(mapping["name"], "UNITCIRCLE")
                self.assertEqual(mapping["area"], 3.14)
                self.assertEqual(mapping["radius"], 1)
async def test_gen_with_input(self):
    """All values yielded by a generator op are collected by GetMulti."""
    dataflow = DataFlow.auto(GetMulti, counter, echo_num)
    dataflow.seed.append(
        Input(
            value=[echo_num.op.outputs["number_out"].name],
            definition=GetMulti.op.inputs["spec"],
        )
    )
    dataflow.implementations[counter.op.name] = counter.imp
    dataflow.implementations[echo_num.op.name] = echo_num.imp
    run_inputs = {"TestCount": [Input(value=1, definition=CountStart)]}
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(dataflow) as octx:
            async for _ctx, results in octx.run(run_inputs):
                self.assertIn("number", results)
                # FIX(idiom): set literal instead of set([...]) (C405);
                # order-insensitive comparison of the yielded values.
                self.assertEqual({1, 2, 3, 4, 5}, set(results["number"]))
async def test_run_custom(self):
    """run_dataflow re-declared with a custom output definition."""
    sum_definition = add.op.outputs["sum"]
    spec_input = Input(
        value=[sum_definition.name],
        definition=GetSingle.op.inputs["spec"],
    )
    # Subflow is a copy of DATAFLOW that also exports the sum
    subflow = copy.deepcopy(DATAFLOW)
    subflow.seed.append(spec_input)
    # run_dataflow takes parse_line's inputs and produces the sum directly
    dataflow = DataFlow(
        operations={
            "run_dataflow": run_dataflow.op._replace(
                inputs=parse_line.op.inputs,
                outputs={sum_definition.name: sum_definition},
            ),
            "get_single": GetSingle.imp.op,
        },
        configs={"run_dataflow": RunDataFlowConfig(dataflow=subflow)},
        seed=[spec_input],
    )
    expected = {"add 40 and 2": 42, "multiply 42 and 10": 420}
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(dataflow) as octx:
            async for ctx, results in octx.run({
                line: [
                    Input(
                        value=line,
                        definition=parse_line.op.inputs["line"],
                    )
                ]
                for line in expected
            }):
                # Context handle string is the calc line itself
                ctx_str = (await ctx.handle()).as_string()
                self.assertEqual(
                    expected[ctx_str], results[sum_definition.name]
                )
async def run(self):
    """Evaluate each requested package and report whether it is safe."""
    # Orchestrator manages operation execution; entering it and then its
    # context follows DFFML's usual one-two context-entry pattern.
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(DATAFLOW) as octx:
            # One context per package keeps packages' data separate; the
            # sole input for each context is the package name itself.
            run_inputs = {
                package_name: [
                    Input(
                        value=package_name,
                        definition=pypi_package_json.op.inputs["package"],
                    )
                ]
                for package_name in self.packages
            }
            # Each iteration fires once a context's inputs are exhausted
            # and its output operations have produced results.
            async for package_name, results in octx.run(run_inputs):
                # Pull the safety issue count and the bandit report
                safety_issues = results[
                    safety_check.op.outputs["issues"].name
                ]
                bandit_report = results[
                    run_bandit.op.outputs["report"].name
                ]
                high_sev = bandit_report["CONFIDENCE.HIGH_AND_SEVERITY.HIGH"]
                # Decide whether these numbers mean stop-ship
                if safety_issues > 0 or high_sev > 5:
                    print(f"Do not install {package_name}!")
                    for definition_name, result in results.items():
                        print(f" {definition_name}: {result}")
                else:
                    print(f"{package_name} is okay to install")
async def test_run(self):
    """Parse and evaluate calc strings, checking each context's result."""
    dataflow = DataFlow.auto(*OPIMPS)
    expected = {"add 40 and 2": 42, "multiply 42 and 10": 420}
    run_inputs = {
        line: [
            Input(
                value=line,
                definition=calc_parse_line.op.inputs["line"],
            ),
            Input(
                value=[calc_add.op.outputs["sum"].name],
                definition=GetSingle.op.inputs["spec"],
            ),
        ]
        for line in expected
    }
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(dataflow) as octx:
            async for ctx, results in octx.run(run_inputs):
                # Context handle string is the calc line itself
                ctx_str = (await ctx.handle()).as_string()
                self.assertEqual(
                    expected[ctx_str],
                    results[calc_add.op.outputs["sum"].name],
                )
async def test_run(self):
    """Download packages, extract binaries, and associate PIE status."""
    package_urls = [
        "http://pkg.freebsd.org/FreeBSD:13:amd64/latest/All/ImageMagick7-7.0.8.48.txz",
        "https://download.clearlinux.org/releases/10540/clear/x86_64/os/Packages/sudo-setuid-1.8.17p1-34.x86_64.rpm",
        "https://rpmfind.net/linux/fedora/linux/updates/29/Everything/x86_64/Packages/g/gzip-1.9-9.fc29.x86_64.rpm",
        "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Everything/x86_64/os/Packages/c/curl-7.32.0-3.fc20.x86_64.rpm",
    ]
    dataflow = DataFlow.auto(
        URLToURLBytes,
        files_in_rpm,
        urlbytes_to_rpmfile,
        urlbytes_to_tarfile,
        is_binary_pie,
        Associate,
        cleanup_rpm,
    )
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        definitions = Operation.definitions(*OPERATIONS)
        run_inputs = {
            url: [
                Input(value=url, definition=definitions["URL"]),
                Input(
                    value=["rpm_filename", "binary_is_PIE"],
                    definition=definitions["associate_spec"],
                ),
            ]
            for url in package_urls
        }
        async with orchestrator(dataflow) as octx:
            # strict=True: any operation exception fails the run
            async for _ctx, results in octx.run(run_inputs, strict=True):
                self.assertTrue(results)
async def create_octx(self):
    """Yield an orchestrator context bound to the module-level DATAFLOW."""
    orchestrator_cm = MemoryOrchestrator.withconfig({})
    async with orchestrator_cm as orchestrator:
        async with orchestrator(DATAFLOW) as ctx:
            yield ctx
async def test_run(self): calc_strings_check = {"add 40 and 2": 42, "multiply 42 and 10": 420} # TODO(p0) Implement and test asyncgenerator callstyles_no_expand = ["asyncgenerator", "dict"] callstyles = { "dict": { to_calc: [ Input(value=to_calc, definition=parse_line.op.inputs["line"]), Input( value=[add.op.outputs["sum"].name], definition=GetSingle.op.inputs["spec"], ), ] for to_calc in calc_strings_check.keys() }, "list_input_sets": [ MemoryInputSet( MemoryInputSetConfig( ctx=StringInputSetContext(to_calc), inputs=[ Input( value=to_calc, definition=parse_line.op.inputs["line"], ), Input( value=[add.op.outputs["sum"].name], definition=GetSingle.op.inputs["spec"], ), ], )) for to_calc in calc_strings_check.keys() ], "uctx": [[ Input(value=to_calc, definition=parse_line.op.inputs["line"]), Input( value=[add.op.outputs["sum"].name], definition=GetSingle.op.inputs["spec"], ), ] for to_calc in calc_strings_check.keys()], } async with MemoryOrchestrator.withconfig({}) as orchestrator: async with orchestrator(DATAFLOW) as octx: for callstyle, inputs in callstyles.items(): with self.subTest(callstyle=callstyle): if callstyle in callstyles_no_expand: run_coro = octx.run(inputs) else: run_coro = octx.run(*inputs) async for ctx, results in run_coro: ctx_str = (await ctx.handle()).as_string() if callstyle == "uctx": self.assertIn( results[add.op.outputs["sum"].name], dict( zip( calc_strings_check.values(), calc_strings_check.keys(), )), ) else: self.assertEqual( calc_strings_check[ctx_str], results[add.op.outputs["sum"].name], )
async def test_run(self):
    """run_dataflow's default interface: dict-shaped inputs in, results out."""
    # Outer flow: run_dataflow executes DATAFLOW as a subflow; get_single
    # collects run_dataflow's "results" output definition
    test_dataflow = DataFlow(
        operations={
            "run_dataflow": run_dataflow.op,
            "get_single": GetSingle.imp.op,
        },
        configs={"run_dataflow": RunDataFlowConfig(dataflow=DATAFLOW)},
        seed=[
            Input(
                value=[run_dataflow.op.outputs["results"].name],
                definition=GetSingle.op.inputs["spec"],
            )
        ],
    )
    # Each element maps one subflow context name to its inputs, expressed
    # as plain dicts of value + definition *name* (not Definition objects)
    test_inputs = [
        {
            "add_op": [
                {
                    "value": "add 40 and 2",
                    "definition": parse_line.op.inputs["line"].name,
                },
                {
                    "value": [add.op.outputs["sum"].name],
                    "definition": GetSingle.op.inputs["spec"].name,
                },
            ]
        },
        {
            "mult_op": [
                {
                    "value": "multiply 42 and 10",
                    "definition": parse_line.op.inputs["line"].name,
                },
                {
                    "value": [mult.op.outputs["product"].name],
                    "definition": GetSingle.op.inputs["spec"].name,
                },
            ]
        },
    ]
    test_outputs = {"add_op": 42, "mult_op": 420}
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(test_dataflow) as octx:
            async for _ctx, results in octx.run({
                # Outer context name is the single key of each test_input
                list(test_input.keys())[0]: [
                    Input(
                        value=test_input,
                        definition=run_dataflow.op.inputs["inputs"],
                    )
                ]
                for test_input in test_inputs
            }):
                ctx_str = (await _ctx.handle()).as_string()
                # flow_results: subflow context -> {definition: value};
                # ``results`` is narrowed step by step below
                self.assertIn("flow_results", results)
                results = results["flow_results"]
                self.assertIn(ctx_str, map(str, results.keys()))
                self.assertIn(ctx_str, test_outputs)
                results = results[list(results.keys())[0]]
                self.assertIn("result", results)
                results = results["result"]
                expected_results = test_outputs[ctx_str]
                self.assertEqual(expected_results, results)
class DataFlowSourceConfig:
    """Configuration for a source wrapped with a preprocessing DataFlow."""

    source: BaseSource  # underlying source whose records are preprocessed
    dataflow: DataFlow  # dataflow each record is run through
    features: Features  # features passed into each dataflow context
    # NOTE(review): evaluated once at class-definition time, so every
    # config instance shares this default orchestrator — confirm intended.
    orchestrator: BaseOrchestrator = MemoryOrchestrator.withconfig({})