Code example #1
File: test_operations.py Project: wandreuscv/dffml
    async def test_run(self):
        calc_strings_check = {"add 40 and 2": 42, "multiply 42 and 10": 420}

        async with MemoryOrchestrator.basic_config(*OPIMPS) as orchestrator:
            async with orchestrator() as octx:
                for to_calc in calc_strings_check.keys():
                    await octx.ictx.sadd(
                        to_calc,
                        Input(
                            value=to_calc,
                            definition=calc_parse_line.op.inputs["line"],
                        ),
                        Input(
                            value=[calc_add.op.outputs["sum"].name],
                            definition=GetSingle.op.inputs["spec"],
                        ),
                    )

                async for ctx, results in octx.run_operations():
                    ctx_str = (await ctx.handle()).as_string()
                    self.assertEqual(
                        calc_strings_check[ctx_str],
                        results[GetSingle.op.name][
                            calc_add.op.outputs["sum"].name
                        ],
                    )
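
The test above relies on calculator operations such as calc_parse_line and calc_add that are defined elsewhere in the project. As a rough illustration of how such operations are declared, here is a minimal sketch using DFFML's @op decorator; the definition names, primitives, and operation body are assumptions for illustration, not the project's actual code.

    from dffml.df.base import op
    from dffml.df.types import Definition

    # Hypothetical definitions; the real test module defines its own
    numbers = Definition(name="numbers", primitive="List[int]")
    result = Definition(name="result", primitive="int")

    @op(inputs={"numbers": numbers}, outputs={"sum": result})
    async def calc_add(numbers):
        # Add the operands parsed out of the calculation string
        return {"sum": sum(numbers)}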
Code example #2
    async def test_run(self):
        passwords = [str(random.random()) for _ in range(0, 20)]

        # Orchestrate the running of these operations
        async with MemoryOrchestrator.basic_config(*OPIMPS) as orchestrator:

            definitions = Operation.definitions(*OPERATIONS)

            passwords = [
                Input(value=password,
                      definition=definitions['UnhashedPassword'],
                      parents=None) for password in passwords
            ]

            output_spec = Input(value=['ScryptPassword'],
                                definition=definitions['get_single_spec'],
                                parents=None)

            async with orchestrator() as octx:
                # Add our inputs to the input network with the context being the password
                for password in passwords:
                    await octx.ictx.add(
                        MemoryInputSet(
                            MemoryInputSetConfig(
                                ctx=StringInputSetContext(password.value),
                                inputs=[password, output_spec])))
                try:
                    async for _ctx, results in octx.run_operations(
                            strict=True):
                        self.assertTrue(results)
                except AttributeError as error:
                    if "module 'hashlib' has no attribute 'scrypt'" \
                            in str(error):
                        return
                    raise
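
The AttributeError guard exists because hashlib.scrypt is only available when Python is built against OpenSSL 1.1+. A minimal sketch of a scrypt-hashing operation like the one under test might look as follows; the operation body, salt, and scrypt parameters are assumptions for illustration, not the project's actual code (only the 'UnhashedPassword' and 'ScryptPassword' definition names come from the test above).

    import hashlib

    from dffml.df.base import op
    from dffml.df.types import Definition

    # Hypothetical definitions matching the names used in the test
    unhashed_password = Definition(name="UnhashedPassword", primitive="string")
    scrypt_password = Definition(name="ScryptPassword", primitive="string")

    @op(inputs={"password": unhashed_password}, outputs={"password": scrypt_password})
    async def scrypt_hash(password):
        # hashlib.scrypt requires OpenSSL 1.1+; older builds raise
        # AttributeError, which is why the test tolerates that case
        hashed = hashlib.scrypt(
            password.encode(), salt=b"demo-salt", n=2 ** 14, r=8, p=1
        )
        return {"password": hashed.hex()}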
Code example #3
    async def multicomm_dataflow(self, config, request):
        # Seed the network with inputs given by caller
        # TODO(p0,security) allowlist of valid definitions to seed (set
        # Input.origin to something other than seed)
        inputs = []
        # If data was sent add those inputs
        if request.method == "POST":
            # Accept a list of input data
            # TODO validate that input data is dict of list of inputs each item
            # has definition and value properties
            for ctx, client_inputs in (await request.json()).items():
                for input_data in client_inputs:
                    if (input_data["definition"]
                            not in config.dataflow.definitions):
                        return web.json_response(
                            {
                                "error": f"Missing definition for {input_data['definition']} in dataflow"
                            },
                            status=HTTPStatus.NOT_FOUND,
                        )
                inputs.append(
                    MemoryInputSet(
                        MemoryInputSetConfig(
                            ctx=StringInputSetContext(ctx),
                            inputs=[
                                Input(
                                    value=input_data["value"],
                                    definition=config.dataflow.definitions[
                                        input_data["definition"]
                                    ],
                                )
                                for input_data in client_inputs
                            ],
                        )
                    )
                )
        # Run the operation in an orchestrator
        # TODO(dfass) Create the orchestrator on startup of the HTTP API itself
        async with MemoryOrchestrator.basic_config() as orchestrator:
            # TODO(dfass) Create octx on dataflow registration
            async with orchestrator(config.dataflow) as octx:
                results = {
                    str(ctx): result async for ctx, result in octx.run(*inputs)
                }
                # TODO Implement input and presentation stages?
                """
                if config.presentation == "blob":
                    return web.Response(body=results)
                elif config.presentation == "text":
                    return web.Response(text=results)
                else:
                """
                return web.json_response(results)
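
For reference, a caller would POST a JSON body mapping each context name to a list of objects with "definition" and "value" properties. A minimal client sketch using aiohttp; the URL and definition name are placeholders, since the real route depends on how the multicomm endpoint is registered.

    import asyncio
    import aiohttp

    async def main():
        # The handler expects {context_name: [{"definition": ..., "value": ...}]}
        payload = {
            "my_context": [
                {"definition": "some_definition_name", "value": "some value"},
            ]
        }
        async with aiohttp.ClientSession() as session:
            # Placeholder URL for illustration only
            async with session.post(
                "http://localhost:8080/some/route", json=payload
            ) as resp:
                print(await resp.json())

    asyncio.run(main())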
Code example #4
File: cli.py Project: raghav-ys/dffml
    async def run(self):
        # Create an Orchestrator which will manage the running of our operations
        async with MemoryOrchestrator.basic_config(*OPIMPS) as orchestrator:
            # Create a orchestrator context, everything in DFFML follows this
            # one-two context entry pattern
            async with orchestrator() as octx:
                for package_name in self.packages:
                    # For each package add a new input set to the network of
                    # inputs (ictx). Operations run under a context, the context
                    # here is the package_name to evaluate (the first argument).
                    # The next arguments are all the inputs we're seeding the
                    # network with for that context. We give the package name
                    # because pypi_latest_package_version needs it to find the
                    # version, which safety will then use. We also give an input
                    # to the output operation GetSingle, which takes a list of
                    # data type definitions we want to select as our results.
                    await octx.ictx.sadd(
                        package_name,
                        Input(
                            value=package_name,
                            definition=pypi_package_json.op.inputs["package"],
                        ),
                        Input(
                            value=[
                                safety_check.op.outputs["issues"].name,
                                run_bandit.op.outputs["report"].name,
                            ],
                            definition=GetSingle.op.inputs["spec"],
                        ),
                    )

                # Run all the operations. Each iteration of this loop happens
                # when all inputs are exhausted for a context; the output
                # operations are then run and their results are yielded
                async for ctx, results in octx.run_operations():
                    # The context for this data flow was the package name
                    package_name = (await ctx.handle()).as_string()
                    # Get the results of the GetSingle output operation
                    results = results[GetSingle.op.name]
                    # Gather the result values: the first is the number of
                    # issues safety found, the second is bandit's report of
                    # high confidence, high severity issues
                    any_issues = list(results.values())
                    if (any_issues[0] > 0 or
                            any_issues[1]["CONFIDENCE.HIGH_AND_SEVERITY.HIGH"]
                            > 5):
                        print(f"Do not install {package_name}! {results!r}")
                    else:
                        print(f"{package_name} is okay to install")
Code example #5
    async def test_run(self):
        linker = Linker()
        exported = linker.export(*OPERATIONS)
        definitions, operations, _outputs = linker.resolve(exported)

        # Instantiate inputs
        repos = glob.glob(
            os.path.join(
                os.path.expanduser("~"),
                "Documents",
                "python",
                "testrepos",
                "*",
            )
        )
        if not repos:
            repos = glob.glob(
                os.path.join(
                    os.path.expanduser("~"), "Documents", "python", "dffml"
                )
            )
        if not repos:
            repos = [
                "https://github.com/intel/dffml",
                "https://github.com/pdxjohnny/dffml",
            ]
        repos = repos[:1]
        urls = [
            Input(value=URL, definition=definitions["URL"], parents=None)
            for URL in repos
        ]
        no_git_branch_given = Input(
            value=True,
            definition=definitions["no_git_branch_given"],
            parents=None,
        )
        date_spec = Input(
            value=datetime.now().strftime(TIME_FORMAT_MINTUE_RESOLUTION),
            definition=definitions["quarter_start_date"],
            parents=None,
        )
        quarters = [
            Input(value=i, definition=definitions["quarter"], parents=None)
            for i in range(0, 10)
        ]

        group_by_spec = Input(
            value={
                "cloc": {
                    "group": "quarter",
                    "by": "language_to_comment_ratio",
                    "fill": 0,
                },
                "authors": {
                    "group": "quarter",
                    "by": "author_count",
                    "fill": 0,
                },
                "work": {"group": "quarter", "by": "work_spread", "fill": 0},
                "release": {
                    "group": "quarter",
                    "by": "release_within_period",
                    "fill": False,
                },
                "commits": {
                    "group": "quarter",
                    "by": "commit_count",
                    "fill": 0,
                },
            },
            definition=definitions["group_by_spec"],
            parents=None,
        )

        # Orchestrate the running of these operations
        async with MemoryOrchestrator.basic_config(*OPIMPS) as orchestrator:
            async with orchestrator() as octx:
                # Add our inputs to the input network with the context being the URL
                for url in urls:
                    await octx.ictx.sadd(
                        url.value,
                        url,
                        no_git_branch_given,
                        date_spec,
                        group_by_spec,
                        *quarters,
                    )
                async for ctx, results in octx.run_operations():
                    self.assertTrue(results)
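
The group_by_spec above asks the GroupBy output operation to bucket each metric by quarter, substituting the given fill value for quarters with no data. Illustratively, the results for one repository could take roughly this shape (keys come from the spec above; all values are invented for illustration):

    # Purely illustrative; actual values depend on the repository analyzed
    example_results = {
        "cloc": [1.2, 1.4, 0, 0.9],             # language_to_comment_ratio per quarter
        "authors": [4, 6, 0, 2],                # author_count per quarter
        "work": [3, 5, 0, 1],                   # work_spread per quarter
        "release": [True, False, False, True],  # release_within_period per quarter
        "commits": [120, 80, 0, 15],            # commit_count per quarter
    }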
Code example #6
    async def multicomm_dataflow(self, config, request):
        # Seed the network with inputs given by caller
        # TODO(p0,security) allowlist of valid definitions to seed (set
        # Input.origin to something other than seed)
        inputs = []
        # If data was sent add those inputs
        if request.method == "POST":
            # Accept a list of input data according to config.input_mode
            if config.input_mode == "default":
                # TODO validate that input data is dict of list of inputs each item
                # has definition and value properties
                for ctx, client_inputs in (await request.json()).items():
                    for input_data in client_inputs:
                        if (input_data["definition"]
                                not in config.dataflow.definitions):
                            return web.json_response(
                                {
                                    "error":
                                    f"Missing definition for {input_data['definition']} in dataflow"
                                },
                                status=HTTPStatus.NOT_FOUND,
                            )
                    inputs.append(
                        MemoryInputSet(
                            MemoryInputSetConfig(
                                ctx=StringInputSetContext(ctx),
                                inputs=[
                                    Input(
                                        value=input_data["value"],
                                        definition=config.dataflow.definitions[
                                            input_data["definition"]],
                                    ) for input_data in client_inputs
                                ],
                            )))
            elif ":" in config.input_mode:
                preprocess_mode, input_def = config.input_mode.split(":")
                if input_def not in config.dataflow.definitions:
                    return web.json_response(
                        {
                            "error":
                            f"Missing definition for {input_data['definition']} in dataflow"
                        },
                        status=HTTPStatus.NOT_FOUND,
                    )
                if preprocess_mode == "json":
                    value = await request.json()
                elif preprocess_mode == "str":
                    value = await request.text()
                elif preprocess_mode == "bytes":
                    value = await request.read()
                elif preprocess_mode == "stream":
                    value = request.content
                else:
                    return web.json_response(
                        {
                            "error":
                            f"preprocess tag must be one of {IO_MODES}, got {preprocess}"
                        },
                        status=HTTPStatus.NOT_FOUND,
                    )
                inputs.append(
                    MemoryInputSet(
                        MemoryInputSetConfig(
                            ctx=StringInputSetContext("post_input"),
                            inputs=[
                                Input(
                                    value=value,
                                    definition=config.dataflow.
                                    definitions[input_def],
                                )
                            ],
                        )))
            else:
                raise NotImplementedError(
                    "Input modes other than default,preprocess:definition_name  not yet implemented"
                )

        # Run the operation in an orchestrator
        # TODO(dfass) Create the orchestrator on startup of the HTTP API itself
        async with MemoryOrchestrator.basic_config() as orchestrator:
            # TODO(dfass) Create octx on dataflow registration
            async with orchestrator(config.dataflow) as octx:
                results = {
                    str(ctx): result
                    async for ctx, result in octx.run(*inputs)
                }
                if config.output_mode == "json":
                    return web.json_response(results)

                # content_info is a List[str]: [content_type, output_keys] for
                # the stream and bytes modes, and just [output_keys] for text
                postprocess_mode, *content_info = config.output_mode.split(":")

                if postprocess_mode == "stream":
                    # stream:text/plain:get_single.beef
                    raise NotImplementedError(
                        "output mode  not yet implemented")

                elif postprocess_mode == "bytes":
                    content_type, output_keys = content_info
                    output_data = traverse_get(results,
                                               *output_keys.split("."))
                    return web.Response(body=output_data)

                elif postprocess_mode == "text":
                    output_data = traverse_get(results,
                                               *content_info[0].split("."))
                    return web.Response(text=output_data)

                else:
                    return web.json_response(
                        {"error": f"output mode not valid"},
                        status=HTTPStatus.NOT_FOUND,
                    )
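
The traverse_get helper used in the bytes and text branches walks a nested dictionary one key at a time. DFFML ships its own implementation; the following is only a sketch of the idea, not the library's code.

    def traverse_get(target, *args):
        # Walk nested dictionaries by successive keys:
        # traverse_get({"a": {"b": 1}}, "a", "b") -> 1
        current = target
        for key in args:
            current = current[key]
        return current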