Beispiel #1
0
    async def test_associatedefinition(self):
        """
        Run AssociateDefinition once per parent definition name and check the
        result.

        Two ``output`` inputs are seeded, each with a parent of a different
        definition ("feed" and "dead"). For a spec of ``{name: "output"}`` the
        expected result maps ``name`` to the value of the ``output`` input
        whose parent has the definition ``name``.
        """
        definition_feed = Definition(name="feed", primitive="string")
        definition_dead = Definition(name="dead", primitive="string")
        definition_output = Definition(name="output", primitive="string")

        parent_feed = Input(
            value="my favorite value", definition=definition_feed
        )
        child_face = Input(
            value="face", definition=definition_output, parents=[parent_feed]
        )

        parent_dead = Input(
            value="my second favorite value", definition=definition_dead
        )
        child_beef = Input(
            value="beef", definition=definition_output, parents=[parent_dead]
        )

        # Parent definition name -> value expected to be associated with it
        expected_by_parent = {"feed": "face", "dead": "beef"}
        for parent_name, expected_value in expected_by_parent.items():
            dataflow = DataFlow.auto(AssociateDefinition)
            spec_input = Input(
                value={parent_name: "output"},
                definition=AssociateDefinition.op.inputs["spec"],
            )
            seed_inputs = [
                parent_feed,
                child_face,
                parent_dead,
                child_beef,
                spec_input,
            ]
            async for _ctx, results in MemoryOrchestrator.run(
                dataflow, seed_inputs
            ):
                self.assertEqual(results, {parent_name: expected_value})
Beispiel #2
0
 async def input_set(self, record: Record) -> List[Input]:
     """
     Build the list of inputs describing ``record``.

     The list holds, in order: one input per configured feature, the
     configured static inputs, the source length (only when
     ``config.length`` is set) and the record key (only when
     ``config.record_def`` is set).
     """
     config = self.parent.config

     # One input per feature, with a definition derived from the feature
     collected = [
         Input(
             value=record.feature(feature.name),
             definition=Definition(
                 name=feature.name,
                 primitive=str(feature.dtype()),
             ),
         )
         for feature in config.features
     ]

     # Static (value, definition name) pairs from the config
     collected.extend(
         Input(
             value=value,
             definition=config.dataflow.definitions[name],
         )
         for value, name in config.inputs
     )

     if config.length:
         collected.append(
             Input(
                 value=await self.sctx.length(),
                 definition=Definition(
                     name=config.length,
                     primitive="int",
                 ),
             )
         )

     if config.record_def:
         collected.append(
             Input(
                 value=record.key,
                 definition=Definition(
                     name=config.record_def,
                     primitive="string",
                 ),
             )
         )

     return collected
Beispiel #3
0
 def _create_dataflow(self, input_, output):
     """
     Return the dataflow built by ``create_archive_dataflow`` from a seed set
     of two inputs: ``input_`` (origin ``"input_path"``) and ``output``
     (origin ``"output_path"``).
     """
     source_seed = Input(
         value=input_,
         definition=Definition("test_inp", primitive="str"),
         origin="input_path",
     )
     destination_seed = Input(
         value=output,
         definition=Definition("test_out", primitive="str"),
         origin="output_path",
     )
     # The seed is passed as a set, exactly as before
     return create_archive_dataflow({source_seed, destination_seed})
Beispiel #4
0
 async def records(self) -> AsyncIterator[Record]:
     """
     Yield records from the wrapped source after running each through the
     orchestrator context.

     For every record, one input per configured feature is fed to
     ``self.octx.run``. The record is yielded once per result the run
     produces; truthy results are stored on it via ``record.evaluated``.
     """
     async for record in self.sctx.records():
         feature_inputs = [
             Input(
                 value=record.feature(feature.name),
                 definition=Definition(
                     name=feature.name,
                     primitive=str(feature.dtype()),
                 ),
             )
             for feature in self.parent.config.features
         ]
         async for _ctx, result in self.octx.run(feature_inputs):
             if result:
                 record.evaluated(result)
             yield record
Beispiel #5
0
        ngram_range=ngram_range,
        analyzer=analyzer,
        max_df=max_df,
        min_df=min_df,
        max_features=max_features,
        vocabulary=vocabulary,
        binary=binary,
    )
    names = None
    X = vectorizer.fit_transform(text).toarray()
    if get_feature_names:
        names = vectorizer.get_feature_names()
    return [X, names]


# ``collected_data`` is the input of get_status_collected_data, which outputs
# ``data_received``.
collected_data = Definition(name="collected_data", primitive="List[str]")
data_received = Definition(name="data_received", primitive="bool")


@op(
    inputs={"data": collected_data},
    outputs={"status": data_received},
)
def get_status_collected_data(data):
    """
    Report a ``True`` status.

    ``data`` is accepted but never inspected; the returned status is
    always ``True``.
    """
    return dict(status=True)


# Definition used as the ``data`` input of the operation declared next.
data_example = Definition(name="data_example", primitive="str")


@op(
    inputs={"data": data_example}, outputs={"all_data": collected_data},
Beispiel #6
0
from typing import Dict, Any

from dffml.base import config
from dffml.df.base import op
from dffml.df.types import DataFlow, Input, Definition


@config
class RunDataFlowConfig:
    """
    Configuration for the ``dffml.dataflow.run`` operation.
    """

    # Dataflow the operation reads as ``self.config.dataflow``
    dataflow: DataFlow


@op(
    name="dffml.dataflow.run",
    inputs={
        "inputs": Definition(name="flow_inputs", primitive="Dict[str,Any]")
    },
    outputs={
        "results": Definition(name="flow_results", primitive="Dict[str,Any]")
    },
    config_cls=RunDataFlowConfig,
    expand=["results"],
)
async def run_dataflow(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
    """
    Starts a subflow ``self.config.dataflow`` and adds ``inputs`` in it.

    Parameters
    ----------
    inputs : dict
        The inputs to add to the subflow. These should be a key value mapping of
Beispiel #7
0
        if custom_stop_words:
            if token.text not in custom_stop_words:
                all_tokens.append(token.text)
        else:
            all_tokens.append(token.text)

    # Create list of word tokens after removing stopwords
    for word in all_tokens:
        lexeme = nlp.vocab[word]
        if lexeme.is_stop == False:
            clean_tokens.append(word)
    return " ".join(clean_tokens)


# Definitions for the get_embedding operation: the first four are its inputs
# (text, max_len, pad_token, spacy_model) and ``embedding_def`` is its output.
text_def = Definition(name="text_def", primitive="str")
max_len_def = Definition(name="max_len_def", primitive="int")
pad_token_def = Definition(name="pad_token_def", primitive="str")
spacy_model_name_def = Definition(name="spacy_model_name_def", primitive="str")
embedding_def = Definition(name="embedding", primitive="generic")


@op(
    name="get_embedding",
    inputs={
        "text": text_def,
        "spacy_model": spacy_model_name_def,
        "max_len": max_len_def,
        "pad_token": pad_token_def,
    },
    outputs={"embedding": embedding_def},
Beispiel #8
0
import sys
from dffml.df.types import Definition

# All definitions declared by this module.
definitions = [
    Definition(name="input_data", primitive="List[List[int]]"),
    Definition(name="output_data", primitive="List[List[int]]"),
    Definition(name="n_components", primitive="int"),
    Definition(name="n_iter", primitive="int"),
    Definition(name="random_state", primitive="int"),
    Definition(name="missing_values", primitive="Any"),
    Definition(name="strategy", primitive="str"),
    Definition(name="categories", primitive="List[List[Any]]"),
]

# Bind each definition as a module attribute named after ``definition.name``
# (for example ``input_data``), so it is reachable by that name on the module.
for definition in definitions:
    setattr(sys.modules[__name__], definition.name, definition)
Beispiel #9
0
    async def test_config_default_label(self):
        """
        Parse key value store arguments for the memory redundancy checker and
        verify the types and values of the resulting config objects.
        """
        cli_args = (
            "--rchecker-memory-kvstore",
            "withargs",
            "--rchecker-memory-kvstore-withargs-filename",
            "somefile",
        )
        # ``load`` is patched for the whole parse-and-verify sequence
        with patch.object(BaseKeyValueStore, "load", load_kvstore_with_args):
            parsed_args = await parse_unknown(*cli_args)
            checker_config = MemoryRedundancyChecker.config(parsed_args)
            self.assertEqual(
                type(checker_config), MemoryRedundancyCheckerConfig
            )
            kvstore = checker_config.kvstore
            self.assertEqual(type(kvstore), KeyValueStoreWithArguments)
            self.assertEqual(
                type(kvstore.config), KeyValueStoreWithArgumentsConfig
            )
            self.assertEqual(kvstore.config.filename, "somefile")


# Definition passed in ``conditions`` to the test operations below.
CONDITION = Definition(name="condition", primitive="boolean")


class TestMemoryOrchestrator(AsyncTestCase):
    async def test_condition_does_run(self):
        ran = []

        @op(conditions=[CONDITION])
        async def condition_test(hi: str):
            ran.append(True)

        async with MemoryOrchestrator() as orchestrator:
            async with orchestrator(DataFlow(condition_test)) as octx:
                async for _ in octx.run([
                        Input(
                            value=True,
Beispiel #10
0
import json
import asyncio
from typing import Dict, Any

from dffml.df.base import op
from dffml.df.types import Definition

# Input of run_golangci_lint (``pkg``), used there as the working directory.
package_src_dir = Definition(name="package_src_dir", primitive="str")
# Output of run_golangci_lint (``report``).
golangci_lint_output = Definition(name="golangci_lint_output",
                                  primitive="Dict[str, Any]")


class GoLangCILintError(Exception):
    """
    Error raised when running golangci-lint fails.
    """


@op(inputs={"pkg": package_src_dir}, outputs={"report": golangci_lint_output})
async def run_golangci_lint(pkg: str) -> Dict[str, Any]:
    """
    CLI usage: dffml service dev run -log debug shouldi.golangci_lint:run_golangci_lint -pkg .
    """
    proc = await asyncio.create_subprocess_exec(
        "golangci-lint",
        "run",
        "--out-format",
        "json",
        "./...",
        cwd=pkg,
        stdout=asyncio.subprocess.PIPE,
Beispiel #11
0
import aiohttp
from typing import Dict, Any

from dffml.df.types import Definition
from dffml.df.base import op

# ``package`` is the input and ``package_version`` the output of
# pypi_latest_package_version.
package = Definition(name="package", primitive="str")
package_version = Definition(name="package_version", primitive="str")


@op(
    inputs={"package": package},
    outputs={"version": package_version},
    imp_enter={
        "session": (lambda self: aiohttp.ClientSession(trust_env=True))
    },
)
async def pypi_latest_package_version(self, package: str) -> Dict[str, Any]:
    """
    Look up ``package`` on PyPI and return its version.

    Requests ``https://pypi.org/pypi/<package>/json`` through
    ``self.parent.session`` and returns ``info.version`` from the JSON
    response under the ``version`` key.
    """
    endpoint = "https://pypi.org/pypi/{}/json".format(package)
    async with self.parent.session.get(endpoint) as response:
        metadata = await response.json()
        return {"version": metadata["info"]["version"]}
Beispiel #12
0
from dffml.df.types import Definition


class GitRepoSpec(NamedTuple):
    """
    Named tuple used as the ``spec`` of the ``git_repository`` definition.
    """

    directory: str
    # Optional; defaults to None when no URL is supplied
    URL: str = None


class GitRepoCheckedOutSpec(NamedTuple):
    """
    Named tuple with the same fields as GitRepoSpec plus a ``commit``.
    """

    directory: str
    # Optional; defaults to None when no URL is supplied
    URL: str = None
    # Optional; defaults to None when no commit is supplied
    commit: str = None


definitions = [
    Definition(name="quarter_start_date", primitive="int"),
    Definition(name="quarter", primitive="int"),
    Definition(name="quarters", primitive="int"),
    Definition(name="URL", primitive="string"),
    Definition(name="valid_git_repository_URL", primitive="boolean"),
    Definition(name="git_branch", primitive="str"),
    Definition(
        name="git_repository",
        primitive="Dict[str, str]",
        lock=True,
        spec=GitRepoSpec,
    ),
    Definition(
        name="git_repository_checked_out",
        primitive="Dict[str, str]",
        lock=True,
Beispiel #13
0
import aiohttp
import tempfile
import shutil
from typing import Dict, Any

from dffml.df.types import Definition, Stage
from dffml.df.base import op

# Definitions for the operations in this module. pypi_package_json takes
# ``package`` as its input and produces ``package_json`` as its output.
package = Definition(name="package", primitive="str")
package_json = Definition(name="package_json", primitive="Dict[str, Any]")
package_version = Definition(name="package_version", primitive="str")
package_url = Definition(name="package_url", primitive="str")
package_src_dir = Definition(name="package_src_dir", primitive="str")


@op(
    inputs={"package": package},
    outputs={"response_json": package_json},
    imp_enter={
        "session": (lambda self: aiohttp.ClientSession(trust_env=True))
    },
)
async def pypi_package_json(self, package: str) -> Dict[str, Any]:
    """
    Download the PyPI JSON document for ``package``.

    Requests ``https://pypi.org/pypi/<package>/json`` through
    ``self.parent.session`` and returns the decoded JSON body under the
    ``response_json`` key.
    """
    endpoint = "https://pypi.org/pypi/{}/json".format(package)
    async with self.parent.session.get(endpoint) as response:
        payload = await response.json()
        return {"response_json": payload}


@op(
    inputs={"response_json": package_json},
Beispiel #14
0
from dffml.df.base import op
from dffml.df.types import DataFlow, Input, Definition
from dffml.operation.output import GetSingle
from dffml.util.asynctestcase import AsyncTestCase
from dffml.df.memory import MemoryOrchestrator
from dffml.operation.mapping import MAPPING
from dffml.df.exceptions import InputValidationError


def pie_validation(x):
    """
    Validate that ``x`` equals 3.14.

    Returns ``x`` unchanged when it does; raises InputValidationError
    otherwise.
    """
    if x != 3.14:
        raise InputValidationError()
    return x


# Definitions for the test operations. ``Pie`` is validated by pie_validation,
# which returns the value unchanged or raises InputValidationError.
Pie = Definition(name="pie", primitive="float", validate=pie_validation)
Radius = Definition(name="radius", primitive="float")
Area = Definition(name="area", primitive="float")
# The validator returns the upper-cased value.
# NOTE(review): presumably the returned value replaces the original input;
# confirm against how Definition applies ``validate``.
ShapeName = Definition(name="shape_name",
                       primitive="str",
                       validate=lambda x: x.upper())


@op(
    inputs={
        "name": ShapeName,
        "radius": Radius,
        "pie": Pie
    },
    outputs={"shape": MAPPING},
)
Beispiel #15
0
from dffml.source.df import DataFlowSource, DataFlowSourceConfig
from dffml.util.asynctestcase import AsyncTestCase
from dffml.feature import Features, Feature
from dffml.source.source import Sources
from dffml.source.memory import MemorySource, MemorySourceConfig
from dffml.record import Record
from dffml.df.base import op
from dffml.df.types import Input, DataFlow, Definition, InputFlow
from dffml.operation.output import AssociateDefinition


# Single definition used for both the input and the output of edit_feature.
edit_feature_def = Definition(name="feature_data", primitive="generic")


@op(
    name="edit_feature",
    inputs={"features": edit_feature_def},
    outputs={"updated_features": edit_feature_def},
)
async def edit_feature(features):
    """
    Multiply ``features`` by 10 and return the product under the
    ``updated_features`` key.
    """
    return {"updated_features": features * 10}


# Rows of four values each; the columns are unpacked below as A, B, C and D.
FEATURE_DATA = [
    [0, 1, 0.1, 10],
    [1, 3, 0.2, 20],
    [2, 5, 0.3, 30],
    [3, 7, 0.4, 40],
]
# Transpose the rows: each name is bound to one column, as a tuple.
A, B, C, D = zip(*FEATURE_DATA)
Beispiel #16
0
import sys
from dffml.df.types import Definition

# All definitions declared by this module.
definitions = [
    Definition(name="input_file", primitive="bytes"),
    Definition(name="Resolution", primitive="int"),
    Definition(name="output_file", primitive="bytes"),
]

# Bind each definition as a module attribute named after ``definition.name``
# (for example ``input_file``), so it is reachable by that name on the module.
for definition in definitions:
    setattr(sys.modules[__name__], definition.name, definition)
Beispiel #17
0
import sys
from dffml.df.types import Definition

# All definitions declared by this module.
definitions = [
    Definition(name="webhook_headers", primitive="Dict[str,Any]"),
    Definition(name="payload", primitive="bytes"),
    Definition(name="git_payload", primitive="Dict[str,Any]"),
    Definition(name="docker_image_id", primitive="str"),
    Definition(name="is_default_branch", primitive="bool"),
    Definition(name="docker_image_tag", primitive="str"),
    Definition(name="docker_running_containers", primitive="List[str]"),
    Definition(name="got_running_containers", primitive="bool"),
    Definition(name="is_image_built", primitive="bool"),
    Definition(name="docker_commands", primitive="Dict[str,Any]"),
    Definition(name="docker_restarted_containers", primitive="str"),
]

# Bind each definition as a module attribute named after ``definition.name``
# (for example ``payload``), so it is reachable by that name on the module.
for definition in definitions:
    setattr(sys.modules[__name__], definition.name, definition)
Beispiel #18
0
from dffml.df.base import op
from dffml.df.types import DataFlow, Input, Definition
from dffml.operation.output import GetSingle
from dffml.util.asynctestcase import AsyncTestCase
from dffml.df.memory import MemoryOrchestrator
from dffml.operation.mapping import MAPPING
from dffml.df.exceptions import InputValidationError


def pie_validation(x):
    """
    Validate that ``x`` equals 3.14.

    Returns ``x`` unchanged when it does; raises InputValidationError
    otherwise.
    """
    if x != 3.14:
        raise InputValidationError()
    return x


# Definitions for the test operations. ``Pie`` is validated by pie_validation,
# which returns the value unchanged or raises InputValidationError.
Pie = Definition(name="pie", primitive="float", validate=pie_validation)
Radius = Definition(name="radius", primitive="float")
Area = Definition(name="area", primitive="float")
# The validator returns the upper-cased value.
# NOTE(review): presumably the returned value replaces the original input;
# confirm against how Definition applies ``validate``.
ShapeName = Definition(name="shape_name",
                       primitive="str",
                       validate=lambda x: x.upper())
# Here ``validate`` is a string rather than a callable.
# NOTE(review): presumably the name of an operation that performs the
# validation; confirm against how Definition resolves string validators.
SHOUTIN = Definition(name="shout_in",
                     primitive="str",
                     validate="validate_shout_instance")
SHOUTOUT = Definition(name="shout_out", primitive="str")


@op(
    inputs={
        "name": ShapeName,
        "radius": Radius,
Beispiel #19
0
from dffml.df.base import op
from dffml.df.types import DataFlow, Input, Definition
from dffml.operation.output import GetSingle
from dffml.util.asynctestcase import AsyncTestCase
from dffml.df.memory import MemoryOrchestrator

# Definition of the ``string_out`` output produced by announce.
STRING = Definition(name="string", primitive="str")


@op(inputs={}, outputs={"string_out": STRING})
async def announce():
    """
    Take no inputs and return the constant ``"EXISTS"`` under the
    ``string_out`` key.
    """
    announcement = "EXISTS"
    return {"string_out": announcement}


class TestAutoStart(AsyncTestCase):
    async def setUp(self):
        await super().setUp()
        dataflow = DataFlow(
            operations={
                "announce": announce.op,
                "get_single": GetSingle.imp.op,
            },
            seed=[
                Input(
                    value=[announce.op.outputs["string_out"].name],
                    definition=GetSingle.op.inputs["spec"],
                )
            ],
            implementations={announce.op.name: announce.imp},
        )
Beispiel #20
0
import io
import json
import asyncio
from typing import Dict, Any

from dffml.df.types import Definition
from dffml.df.base import op

from .pypi import package, package_version

# Definition of the ``issues`` output of the safety_check operation.
safety_check_number_of_issues = Definition(
    name="safety_check_number_of_issues", primitive="int"
)


@op(
    name="safety_check",
    inputs={"package": package, "version": package_version},
    outputs={"issues": safety_check_number_of_issues},
    conditions=[],
)
async def safety_check(package: str, version: str) -> Dict[str, Any]:
    pinned = f"{package}=={version}"

    proc = await asyncio.create_subprocess_exec(
        "safety",
        "check",
        "--stdin",
        "--json",
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
Beispiel #21
0
    OperationImplementationNotInstantiable,
    OperationImplementationNotInstantiated,
)
from dffml.df.memory import (
    MemoryKeyValueStore,
    MemoryOperationImplementationNetwork,
    MemoryOrchestrator,
    MemoryInputSet,
    MemoryInputSetConfig,
)
from dffml.df.archive import create_archive_dataflow
from dffml.operation.output import GetSingle
from dffml.util.asynctestcase import AsyncTestCase

# All definitions declared by this module.
definitions = [
    Definition(name="calc_string", primitive="string"),
    Definition(name="is_add", primitive="bool"),
    Definition(name="is_mult", primitive="bool"),
    Definition(name="numbers", primitive="List[int]"),
    Definition(name="result", primitive="int"),
]

# Bind each definition as a module attribute named after ``definition.name``.
# This is what makes bare names such as ``numbers``, ``result`` and ``is_add``
# available to the operation decorators further down.
for definition in definitions:
    setattr(sys.modules[__name__], definition.name, definition)


@op(inputs={"numbers": numbers}, outputs={"sum": result}, conditions=[is_add])
async def add(numbers: List[int]):
    """
    Return the sum of ``numbers`` under the ``sum`` key.
    """
    total = sum(numbers)
    return {"sum": total}

Beispiel #22
0
import sys
import json
import asyncio
from typing import Dict, Any

from dffml.df.base import op
from dffml.df.types import Definition

# ``package_src_dir`` is the ``pkg`` input and ``bandit_output`` the ``report``
# output of run_bandit.
package_src_dir = Definition(name="package_src_dir", primitive="str")
bandit_output = Definition(name="bandit_output", primitive="Dict[str, Any]")


@op(inputs={"pkg": package_src_dir}, outputs={"report": bandit_output})
async def run_bandit(pkg: str) -> Dict[str, Any]:
    """
    CLI usage: dffml service dev run -log debug shouldi.bandit:run_bandit -pkg .
    """
    proc = await asyncio.create_subprocess_exec(
        sys.executable,
        "-m",
        "bandit",
        "-r",
        "-f",
        "json",
        pkg,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    stdout, _stderr = await proc.communicate()
    if len(stdout) == 0:
Beispiel #23
0
import sys
import json
import asyncio
from typing import Dict, Any

from dffml.df.base import op
from dffml.df.types import Definition

# ``package`` and ``package_version`` are the inputs of safety_check;
# ``safety_check_number_of_issues`` is its ``issues`` output.
package = Definition(name="package", primitive="str")
package_version = Definition(name="package_version", primitive="str")
safety_check_number_of_issues = Definition(
    name="safety_check_number_of_issues", primitive="int"
)


@op(
    name="safety_check",
    inputs={"package": package, "version": package_version},
    outputs={"issues": safety_check_number_of_issues},
    conditions=[],
)
async def safety_check(package: str, version: str) -> Dict[str, Any]:
    pinned = f"{package}=={version}"

    proc = await asyncio.create_subprocess_exec(
        sys.executable,
        "-m",
        "safety",
        "check",
        "--stdin",
        "--json",
Beispiel #24
0
import asyncio
import concurrent.futures
from typing import Dict, Any

from dffml.df.types import Operation, Definition
from dffml.df.base import (
    op,
    OperationImplementationContext,
    OperationImplementation,
)


# Definitions
UserInput = Definition(name="UserInput", primitive="str")
DataToPrint = Definition(name="DataToPrint", primitive="generic")

# Operation declared without inputs or conditions; its single output,
# ``InputData``, uses the UserInput definition.
AcceptUserInput = Operation(
    name="AcceptUserInput",
    inputs={},
    outputs={"InputData": UserInput},
    conditions=[],
)


class AcceptUserInputContext(OperationImplementationContext):
    @staticmethod
    def receive_input():
        print("Enter the value: ", end="")
        return input()

    async def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
Beispiel #25
0
import io
import os
import sys
import json
import asyncio
from typing import Dict, Any

from dffml.df.types import Definition
from dffml.df.base import op
from .pypi import package_src_dir

# Definition of the ``report`` output of run_bandit.
bandit_output = Definition(name="bandit_output", primitive="Dict[str, Any]")


@op(inputs={"pkg": package_src_dir}, outputs={"report": bandit_output})
async def run_bandit(pkg: str) -> Dict[str, Any]:
    """
    CLI usage: dffml service dev run -log debug shouldi.bandit:run_bandit -pkg .
    """
    proc = await asyncio.create_subprocess_exec(
        sys.executable,
        "-m",
        "bandit",
        "-r",
        "-f",
        "json",
        pkg,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
Beispiel #26
0
import copy
from typing import NamedTuple

from dffml.df.base import op
from dffml.df.types import Input, Definition, DataFlow
from dffml.operation.output import GetSingle, remap


class FormatterConfig(NamedTuple):
    """
    Configuration for the ``formatter`` operation.
    """

    # Format string; ``formatter`` calls ``.format(data)`` on it
    formatting: str


@op(
    inputs={"data": Definition(name="format_data", primitive="string")},
    outputs={"string": Definition(name="message", primitive="string")},
    config_cls=FormatterConfig,
)
def formatter(data: str, op_config: FormatterConfig):
    """
    Substitute ``data`` into the configured format string.

    Returns the result of ``op_config.formatting.format(data)`` under the
    ``string`` key.
    """
    message = op_config.formatting.format(data)
    return {"string": message}


HELLO_BLANK_DATAFLOW = DataFlow(
    operations={
        "hello_blank": formatter.op,
        "remap_to_response": remap.op
    },
    configs={
        "hello_blank": {
            "formatting": "Hello {}"
        },
        "remap_to_response": {
Beispiel #27
0
import ast
from dffml.df.types import Definition
from dffml.df.base import op

# Definitions: literal_eval takes InputStr (``str_to_eval``) and outputs
# EvaluatedStr (``str_after_eval``).
InputStr = Definition(name="InputStr", primitive="str")
EvaluatedStr = Definition(name="EvaluatedStr", primitive="generic")


@op(
    inputs={"str_to_eval": InputStr},
    outputs={"str_after_eval": EvaluatedStr},
    conditions=[],
)
async def literal_eval(str_to_eval: str):
    """
    Evaluate the input using ast.literal_eval()

    Parameters
    ++++++++++
    str_to_eval : str
        A string to be evaluated.

    Returns
    +++++++
    dict
        A dict containing python literal.

    Examples
    ++++++++
Beispiel #28
0
import shutil
import tempfile
from typing import Dict, Any

import aiohttp

from dffml.df.base import op
from dffml.df.types import Definition, Stage

from .safety import package, package_version
from .bandit import package_src_dir

# ``package_json`` is the ``response_json`` output of pypi_package_json.
package_json = Definition(name="package_json", primitive="Dict[str, Any]")
package_url = Definition(name="package_url", primitive="str")


@op(
    inputs={"package": package},
    outputs={"response_json": package_json},
    # imp_enter allows us to create instances of objects which are async context
    # managers and assign them to self.parent which is an object of type
    # OperationImplementation which will be alive for the lifetime of the
    # Orchestrator which runs all these operations.
    imp_enter={
        "session": (lambda self: aiohttp.ClientSession(trust_env=True))
    },
)
async def pypi_package_json(self, package: str) -> Dict[str, Any]:
    """
    Download the information on the package in JSON format.
    """
from dffml.df.types import DataFlow, Input, Definition
from dffml.df.base import op
from dffml.df.memory import MemoryOrchestrator
from dffml.operation.output import GetMulti
from dffml.util.asynctestcase import AsyncTestCase

# CountStart: input of counter.
# Count: output of counter and counter_auto_start, and input of echo_num.
# Number: output of echo_num.
CountStart = Definition(name="count_start", primitive="int")
Count = Definition(name="count", primitive="int")
Number = Definition(name="number", primitive="int")


@op(inputs={"count_start": CountStart}, outputs={"count": Count})
async def counter(count_start):
    """
    Yield five consecutive counts, starting at ``count_start``, each as a
    ``{"count": value}`` mapping.
    """
    stop = count_start + 5
    for current in range(count_start, stop):
        yield {"count": current}


@op(inputs={}, outputs={"count": Count})
async def counter_auto_start():
    """
    Take no inputs and yield the counts 0 through 4, each as a
    ``{"count": value}`` mapping.
    """
    for current in range(0, 5):
        yield {"count": current}


@op(inputs={"number_in": Count}, outputs={"number_out": Number})
def echo_num(number_in: int):
    """
    Return ``number_in`` unchanged under the ``number_out`` key.
    """
    echoed = {"number_out": number_in}
    return echoed


class TestAsyncIter(AsyncTestCase):
    async def test_gen_with_input(self):
        test_dataflow = DataFlow.auto(GetMulti, counter, echo_num)