async def test_associatedefinition(self):
    """AssociateDefinition should map each source definition name to the
    value of the child Input created under the "output" definition."""
    source_defs = {
        name: Definition(name=name, primitive="string")
        for name in ("feed", "dead")
    }
    output_def = Definition(name="output", primitive="string")
    # Each parent Input gets exactly one child carrying its associated value.
    parent_feed = Input(
        value="my favorite value", definition=source_defs["feed"]
    )
    parent_dead = Input(
        value="my second favorite value", definition=source_defs["dead"]
    )
    base_inputs = [
        parent_feed,
        Input(value="face", definition=output_def, parents=[parent_feed]),
        parent_dead,
        Input(value="beef", definition=output_def, parents=[parent_dead]),
    ]
    expected = {"feed": "face", "dead": "beef"}
    for source_name, associated_value in expected.items():
        spec = Input(
            value={source_name: "output"},
            definition=AssociateDefinition.op.inputs["spec"],
        )
        async for _ctx, results in MemoryOrchestrator.run(
            DataFlow.auto(AssociateDefinition),
            base_inputs + [spec],
        ):
            self.assertEqual(results, {source_name: associated_value})
async def input_set(self, record: Record) -> List[Input]:
    """Assemble the list of Inputs fed to the dataflow for one record.

    Always includes one Input per configured feature, plus any statically
    configured inputs; optionally appends the source length and the record
    key when the respective definition names are configured.
    """
    config = self.parent.config
    # One Input per configured feature, typed from the feature's dtype.
    inputs = [
        Input(
            value=record.feature(feature.name),
            definition=Definition(
                name=feature.name,
                primitive=str(feature.dtype()),
            ),
        )
        for feature in config.features
    ]
    # Statically configured (value, definition-name) pairs.
    for value, name in config.inputs:
        inputs.append(
            Input(value=value, definition=config.dataflow.definitions[name])
        )
    # Only query the source length when a definition name was configured.
    if config.length:
        inputs.append(
            Input(
                value=await self.sctx.length(),
                definition=Definition(name=config.length, primitive="int"),
            )
        )
    # Expose the record key under the configured definition name, if any.
    if config.record_def:
        inputs.append(
            Input(
                value=record.key,
                definition=Definition(
                    name=config.record_def, primitive="string"
                ),
            )
        )
    return inputs
def _create_dataflow(self, input_, output):
    """Build an archive dataflow wired to the given input and output paths."""
    source = Input(
        value=input_,
        definition=Definition("test_inp", primitive="str"),
        origin="input_path",
    )
    destination = Input(
        value=output,
        definition=Definition("test_out", primitive="str"),
        origin="output_path",
    )
    return create_archive_dataflow({source, destination})
async def records(self) -> AsyncIterator[Record]:
    """Yield every source record, enriched with dataflow results.

    For each record, the configured features are converted to Inputs and
    run through the orchestrator context; non-empty results are recorded
    via ``record.evaluated`` before the record is yielded.
    """
    async for record in self.sctx.records():
        # Convert each configured feature into a dataflow Input.
        feature_inputs = [
            Input(
                value=record.feature(feature.name),
                definition=Definition(
                    name=feature.name,
                    primitive=str(feature.dtype()),
                ),
            )
            for feature in self.parent.config.features
        ]
        async for _ctx, result in self.octx.run(feature_inputs):
            # Skip empty results; only store meaningful evaluations.
            if result:
                record.evaluated(result)
        yield record
ngram_range=ngram_range, analyzer=analyzer, max_df=max_df, min_df=min_df, max_features=max_features, vocabulary=vocabulary, binary=binary, ) names = None X = vectorizer.fit_transform(text).toarray() if get_feature_names: names = vectorizer.get_feature_names() return [X, names] collected_data = Definition(name="collected_data", primitive="List[str]") data_received = Definition(name="data_received", primitive="bool") @op( inputs={"data": collected_data}, outputs={"status": data_received}, ) def get_status_collected_data(data): return {"status": True} data_example = Definition(name="data_example", primitive="str") @op( inputs={"data": data_example}, outputs={"all_data": collected_data},
from typing import Dict, Any from dffml.base import config from dffml.df.base import op from dffml.df.types import DataFlow, Input, Definition @config class RunDataFlowConfig: dataflow: DataFlow @op( name="dffml.dataflow.run", inputs={ "inputs": Definition(name="flow_inputs", primitive="Dict[str,Any]") }, outputs={ "results": Definition(name="flow_results", primitive="Dict[str,Any]") }, config_cls=RunDataFlowConfig, expand=["results"], ) async def run_dataflow(self, inputs: Dict[str, Any]) -> Dict[str, Any]: """ Starts a subflow ``self.config.dataflow`` and adds ``inputs`` in it. Parameters ---------- inputs : dict The inputs to add to the subflow. These should be a key value mapping of
if custom_stop_words: if token.text not in custom_stop_words: all_tokens.append(token.text) else: all_tokens.append(token.text) # Create list of word tokens after removing stopwords for word in all_tokens: lexeme = nlp.vocab[word] if lexeme.is_stop == False: clean_tokens.append(word) return " ".join(clean_tokens) # Definitions text_def = Definition(name="text_def", primitive="str") max_len_def = Definition(name="max_len_def", primitive="int") pad_token_def = Definition(name="pad_token_def", primitive="str") spacy_model_name_def = Definition(name="spacy_model_name_def", primitive="str") embedding_def = Definition(name="embedding", primitive="generic") @op( name="get_embedding", inputs={ "text": text_def, "spacy_model": spacy_model_name_def, "max_len": max_len_def, "pad_token": pad_token_def, }, outputs={"embedding": embedding_def},
import sys

from dffml.df.types import Definition

# (name, primitive) pairs for every definition this module exports.
_SPECS = (
    ("input_data", "List[List[int]]"),
    ("output_data", "List[List[int]]"),
    ("n_components", "int"),
    ("n_iter", "int"),
    ("random_state", "int"),
    ("missing_values", "Any"),
    ("strategy", "str"),
    ("categories", "List[List[Any]]"),
)

definitions = [
    Definition(name=name, primitive=primitive) for name, primitive in _SPECS
]

# Also expose each definition as a module-level attribute under its own name
# so callers can import them directly.
for definition in definitions:
    setattr(sys.modules[__name__], definition.name, definition)
async def test_config_default_label(self): with patch.object(BaseKeyValueStore, "load", load_kvstore_with_args): was = MemoryRedundancyChecker.config(await parse_unknown( "--rchecker-memory-kvstore", "withargs", "--rchecker-memory-kvstore-withargs-filename", "somefile", )) self.assertEqual(type(was), MemoryRedundancyCheckerConfig) self.assertEqual(type(was.kvstore), KeyValueStoreWithArguments) self.assertEqual(type(was.kvstore.config), KeyValueStoreWithArgumentsConfig) self.assertEqual(was.kvstore.config.filename, "somefile") CONDITION = Definition(name="condition", primitive="boolean") class TestMemoryOrchestrator(AsyncTestCase): async def test_condition_does_run(self): ran = [] @op(conditions=[CONDITION]) async def condition_test(hi: str): ran.append(True) async with MemoryOrchestrator() as orchestrator: async with orchestrator(DataFlow(condition_test)) as octx: async for _ in octx.run([ Input( value=True,
import json import asyncio from typing import Dict, Any from dffml.df.base import op from dffml.df.types import Definition package_src_dir = Definition(name="package_src_dir", primitive="str") golangci_lint_output = Definition(name="golangci_lint_output", primitive="Dict[str, Any]") class GoLangCILintError(Exception): """ Raised when golangci-lint fails """ @op(inputs={"pkg": package_src_dir}, outputs={"report": golangci_lint_output}) async def run_golangci_lint(pkg: str) -> Dict[str, Any]: """ CLI usage: dffml service dev run -log debug shouldi.golangci_lint:run_golangci_lint -pkg . """ proc = await asyncio.create_subprocess_exec( "golangci-lint", "run", "--out-format", "json", "./...", cwd=pkg, stdout=asyncio.subprocess.PIPE,
import aiohttp
from typing import Dict, Any

from dffml.df.types import Definition
from dffml.df.base import op

package = Definition(name="package", primitive="str")
package_version = Definition(name="package_version", primitive="str")


@op(
    inputs={"package": package},
    outputs={"version": package_version},
    imp_enter={
        "session": (lambda self: aiohttp.ClientSession(trust_env=True))
    },
)
async def pypi_latest_package_version(self, package: str) -> Dict[str, Any]:
    """
    Look up the latest released version of a package on PyPI.

    Parameters
    ----------
    package : str
        Name of the package to query.

    Returns
    -------
    dict
        ``{"version": <latest version string>}`` taken from the PyPI JSON
        API's ``info.version`` field.
    """
    url = f"https://pypi.org/pypi/{package}/json"
    async with self.parent.session.get(url) as resp:
        # Bind the response payload to its own name instead of shadowing
        # the ``package`` parameter (the original rebinding made the code
        # misleading: ``package`` silently changed from str to dict).
        package_json = await resp.json()
        return {"version": package_json["info"]["version"]}
from dffml.df.types import Definition class GitRepoSpec(NamedTuple): directory: str URL: str = None class GitRepoCheckedOutSpec(NamedTuple): directory: str URL: str = None commit: str = None definitions = [ Definition(name="quarter_start_date", primitive="int"), Definition(name="quarter", primitive="int"), Definition(name="quarters", primitive="int"), Definition(name="URL", primitive="string"), Definition(name="valid_git_repository_URL", primitive="boolean"), Definition(name="git_branch", primitive="str"), Definition( name="git_repository", primitive="Dict[str, str]", lock=True, spec=GitRepoSpec, ), Definition( name="git_repository_checked_out", primitive="Dict[str, str]", lock=True,
import aiohttp import tempfile import shutil from typing import Dict, Any from dffml.df.types import Definition, Stage from dffml.df.base import op package = Definition(name="package", primitive="str") package_json = Definition(name="package_json", primitive="Dict[str, Any]") package_version = Definition(name="package_version", primitive="str") package_url = Definition(name="package_url", primitive="str") package_src_dir = Definition(name="package_src_dir", primitive="str") @op( inputs={"package": package}, outputs={"response_json": package_json}, imp_enter={ "session": (lambda self: aiohttp.ClientSession(trust_env=True)) }, ) async def pypi_package_json(self, package: str) -> Dict[str, Any]: url = f"https://pypi.org/pypi/{package}/json" async with self.parent.session.get(url) as resp: package_json = await resp.json() return {"response_json": package_json} @op( inputs={"response_json": package_json},
from dffml.df.base import op from dffml.df.types import DataFlow, Input, Definition from dffml.operation.output import GetSingle from dffml.util.asynctestcase import AsyncTestCase from dffml.df.memory import MemoryOrchestrator from dffml.operation.mapping import MAPPING from dffml.df.exceptions import InputValidationError def pie_validation(x): if x == 3.14: return x raise InputValidationError() Pie = Definition(name="pie", primitive="float", validate=pie_validation) Radius = Definition(name="radius", primitive="float") Area = Definition(name="area", primitive="float") ShapeName = Definition(name="shape_name", primitive="str", validate=lambda x: x.upper()) @op( inputs={ "name": ShapeName, "radius": Radius, "pie": Pie }, outputs={"shape": MAPPING}, )
from dffml.source.df import DataFlowSource, DataFlowSourceConfig
from dffml.util.asynctestcase import AsyncTestCase
from dffml.feature import Features, Feature
from dffml.source.source import Sources
from dffml.source.memory import MemorySource, MemorySourceConfig
from dffml.record import Record
from dffml.df.base import op
from dffml.df.types import Input, DataFlow, Definition, InputFlow
from dffml.operation.output import AssociateDefinition

# Single generic definition shared by both the input and the output of the
# edit_feature operation.
edit_feature_def = Definition(name="feature_data", primitive="generic")


@op(
    name="edit_feature",
    inputs={"features": edit_feature_def},
    outputs={"updated_features": edit_feature_def},
)
async def edit_feature(features):
    """Return the feature value scaled by a factor of ten."""
    return {"updated_features": features * 10}


# Sample rows; each row holds one value per column A, B, C, D.
FEATURE_DATA = [
    [0, 1, 0.1, 10],
    [1, 3, 0.2, 20],
    [2, 5, 0.3, 30],
    [3, 7, 0.4, 40],
]
# Transpose the rows into per-column tuples.
A, B, C, D = zip(*FEATURE_DATA)
import sys

from dffml.df.types import Definition

# Build every definition this module exports from (name, primitive) pairs.
definitions = [
    Definition(name=name, primitive=primitive)
    for name, primitive in (
        ("input_file", "bytes"),
        ("Resolution", "int"),
        ("output_file", "bytes"),
    )
]

# Also expose each definition as a module-level attribute under its own name.
for definition in definitions:
    setattr(sys.modules[__name__], definition.name, definition)
import sys

from dffml.df.types import Definition

# Build every definition this module exports from (name, primitive) pairs.
definitions = [
    Definition(name=name, primitive=primitive)
    for name, primitive in (
        ("webhook_headers", "Dict[str,Any]"),
        ("payload", "bytes"),
        ("git_payload", "Dict[str,Any]"),
        ("docker_image_id", "str"),
        ("is_default_branch", "bool"),
        ("docker_image_tag", "str"),
        ("docker_running_containers", "List[str]"),
        ("got_running_containers", "bool"),
        ("is_image_built", "bool"),
        ("docker_commands", "Dict[str,Any]"),
        ("docker_restarted_containers", "str"),
    )
]

# Also expose each definition as a module-level attribute under its own name.
for definition in definitions:
    setattr(sys.modules[__name__], definition.name, definition)
from dffml.df.base import op from dffml.df.types import DataFlow, Input, Definition from dffml.operation.output import GetSingle from dffml.util.asynctestcase import AsyncTestCase from dffml.df.memory import MemoryOrchestrator from dffml.operation.mapping import MAPPING from dffml.df.exceptions import InputValidationError def pie_validation(x): if x == 3.14: return x raise InputValidationError() Pie = Definition(name="pie", primitive="float", validate=pie_validation) Radius = Definition(name="radius", primitive="float") Area = Definition(name="area", primitive="float") ShapeName = Definition(name="shape_name", primitive="str", validate=lambda x: x.upper()) SHOUTIN = Definition(name="shout_in", primitive="str", validate="validate_shout_instance") SHOUTOUT = Definition(name="shout_out", primitive="str") @op( inputs={ "name": ShapeName, "radius": Radius,
from dffml.df.base import op from dffml.df.types import DataFlow, Input, Definition from dffml.operation.output import GetSingle from dffml.util.asynctestcase import AsyncTestCase from dffml.df.memory import MemoryOrchestrator STRING = Definition(name="string", primitive="str") @op(inputs={}, outputs={"string_out": STRING}) async def announce(): return {"string_out": "EXISTS"} class TestAutoStart(AsyncTestCase): async def setUp(self): await super().setUp() dataflow = DataFlow( operations={ "announce": announce.op, "get_single": GetSingle.imp.op, }, seed=[ Input( value=[announce.op.outputs["string_out"].name], definition=GetSingle.op.inputs["spec"], ) ], implementations={announce.op.name: announce.imp}, )
import io import json import asyncio from typing import Dict, Any from dffml.df.types import Definition from dffml.df.base import op from .pypi import package, package_version safety_check_number_of_issues = Definition( name="safety_check_number_of_issues", primitive="int" ) @op( name="safety_check", inputs={"package": package, "version": package_version}, outputs={"issues": safety_check_number_of_issues}, conditions=[], ) async def safety_check(package: str, version: str) -> Dict[str, Any]: pinned = f"{package}=={version}" proc = await asyncio.create_subprocess_exec( "safety", "check", "--stdin", "--json", stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE,
OperationImplementationNotInstantiable, OperationImplementationNotInstantiated, ) from dffml.df.memory import ( MemoryKeyValueStore, MemoryOperationImplementationNetwork, MemoryOrchestrator, MemoryInputSet, MemoryInputSetConfig, ) from dffml.df.archive import create_archive_dataflow from dffml.operation.output import GetSingle from dffml.util.asynctestcase import AsyncTestCase definitions = [ Definition(name="calc_string", primitive="string"), Definition(name="is_add", primitive="bool"), Definition(name="is_mult", primitive="bool"), Definition(name="numbers", primitive="List[int]"), Definition(name="result", primitive="int"), ] for definition in definitions: setattr(sys.modules[__name__], definition.name, definition) @op(inputs={"numbers": numbers}, outputs={"sum": result}, conditions=[is_add]) async def add(numbers: List[int]): return {"sum": sum(numbers)}
import sys import json import asyncio from typing import Dict, Any from dffml.df.base import op from dffml.df.types import Definition package_src_dir = Definition(name="package_src_dir", primitive="str") bandit_output = Definition(name="bandit_output", primitive="Dict[str, Any]") @op(inputs={"pkg": package_src_dir}, outputs={"report": bandit_output}) async def run_bandit(pkg: str) -> Dict[str, Any]: """ CLI usage: dffml service dev run -log debug shouldi.bandit:run_bandit -pkg . """ proc = await asyncio.create_subprocess_exec( sys.executable, "-m", "bandit", "-r", "-f", "json", pkg, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) stdout, _stderr = await proc.communicate() if len(stdout) == 0:
import sys import json import asyncio from typing import Dict, Any from dffml.df.base import op from dffml.df.types import Definition package = Definition(name="package", primitive="str") package_version = Definition(name="package_version", primitive="str") safety_check_number_of_issues = Definition( name="safety_check_number_of_issues", primitive="int" ) @op( name="safety_check", inputs={"package": package, "version": package_version}, outputs={"issues": safety_check_number_of_issues}, conditions=[], ) async def safety_check(package: str, version: str) -> Dict[str, Any]: pinned = f"{package}=={version}" proc = await asyncio.create_subprocess_exec( sys.executable, "-m", "safety", "check", "--stdin", "--json",
import asyncio import concurrent.futures from typing import Dict, Any from dffml.df.types import Operation, Definition from dffml.df.base import ( op, OperationImplementationContext, OperationImplementation, ) # Definitions UserInput = Definition(name="UserInput", primitive="str") DataToPrint = Definition(name="DataToPrint", primitive="generic") AcceptUserInput = Operation( name="AcceptUserInput", inputs={}, outputs={"InputData": UserInput}, conditions=[], ) class AcceptUserInputContext(OperationImplementationContext): @staticmethod def receive_input(): print("Enter the value: ", end="") return input() async def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
import io import os import sys import json import asyncio from typing import Dict, Any from dffml.df.types import Definition from dffml.df.base import op from .pypi import package_src_dir bandit_output = Definition(name="bandit_output", primitive="Dict[str, Any]") @op(inputs={"pkg": package_src_dir}, outputs={"report": bandit_output}) async def run_bandit(pkg: str) -> Dict[str, Any]: """ CLI usage: dffml service dev run -log debug shouldi.bandit:run_bandit -pkg . """ proc = await asyncio.create_subprocess_exec( sys.executable, "-m", "bandit", "-r", "-f", "json", pkg, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, )
import copy from typing import NamedTuple from dffml.df.base import op from dffml.df.types import Input, Definition, DataFlow from dffml.operation.output import GetSingle, remap class FormatterConfig(NamedTuple): formatting: str @op( inputs={"data": Definition(name="format_data", primitive="string")}, outputs={"string": Definition(name="message", primitive="string")}, config_cls=FormatterConfig, ) def formatter(data: str, op_config: FormatterConfig): return {"string": op_config.formatting.format(data)} HELLO_BLANK_DATAFLOW = DataFlow( operations={ "hello_blank": formatter.op, "remap_to_response": remap.op }, configs={ "hello_blank": { "formatting": "Hello {}" }, "remap_to_response": {
import ast from dffml.df.types import Definition from dffml.df.base import op # Definitions InputStr = Definition(name="InputStr", primitive="str") EvaluatedStr = Definition(name="EvaluatedStr", primitive="generic") @op( inputs={"str_to_eval": InputStr}, outputs={"str_after_eval": EvaluatedStr}, conditions=[], ) async def literal_eval(str_to_eval: str): """ Evaluate the input using ast.literal_eval() Parameters ++++++++++ str_to_eval : str A string to be evaluated. Returns +++++++ dict A dict containing python literal. Examples ++++++++
import shutil import tempfile from typing import Dict, Any import aiohttp from dffml.df.base import op from dffml.df.types import Definition, Stage from .safety import package, package_version from .bandit import package_src_dir package_json = Definition(name="package_json", primitive="Dict[str, Any]") package_url = Definition(name="package_url", primitive="str") @op( inputs={"package": package}, outputs={"response_json": package_json}, # imp_enter allows us to create instances of objects which are async context # managers and assign them to self.parent which is an object of type # OperationImplementation which will be alive for the lifetime of the # Orchestrator which runs all these operations. imp_enter={ "session": (lambda self: aiohttp.ClientSession(trust_env=True)) }, ) async def pypi_package_json(self, package: str) -> Dict[str, Any]: """ Download the information on the package in JSON format. """
from dffml.df.types import DataFlow, Input, Definition from dffml.df.base import op from dffml.df.memory import MemoryOrchestrator from dffml.operation.output import GetMulti from dffml.util.asynctestcase import AsyncTestCase CountStart = Definition(name="count_start", primitive="int") Count = Definition(name="count", primitive="int") Number = Definition(name="number", primitive="int") @op(inputs={"count_start": CountStart}, outputs={"count": Count}) async def counter(count_start): for i in range(count_start, count_start + 5): yield {"count": i} @op(inputs={}, outputs={"count": Count}) async def counter_auto_start(): for i in range(5): yield {"count": i} @op(inputs={"number_in": Count}, outputs={"number_out": Number}) def echo_num(number_in: int): return {"number_out": number_in} class TestAsyncIter(AsyncTestCase): async def test_gen_with_input(self): test_dataflow = DataFlow.auto(GetMulti, counter, echo_num)