def test_list_action(self):
    """
    The action class produced by ``list_action(Features)`` must store a
    ``Features`` instance on the namespace, both when it is handed a single
    value and when it is handed a list of values.
    """
    dest = "features"
    parser = list_action(Features)
    # Both sub-tests share one namespace, exactly as a single parse would
    namespace = Namespace(**{dest: False})
    # (sub-test label, value passed to the action, members expected afterwards)
    scenarios = (
        ("single", "feed", ("feed",)),
        ("multiple", ["feed", "face"], ("feed", "face")),
    )
    for label, values, members in scenarios:
        with self.subTest(**{label: dest}):
            action = parser(dest=dest, option_strings="")
            action(None, namespace, values)
            self.assertEqual(
                getattr(namespace, dest, False), Features(*members)
            )
async def test_model(self):
    """
    Configure a fake model named "salary" over HTTP, verify the resulting
    config (directory and features), then create a context for it.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        with patch.object(Model, "load", new=model_load):
            config = parse_unknown(
                "--model-directory",
                tempdir,
                "--model-features",
                "Years:int:1",
                "Experiance:int:1",
            )
            configure_url = "/configure/model/fake/salary"
            async with self.post(configure_url, json=config) as response:
                self.assertEqual(await response.json(), OK)
                models = self.cli.app["models"]
                self.assertIn("salary", models)
                self.assertEqual(
                    models["salary"].config,
                    FakeModelConfig(
                        directory=tempdir,
                        features=Features(
                            DefFeature("Years", int, 1),
                            DefFeature("Experiance", int, 1),
                        ),
                    ),
                )
                with self.subTest(context="salaryctx"):
                    # Create the context
                    context_url = "/context/model/salary/salaryctx"
                    async with self.get(context_url) as response:
                        self.assertEqual(await response.json(), OK)
                        self.assertIn(
                            "salaryctx", self.cli.app["model_contexts"]
                        )
def setUpClass(cls):
    """
    Create the shared fixtures: a temporary model directory, a ``Misc``
    model, the ``StartsWithA`` feature and 2000 in-memory repos (1000 whose
    key starts with "a", then 1000 whose key starts with "b").
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.model = Misc(ModelConfig(directory=cls.model_dir.name))
    cls.feature = StartsWithA()
    cls.features = Features(cls.feature)
    cls.classifications = ["a", "not a"]

    def make_repo(prefix, feature_value, classification):
        # The random suffix keeps every repo key distinct
        return Repo(
            prefix + str(random.random()),
            data={
                "features": {cls.feature.NAME: feature_value},
                "classification": classification,
            },
        )

    cls.repos = [make_repo("a", 1, "a") for _ in range(1000)]
    cls.repos += [make_repo("b", 0, "not a") for _ in range(1000)]
    memory_source = MemorySource(MemorySourceConfig(repos=cls.repos))
    cls.sources = Sources(memory_source)
def setUpClass(cls):
    """
    Create the shared fixtures: the single string feature "A", one record
    per row of ``DATA`` (columns "A" and "X"), an in-memory source and a
    ``TextClassificationModel`` predicting "X".
    """
    cls.features = Features()
    cls.features.append(Feature("A", str, 1))
    # DATA is row oriented; transpose it into its two columns
    A, X = list(zip(*DATA))
    cls.records = [
        Record(str(index), data={"features": {"A": text, "X": label}})
        for index, (text, label) in enumerate(zip(A, X))
    ]
    memory_source = MemorySource(MemorySourceConfig(records=cls.records))
    cls.sources = Sources(memory_source)
    cls.model_dir = tempfile.TemporaryDirectory()
    model_config = TextClassifierConfig(
        directory=cls.model_dir.name,
        classifications=[0, 1],
        features=cls.features,
        predict=Feature("X", int, 1),
        add_layers=True,
        layers=[
            "Dense(units = 120, activation='relu')",
            "Dense(units = 64, activation=relu)",
            "Dense(units = 2, activation='softmax')",
        ],
        model_path="https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1",
        epochs=30,
    )
    cls.model = TextClassificationModel(model_config)
class DataFlowSourceConfig:
    """
    Configuration for a source that wraps another source and preprocesses
    its records with a DataFlow.

    Each attribute is declared with ``field(description, ...)``; the
    description string documents what the setting does.
    """

    source: BaseSource = field("Source to wrap")
    dataflow: DataFlow = field("DataFlow to use for preprocessing")
    # default_factory gives every config its own Features container. A single
    # ``default=Features()`` instance would be shared, so mutating it through
    # one config would leak into all others.
    features: Features = field(
        "Features to pass as definitions to each context from each "
        "record to be preprocessed",
        default_factory=Features,
    )
    inputs: List[str] = field(
        "Other inputs to add under each ctx (record's key will "
        "be used as the context)",
        action=ParseInputsAction,
        default_factory=list,
    )
    record_def: str = field(
        "Definition to be used for record.key."
        "If set, record.key will be added to the set of inputs "
        "under each context (which is also the record's key)",
        default=None,
    )
    length: str = field(
        "Definition name to add as source length", default=None
    )
    all_for_single: bool = field(
        "Run all records through dataflow before grabing "
        "results of desired record on a call to record()",
        default=False,
    )
    no_strict: bool = field(
        "Do not exit on operation exceptions, just log errors",
        default=False,
    )
    # Plain default (no field()): a MemoryOrchestrator with empty config
    orchestrator: BaseOrchestrator = MemoryOrchestrator.withconfig({})
def setUpClass(cls):
    """
    Create the shared fixtures for the model selected by ``cls.MODEL_TYPE``.

    "CLASSIFICATION" uses float features A-I from
    ``FEATURE_DATA_CLASSIFICATION``; "REGRESSION" uses float features A-C
    from ``FEATURE_DATA_REGRESSION``. In both cases the last data column is
    stored as "X", which the model is configured to predict.
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.features = Features()

    def build_repos(columns):
        # One repo per data row; ``columns`` maps name -> column tuple and
        # all columns have equal length because they come from one zip()
        row_count = len(next(iter(columns.values())))
        return [
            Repo(
                str(i),
                data={
                    "features": {
                        name: column[i] for name, column in columns.items()
                    }
                },
            )
            for i in range(row_count)
        ]

    # Strings must be compared with ``==``. ``is`` checks object identity,
    # which only works by accident of interning and triggers a SyntaxWarning
    # on Python 3.8+.
    if cls.MODEL_TYPE == "CLASSIFICATION":
        for name in "ABCDEFGHI":
            cls.features.append(DefFeature(name, float, 1))
        A, B, C, D, E, F, G, H, I, X = list(
            zip(*FEATURE_DATA_CLASSIFICATION)
        )
        cls.repos = build_repos(
            {
                "A": A,
                "B": B,
                "C": C,
                "D": D,
                "E": E,
                "F": F,
                "G": G,
                "H": H,
                "I": I,
                "X": X,
            }
        )
    elif cls.MODEL_TYPE == "REGRESSION":
        for name in "ABC":
            cls.features.append(DefFeature(name, float, 1))
        A, B, C, X = list(zip(*FEATURE_DATA_REGRESSION))
        cls.repos = build_repos({"A": A, "B": B, "C": C, "X": X})
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
    cls.model = cls.MODEL(
        cls.MODEL_CONFIG(
            directory=cls.model_dir.name,
            predict="X",
            features=cls.features,
        )
    )
def setUpClass(cls):
    """
    Create the shared fixtures: the integer feature "starts_with_a", 2000
    in-memory records (1000 "a" keys, then 1000 "b" keys) and a
    ``DNNClassifierModel`` that predicts the "string" feature.
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.feature = Feature("starts_with_a", int, 1)
    cls.features = Features(cls.feature)

    def make_record(prefix, starts_with_a, label):
        # The random suffix keeps every record key distinct
        return Record(
            prefix + str(random.random()),
            data={
                "features": {cls.feature.name: starts_with_a, "string": label}
            },
        )

    cls.records = [make_record("a", 1, "a") for _ in range(1000)]
    cls.records += [make_record("b", 0, "not a") for _ in range(1000)]
    memory_source = MemorySource(MemorySourceConfig(records=cls.records))
    cls.sources = Sources(memory_source)
    model_config = DNNClassifierModelConfig(
        directory=cls.model_dir.name,
        steps=1000,
        epochs=40,
        hidden=[50, 20, 10],
        predict=Feature("string", str, 1),
        classifications=["a", "not a"],
        clstype=str,
        features=cls.features,
    )
    cls.model = DNNClassifierModel(model_config)
def setUpClass(cls):
    """
    Create the shared fixtures: two features, a ``DNNRegressionModel``
    predicting "TARGET", and 2000 in-memory repos whose target follows
    f(x1, x2) = 2*x1 + 3*x2 for uniformly random inputs.
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.feature1 = Feature_1()
    cls.feature2 = Feature_2()
    cls.features = Features(cls.feature1, cls.feature2)
    model_config = DNNRegressionModelConfig(
        directory=cls.model_dir.name,
        steps=1000,
        epochs=40,
        hidden=[50, 20, 10],
        predict=DefFeature("TARGET", float, 1),
        features=cls.features,
    )
    cls.model = DNNRegressionModel(model_config)
    # Generating data f(x1,x2) = 2*x1 + 3*x2
    sample_count = 2000
    samples = np.random.rand(2, sample_count)
    cls.repos = []
    for i in range(sample_count):
        x1 = samples[0][i]
        x2 = samples[1][i]
        cls.repos.append(
            Repo(
                "x" + str(random.random()),
                data={
                    "features": {
                        cls.feature1.NAME: float(x1),
                        cls.feature2.NAME: float(x2),
                        "TARGET": 2 * x1 + 3 * x2,
                    }
                },
            )
        )
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
def setUpClass(cls):
    """
    Create the shared fixtures: the ``StartsWithA`` feature, a ``MiscModel``
    and 2000 in-memory repos (1000 "a" keys, then 1000 "b" keys).
    """
    cls.feature = StartsWithA()
    cls.features = Features(cls.feature)
    cls.model_dir = tempfile.TemporaryDirectory()
    model_config = MiscModelConfig(
        directory=cls.model_dir.name,
        classifications=["not a", "a"],
        features=cls.features,
    )
    cls.model = MiscModel(model_config)

    def make_repo(prefix, starts_with_a, label):
        # The random suffix keeps every repo key distinct
        return Repo(
            prefix + str(random.random()),
            data={
                "features": {cls.feature.NAME: starts_with_a, "string": label}
            },
        )

    cls.repos = [make_repo("a", 1, "a") for _ in range(1000)]
    cls.repos += [make_repo("b", 0, "not a") for _ in range(1000)]
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
def setUpClass(cls):
    """
    Create the shared fixtures: a ``DNNClassifierModel`` classifying on
    "string", the ``StartsWithA`` feature and 2000 in-memory repos (1000 "a"
    keys, then 1000 "b" keys).
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    model_config = DNNClassifierModelConfig(
        directory=cls.model_dir.name,
        steps=1000,
        epochs=30,
        hidden=[10, 20, 10],
        classification="string",
        classifications=["a", "not a"],
        clstype=str,
    )
    cls.model = DNNClassifierModel(model_config)
    cls.feature = StartsWithA()
    cls.features = Features(cls.feature)

    def make_repo(prefix, starts_with_a, label):
        # The random suffix keeps every repo key distinct
        return Repo(
            prefix + str(random.random()),
            data={
                "features": {cls.feature.NAME: starts_with_a, "string": label}
            },
        )

    cls.repos = [make_repo("a", 1, "a") for _ in range(1000)]
    cls.repos += [make_repo("b", 0, "not a") for _ in range(1000)]
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
async def test_model_context_model_not_found(self):
    """
    Posting a context for the "salary" model before that model has been
    configured must raise ``ServerException`` ("salary model not found").
    """
    expected_error = "salary model not found"
    with self.assertRaisesRegex(ServerException, expected_error):
        payload = Features().export()
        async with self.post(
            "/context/model/salary/salaryctx", json=payload
        ) as _response:
            pass  # pragma: no cov
async def test_model(self):
    """
    Configure a fake model named "salary", then create a context for it by
    posting feature definitions in shorthand form (without the "name" key)
    and check the context ends up with the full feature set.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        with patch.object(Model, "load", new=model_load):
            config = parse_unknown("--model-directory", tempdir)
            configure_url = "/configure/model/fake/salary"
            async with self.post(configure_url, json=config) as response:
                self.assertEqual(await response.json(), OK)
                models = self.cli.app["models"]
                self.assertIn("salary", models)
                self.assertEqual(
                    models["salary"].config,
                    FakeModelConfig(directory=tempdir),
                )
                with self.subTest(context="salaryctx"):
                    # Define the features
                    features = Features(
                        DefFeature("Years", int, 1),
                        DefFeature("Experiance", int, 1),
                    )
                    exported_features = features.export()
                    # Check that we can send shorthand version of feature_def
                    for feature_def in exported_features.values():
                        del feature_def["name"]
                    # Create the context
                    context_url = "/context/model/salary/salaryctx"
                    async with self.post(
                        context_url, json=exported_features
                    ) as response:
                        self.assertEqual(await response.json(), OK)
                        contexts = self.cli.app["model_contexts"]
                        self.assertIn("salaryctx", contexts)
                        self.assertEqual(
                            contexts["salaryctx"].features.export(),
                            features.export(),
                        )
async def test_one_applicable_other_not(self):
    """
    Evaluating ``self.two`` together with a ``TwoBFeatureTester`` must yield
    exactly one result: the one for ``self.two``, equal to ``True``.
    """
    other = TwoBFeatureTester()
    features = Features(self.two, other)
    async with features:
        results = await features.evaluate("test")
        expected_name = self.two.NAME
        self.assertIn(expected_name, results)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[expected_name], True)
def setUpClass(cls):
    """
    Create the shared fixtures: features A-F (float) and G-H (int), one
    record per row of ``DATA`` (columns A-H plus "X"), an in-memory source,
    a ``VWModel`` regressing on "X" and a mean squared error scorer.
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.features = Features()
    for name in "ABCDEF":
        cls.features.append(Feature(name, float, 1))
    for name in "GH":
        cls.features.append(Feature(name, int, 1))
    # DATA is row oriented; transpose it into named columns
    A, B, C, D, E, F, G, H, X = list(zip(*DATA))
    columns = {
        "A": A,
        "B": B,
        "C": C,
        "D": D,
        "E": E,
        "F": F,
        "G": G,
        "H": H,
        "X": X,
    }
    cls.records = [
        Record(
            str(i),
            data={
                "features": {
                    name: column[i] for name, column in columns.items()
                }
            },
        )
        for i in range(len(A))
    ]
    memory_source = MemorySource(MemorySourceConfig(records=cls.records))
    cls.sources = Sources(memory_source)
    model_config = VWConfig(
        location=cls.model_dir.name,
        features=cls.features,
        predict=Feature("X", float, 1),
        # A and B will be namespace n1
        # A and C will be in namespace n2
        namespace=["n1_A_B", "n2_A_C"],
        importance=Feature("H", int, 1),
        tag=Feature("G", int, 1),
        task="regression",
        vwcmd=["l2", "0.1", "loss_function", "squared", "passes", "10"],
    )
    cls.model = VWModel(model_config)
    cls.scorer = MeanSquaredErrorAccuracy()
async def _add_memory_source(self):
    """
    Register an in-memory source, and a context for it, with the app under
    ``self.slabel``, then yield while both are open.

    The source holds ``self.num_repos`` repos keyed "0", "1", ... whose
    "by_ten" feature is the key multiplied by ten.
    """
    self.features = Features(DefFeature("by_ten", int, 1))
    repos = [
        Repo(str(number), data={"features": {"by_ten": number * 10}})
        for number in range(self.num_repos)
    ]
    async with MemorySource(MemorySourceConfig(repos=repos)) as source:
        self.source = source
        self.cli.app["sources"][self.slabel] = source
        async with source() as sctx:
            self.sctx = sctx
            self.cli.app["source_contexts"][self.slabel] = sctx
            yield
async def test_monitor_progess(self):
    """
    Submitting "test" for a ``ProgessFeatureTester`` must produce logs
    containing "Hi" and a single result, for that feature, equal to ``True``.
    """
    progress = ProgessFeatureTester()
    features = Features(progress)
    async with features:
        submitted = await features.submit("test")
        logs = await submitted.logs()
        results = await submitted.result()
        self.assertTrue(logs)
        self.assertIn("Hi", logs)
        feature_name = progress.NAME
        self.assertIn(feature_name, results)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[feature_name], True)
def setUpClass(cls):
    """
    Create the shared fixtures: an ``SLR`` model predicting "Y", the float
    feature "X" and one in-memory repo per row of ``FEATURE_DATA``.
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    model_config = SLRConfig(directory=cls.model_dir.name, predict="Y")
    cls.model = SLR(model_config)
    cls.feature = DefFeature("X", float, 1)
    cls.features = Features(cls.feature)
    # FEATURE_DATA is row oriented; transpose it into its two columns
    X, Y = list(zip(*FEATURE_DATA))
    cls.repos = [
        Repo(str(index), data={"features": {"X": x_value, "Y": y_value}})
        for index, (x_value, y_value) in enumerate(zip(X, Y))
    ]
    memory_source = MemorySource(MemorySourceConfig(repos=cls.repos))
    cls.sources = Sources(memory_source)
def setUp(self):
    """
    Build one repo per calculator string, with the value each should
    evaluate to, and an ``OperationsAll`` command that remaps the
    ``get_single`` "result" output to the "string_calculator" feature.
    """
    super().setUp()
    self.repo_keys = {"add 40 and 2": 42, "multiply 42 and 10": 420}
    self.repos = [Repo(key) for key in self.repo_keys]
    self.sources = Sources(RepoSource(*self.repos))
    self.features = Features(DefFeature("string_calculator", int, 1))
    output_specs = [(["result"], "get_single_spec")]
    remap = [("get_single", "result", "string_calculator")]
    self.cli = OperationsAll(
        ops=OPERATIONS,
        opimpn_memory_opimps=OPIMPS,
        repo_def="calc_string",
        output_specs=output_specs,
        remap=remap,
        sources=self.sources,
        features=self.features,
    )
def setUpClass(cls):
    """
    Create the shared fixtures: the single string feature "A", one record
    per row of ``DATA`` (columns "A" and "X"), an in-memory source and an
    ``HFClassificationModel`` whose cache, logging and output directories
    all point at one temporary directory.
    """
    cls.features = Features()
    cls.features.append(Feature("A", str, 1))
    # DATA is row oriented; transpose it into its two columns
    A, X = list(zip(*DATA))
    cls.records = [
        Record(str(index), data={"features": {"A": text, "X": label}})
        for index, (text, label) in enumerate(zip(A, X))
    ]
    memory_source = MemorySource(MemorySourceConfig(records=cls.records))
    cls.sources = Sources(memory_source)
    cls.model_dir = tempfile.TemporaryDirectory()
    workdir = cls.model_dir.name
    model_config = HFClassificationModelConfig(
        model_name_or_path="bert-base-cased",
        cache_dir=workdir,
        logging_dir=workdir,
        output_dir=workdir,
        features=cls.features,
        predict=Feature("X", int, 1),
        label_list=["0", "1"],
    )
    cls.model = HFClassificationModel(model_config)
def test_set_timeout(self):
    """The ``timeout`` given to ``FeaturesCMD`` must end up on its features."""
    timeout = 5
    command = FeaturesCMD(timeout=timeout, features=Features())
    self.assertEqual(command.features.timeout, 5)
def setUpClass(cls):
    """
    Create the shared fixtures for the scikit model under test.

    ``cls.MODEL_TYPE`` selects the dataset and feature set (classifier,
    regressor or clustering). The estimator type of
    ``cls.MODEL.SCIKIT_MODEL`` then decides what the model predicts: "X", or
    both "X" and "Y" when the model type contains "MULTI_".
    """
    cls.is_multi = "MULTI_" in cls.MODEL_TYPE
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.features = Features()

    def add_float_features(names):
        for name in names:
            cls.features.append(Feature(name, float, 1))

    def build_records(columns):
        # One record per data row; ``columns`` maps name -> column tuple and
        # all columns have equal length because they come from one zip()
        row_count = len(next(iter(columns.values())))
        return [
            Record(
                str(i),
                data={
                    "features": {
                        name: column[i] for name, column in columns.items()
                    }
                },
            )
            for i in range(row_count)
        ]

    if cls.MODEL_TYPE in classifier_types:
        A, B, C, D, E, F, G, H, X, Y = list(
            zip(*FEATURE_DATA_CLASSIFICATION)
        )
        add_float_features("ABCDEFGH")
        if cls.MODEL_TYPE == "CLASSIFICATION":
            add_float_features("X")
        cls.records = build_records(
            {
                "A": A,
                "B": B,
                "C": C,
                "D": D,
                "E": E,
                "F": F,
                "G": G,
                "H": H,
                "X": X,
                "Y": Y,
            }
        )
    elif cls.MODEL_TYPE in regressor_types:
        add_float_features("ABCD")
        if cls.MODEL_TYPE == "REGRESSION":
            add_float_features("X")
        A, B, C, D, X, Y = list(zip(*FEATURE_DATA_REGRESSION))
        cls.records = build_records(
            {"A": A, "B": B, "C": C, "D": D, "X": X, "Y": Y}
        )
    elif cls.MODEL_TYPE == "CLUSTERING":
        add_float_features("ABCD")
        A, B, C, D, X = list(zip(*FEATURE_DATA_CLUSTERING))
        cls.records = build_records({"A": A, "B": B, "C": C, "D": D, "X": X})
    memory_source = MemorySource(MemorySourceConfig(records=cls.records))
    cls.sources = Sources(memory_source)
    config_fields = {}
    estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
    if estimator_type in supervised_estimators:
        if not cls.is_multi:
            config_fields["predict"] = Feature("X", float, 1)
        else:
            config_fields["predict"] = Features(
                Feature("X", float, 1), Feature("Y", float, 1)
            )
    elif estimator_type in unsupervised_estimators:
        # TODO If cls.TRUE_CLSTR_PRESENT then we want to use the
        # mutual_info_score scikit accuracy scorer. In this case we might
        # want to change tcluster to a boolean config property.
        # For more info see commit e4f523976bf37d3457cda140ceab7899420ae2c7
        config_fields["predict"] = Feature("X", float, 1)
    model_config = cls.MODEL_CONFIG(
        location=cls.model_dir.name,
        features=cls.features,
        **config_fields,
    )
    cls.model = cls.MODEL(model_config)
    cls.scorer = cls.SCORER()
def setUp(self):
    """Create a ``Train`` command around a fake model and empty inputs."""
    empty_sources = Sources(RepoSource())
    self.cli = Train(
        model=FakeModel(),
        model_dir=None,
        sources=empty_sources,
        features=Features(),
    )
def setUp(self):
    """Create three features and the ``Features`` collection holding them."""
    self.one, self.two, self.three = (
        Feature("one", int, 1),
        Feature("two", float, 2),
        Feature("three", int, 1),
    )
    self.features = Features(self.one, self.two, self.three)
def setUp(self):
    """Create an ``Accuracy`` command around a fake model and empty inputs."""
    empty_sources = Sources(RepoSource())
    self.cli = Accuracy(
        model=FakeModel(),
        sources=empty_sources,
        features=Features(),
    )
def setUp(self):
    """Create ten randomly keyed repos, their source and a fake feature."""
    self.repos = [Repo(str(random.random())) for _ in range(10)]
    repo_source = RepoSource(*self.repos)
    self.sources = Sources(repo_source)
    self.features = Features(FakeFeature())
def setUpClass(cls):
    """
    Create the shared fixtures for the scikit model under test.

    ``cls.MODEL_TYPE`` ("CLASSIFICATION", "REGRESSION" or "CLUSTERING")
    selects the dataset and the float features. The estimator type of
    ``cls.MODEL.SCIKIT_MODEL`` decides the extra config: supervised
    estimators predict "X"; unsupervised estimators get "X" as ``tcluster``
    when ``cls.TRUE_CLSTR_PRESENT`` is set.
    """
    cls.model_dir = tempfile.TemporaryDirectory()
    cls.features = Features()

    def add_float_features(names):
        for name in names:
            cls.features.append(DefFeature(name, float, 1))

    def build_repos(columns):
        # One repo per data row; ``columns`` maps name -> column tuple and
        # all columns have equal length because they come from one zip()
        row_count = len(next(iter(columns.values())))
        return [
            Repo(
                str(i),
                data={
                    "features": {
                        name: column[i] for name, column in columns.items()
                    }
                },
            )
            for i in range(row_count)
        ]

    # Strings must be compared with ``==``. ``is`` checks object identity,
    # which only works by accident of interning and triggers a SyntaxWarning
    # on Python 3.8+.
    if cls.MODEL_TYPE == "CLASSIFICATION":
        add_float_features("ABCDEFGHI")
        A, B, C, D, E, F, G, H, I, X = list(
            zip(*FEATURE_DATA_CLASSIFICATION)
        )
        cls.repos = build_repos(
            {
                "A": A,
                "B": B,
                "C": C,
                "D": D,
                "E": E,
                "F": F,
                "G": G,
                "H": H,
                "I": I,
                "X": X,
            }
        )
    elif cls.MODEL_TYPE == "REGRESSION":
        add_float_features("ABC")
        A, B, C, X = list(zip(*FEATURE_DATA_REGRESSION))
        cls.repos = build_repos({"A": A, "B": B, "C": C, "X": X})
    elif cls.MODEL_TYPE == "CLUSTERING":
        add_float_features("ABCD")
        A, B, C, D, X = list(zip(*FEATURE_DATA_CLUSTERING))
        cls.repos = build_repos({"A": A, "B": B, "C": C, "D": D, "X": X})
    cls.sources = Sources(MemorySource(MemorySourceConfig(repos=cls.repos)))
    properties = {
        "directory": cls.model_dir.name,
        "features": cls.features,
    }
    config_fields = {}
    estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
    if estimator_type in supervised_estimators:
        config_fields["predict"] = DefFeature("X", float, 1)
    elif estimator_type in unsupervised_estimators:
        if cls.TRUE_CLSTR_PRESENT:
            config_fields["tcluster"] = DefFeature("X", float, 1)
    cls.model = cls.MODEL(cls.MODEL_CONFIG(**{**properties, **config_fields}))
# pylint: disable=missing-docstring,no-self-use import sys import inspect import unittest from dffml.feature import Feature, Features from dffml.util.asynctestcase import AsyncTestCase # feature_name based features from dffml_feature_feature_name.feature.misc import MiscFeature FEATURES = Features(*[feature() for _name, feature in \ inspect.getmembers(sys.modules[__name__], lambda feature: \ bool(inspect.isclass(feature) and \ issubclass(feature, Feature) and \ feature is not Feature))]) class TestFeature(unittest.TestCase): ''' Test that the model for this feature was pip installed an loads as a plugin to dffml. ''' def test_load_builtin_features(self): features = Feature.load() for mustLoad in FEATURES: with self.subTest(mustLoad=mustLoad): self.assertIn(mustLoad.__class__, features) class Testfeature_nameFeatures(AsyncTestCase):
from dffml_feature_git.feature.cloc import GitClocFeature
from dffml_feature_git.feature.lang import GitLangsFeature, GitLangFeature
from dffml_feature_git.feature.work import GitWorkFeature
from dffml_feature_git.feature.release import GitReleaseFeature
from dffml_feature_git.feature.commits import GitCommitsFeature
from dffml_feature_git.feature.authors import GitAuthorsFeature

# Feature classes that Feature.load() is expected to return
FEATURES = [
    # Git repo features
    GitCommitsFeature,
    GitAuthorsFeature,
    GitWorkFeature,
    GitClocFeature,
    GitReleaseFeature,
    ]

# One instance of every entry in FEATURES that subclasses GitFeature
# NOTE(review): GitFeature is not imported in the visible chunk; presumably
# imported above, confirm
GIT_FEATURES = Features(
    *[feature() for feature in FEATURES if issubclass(feature, GitFeature)])

class TestFeature(unittest.TestCase):
    # Checks that the listed feature classes are discoverable

    def test_load_builtin_features(self):
        # Every class in FEATURES must be among what Feature.load() returns
        features = Feature.load()
        for mustLoad in FEATURES:
            with self.subTest(mustLoad=mustLoad):
                self.assertIn(mustLoad, features)

class TestGitFeatures(AsyncTestCase):

    async def test_git_features(self):
        # NOTE(review): the loop body lies outside the visible chunk
        async with GIT_FEATURES:
            for src_url in ['https://github.com/tpm2-software/tpm2-tss',
                            'https://github.com/github/gitignore']:
def setUp(self):
    """Set the repository URL under test and the single ``GitLangFeature``."""
    self.src_url = "https://github.com/tpm2-software/tpm2-tss"
    lang_feature = GitLangFeature()
    self.features = Features(lang_feature)
def setUp(self):
    """Create the three feature testers and the ``Features`` holding them."""
    self.one, self.two, self.three = (
        OneFeatureTester(),
        TwoFeatureTester(),
        ThreeFeatureTester(),
    )
    self.features = Features(self.one, self.two, self.three)