Example #1
0
 async def test_model(self):
     """
     Configure the fake "salary" model through the HTTP API, then create a
     model context for it by posting shorthand feature definitions.
     """
     with tempfile.TemporaryDirectory() as tempdir, patch.object(
             Model, "load", new=model_load):
         config = parse_unknown("--model-directory", tempdir)
         async with self.post("/configure/model/fake/salary",
                              json=config) as configured:
             self.assertEqual(await configured.json(), OK)
             models = self.cli.app["models"]
             self.assertIn("salary", models)
             self.assertEqual(
                 models["salary"].config,
                 FakeModelConfig(directory=tempdir),
             )
             with self.subTest(context="salaryctx"):
                 # Feature definitions the context is created with
                 features = Features(
                     DefFeature("Years", int, 1),
                     DefFeature("Experiance", int, 1),
                 )
                 shorthand = features.export()
                 # The shorthand form leaves "name" out of each definition
                 for definition in shorthand.values():
                     del definition["name"]
                 # Create the context
                 async with self.post(
                         "/context/model/salary/salaryctx",
                         json=shorthand,
                 ) as created:
                     self.assertEqual(await created.json(), OK)
                     contexts = self.cli.app["model_contexts"]
                     self.assertIn("salaryctx", contexts)
                     self.assertEqual(
                         contexts["salaryctx"].features.export(),
                         features.export(),
                     )
Example #2
0
 async def test_model_context_model_not_found(self):
     """
     Posting a context for the "salary" model, which this test never
     configures, must raise ServerException matching
     "salary model not found".
     """
     features = Features()
     with self.assertRaisesRegex(ServerException,
                                 "salary model not found"):
         async with self.post("/context/model/salary/salaryctx",
                              json=features.export()):
             # Only reached if the request unexpectedly succeeds
             pass  # pragma: no cover
Example #3
0
 def test_list_action(self):
     """
     The action built by list_action(Features) stores a Features instance
     on the namespace for both a single value and a list of values.
     """
     dest = "features"
     parser = list_action(Features)
     namespace = Namespace(**{dest: False})
     with self.subTest(single=dest):
         store_single = parser(dest=dest, option_strings="")
         store_single(None, namespace, "feed")
         stored = getattr(namespace, dest, False)
         self.assertEqual(stored, Features("feed"))
     with self.subTest(multiple=dest):
         store_many = parser(dest=dest, option_strings="")
         store_many(None, namespace, ["feed", "face"])
         stored = getattr(namespace, dest, False)
         self.assertEqual(stored, Features("feed", "face"))
Example #4
0
 def setUpClass(cls):
     """
     Create the shared fixtures: a MiscModel stored in a temporary
     directory and 2000 in-memory repos, 1000 for each group.
     """
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     cls.model_dir = tempfile.TemporaryDirectory()
     model_config = MiscModelConfig(
         directory=cls.model_dir.name,
         classifications=["not a", "a"],
         features=cls.features,
     )
     cls.model = MiscModel(model_config)
     # (key prefix, feature value, "string" value) for each group of repos
     groups = (("a", 1, "a"), ("b", 0, "not a"))
     cls.repos = []
     for prefix, value, label in groups:
         cls.repos.extend(
             Repo(
                 prefix + str(random.random()),
                 data={"features": {
                     cls.feature.NAME: value,
                     "string": label
                 }},
             ) for _ in range(1000))
     source_config = MemorySourceConfig(repos=cls.repos)
     cls.sources = Sources(MemorySource(source_config))
Example #5
0
 def setUpClass(cls):
     """
     Build one record per row of DATA, an in-memory source holding them,
     and the TextClassificationModel under test.
     """
     cls.features = Features()
     cls.features.append(Feature("A", str, 1))
     # Transpose DATA into its "A" column and its "X" column
     a_values, x_values = list(zip(*DATA))
     cls.records = [
         Record(str(index), data={"features": {
             "A": a_value,
             "X": x_value
         }}) for index, (a_value, x_value) in enumerate(
             zip(a_values, x_values))
     ]
     source_config = MemorySourceConfig(records=cls.records)
     cls.sources = Sources(MemorySource(source_config))
     cls.model_dir = tempfile.TemporaryDirectory()
     model_config = TextClassifierConfig(
         directory=cls.model_dir.name,
         classifications=[0, 1],
         features=cls.features,
         predict=Feature("X", int, 1),
         add_layers=True,
         layers=[
             "Dense(units = 120, activation='relu')",
             "Dense(units = 64, activation=relu)",
             "Dense(units = 2, activation='softmax')",
         ],
         model_path=
         "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1",
         epochs=30,
     )
     cls.model = TextClassificationModel(model_config)
Example #6
0
 def setUpClass(cls):
     """
     Create the shared fixtures: a Misc model stored in a temporary
     directory and 2000 in-memory repos, 1000 for each classification.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.model = Misc(ModelConfig(directory=cls.model_dir.name))
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     cls.classifications = ['a', 'not a']
     # (key prefix, feature value, classification) for each group of repos
     groups = (('a', 1, 'a'), ('b', 0, 'not a'))
     cls.repos = []
     for prefix, value, classification in groups:
         cls.repos.extend(
             Repo(prefix + str(random.random()),
                  data={
                      'features': {
                          cls.feature.NAME: value
                      },
                      'classification': classification
                  }) for _ in range(1000))
     source_config = MemorySourceConfig(repos=cls.repos)
     cls.sources = Sources(MemorySource(source_config))
    async def context_model(self, request):
        """
        Create a context for an already configured model.

        Reads ``label`` (the model) and ``ctx_label`` (the name to store
        the new context under) from the route, and the feature definitions
        from the JSON request body.

        Responds with:
          - 404 and an error message when no model is registered under
            ``label``.
          - 400 and an error message when the body cannot be turned into
            Features.
          - ``OK`` once the context has been entered and stored in
            ``request.app["model_contexts"][ctx_label]``.
        """
        label = request.match_info["label"]
        ctx_label = request.match_info["ctx_label"]

        if label not in request.app["models"]:
            return web.json_response(
                {"error": f"{label} model not found"},
                status=HTTPStatus.NOT_FOUND,
            )

        features_dict = await request.json()

        try:
            features = Features._fromdict(**features_dict)
        except Exception:
            # Any failure to build Features is reported as a client error.
            # Exception (not a bare except) so that KeyboardInterrupt,
            # SystemExit and similar BaseExceptions still propagate.
            return web.json_response(
                {"error": "Incorrect format for features"},
                status=HTTPStatus.BAD_REQUEST,
            )

        # Enter the model context and pass the features. The context is
        # entered on the app-wide exit stack, so it is not exited when this
        # handler returns.
        exit_stack = request.app["exit_stack"]
        model = request.app["models"][label]
        mctx = await exit_stack.enter_async_context(model(features))
        request.app["model_contexts"][ctx_label] = mctx

        return web.json_response(OK)
Example #8
0
 async def test_model(self):
     """
     Configure the fake "salary" model with two features through the HTTP
     API, then create a model context for it with a GET request.
     """
     with tempfile.TemporaryDirectory() as tempdir, patch.object(
         Model, "load", new=model_load
     ):
         config = parse_unknown(
             "--model-directory",
             tempdir,
             "--model-features",
             "Years:int:1",
             "Experiance:int:1",
         )
         async with self.post(
             "/configure/model/fake/salary", json=config
         ) as configured:
             self.assertEqual(await configured.json(), OK)
             models = self.cli.app["models"]
             self.assertIn("salary", models)
             actual_config = models["salary"].config
             expected_config = FakeModelConfig(
                 directory=tempdir,
                 features=Features(
                     DefFeature("Years", int, 1),
                     DefFeature("Experiance", int, 1),
                 ),
             )
             self.assertEqual(actual_config, expected_config)
             with self.subTest(context="salaryctx"):
                 # Create the context
                 async with self.get(
                     "/context/model/salary/salaryctx"
                 ) as created:
                     self.assertEqual(await created.json(), OK)
                     contexts = self.cli.app["model_contexts"]
                     self.assertIn("salaryctx", contexts)
Example #9
0
 def setUpClass(cls):
     """
     Create the shared fixtures: 2000 in-memory records (1000 for each
     group) and a DNNClassifierModel stored in a temporary directory.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature = Feature("starts_with_a", int, 1)
     cls.features = Features(cls.feature)
     # (key prefix, feature value, "string" value) for each group of records
     groups = (("a", 1, "a"), ("b", 0, "not a"))
     cls.records = []
     for prefix, value, label in groups:
         cls.records.extend(
             Record(
                 prefix + str(random.random()),
                 data={"features": {
                     cls.feature.name: value,
                     "string": label
                 }},
             ) for _ in range(1000))
     source_config = MemorySourceConfig(records=cls.records)
     cls.sources = Sources(MemorySource(source_config))
     model_config = DNNClassifierModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=Feature("string", str, 1),
         classifications=["a", "not a"],
         clstype=str,
         features=cls.features,
     )
     cls.model = DNNClassifierModel(model_config)
Example #10
0
 def setUpClass(cls):
     """
     Build the fixtures for the model type under test.

     Depending on cls.MODEL_TYPE ("CLASSIFICATION" or "REGRESSION") this
     defines the input features, loads the matching feature data into
     in-memory repos, and creates cls.MODEL (configured through
     cls.MODEL_CONFIG) to predict "X" in a temporary directory.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.features = Features()
     # Compare with == rather than `is`: whether two equal strings are the
     # same object depends on interning, so `is` against a literal can be
     # False for an equal value (and is a SyntaxWarning on Python 3.8+).
     if cls.MODEL_TYPE == "CLASSIFICATION":
         for name in "ABCDEFGHI":
             cls.features.append(DefFeature(name, float, 1))
         # Transpose the rows into one tuple per column
         A, B, C, D, E, F, G, H, I, X = list(
             zip(*FEATURE_DATA_CLASSIFICATION))
         cls.repos = [
             Repo(
                 str(i),
                 data={
                     "features": {
                         "A": A[i],
                         "B": B[i],
                         "C": C[i],
                         "D": D[i],
                         "E": E[i],
                         "F": F[i],
                         "G": G[i],
                         "H": H[i],
                         "I": I[i],
                         "X": X[i],
                     }
                 },
             ) for i in range(len(A))
         ]
     elif cls.MODEL_TYPE == "REGRESSION":
         for name in "ABC":
             cls.features.append(DefFeature(name, float, 1))
         # Transpose the rows into one tuple per column
         A, B, C, X = list(zip(*FEATURE_DATA_REGRESSION))
         cls.repos = [
             Repo(
                 str(i),
                 data={
                     "features": {
                         "A": A[i],
                         "B": B[i],
                         "C": C[i],
                         "X": X[i],
                     }
                 },
             ) for i in range(len(A))
         ]
     # NOTE(review): no branch above assigns cls.repos for any other
     # MODEL_TYPE - confirm only these two values are used.
     cls.sources = Sources(MemorySource(
         MemorySourceConfig(repos=cls.repos)))
     cls.model = cls.MODEL(
         cls.MODEL_CONFIG(
             directory=cls.model_dir.name,
             predict="X",
             features=cls.features,
         ))
Example #11
0
 def setUpClass(cls):
     """
     Create a DNNRegressionModel in a temporary directory and 2000
     in-memory repos whose TARGET is 2*x1 + 3*x2 for random x1 and x2.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature1 = Feature_1()
     cls.feature2 = Feature_2()
     cls.features = Features(cls.feature1, cls.feature2)
     model_config = DNNRegressionModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=DefFeature("TARGET", float, 1),
         features=cls.features,
     )
     cls.model = DNNRegressionModel(model_config)
     # Generating data f(x1,x2) = 2*x1 + 3*x2
     sample_count = 2000
     samples = np.random.rand(2, sample_count)
     cls.repos = [
         Repo(
             "x" + str(random.random()),
             data={
                 "features": {
                     cls.feature1.NAME: float(x1),
                     cls.feature2.NAME: float(x2),
                     "TARGET": 2 * x1 + 3 * x2,
                 }
             },
         ) for x1, x2 in zip(samples[0], samples[1])
     ]
     source_config = MemorySourceConfig(repos=cls.repos)
     cls.sources = Sources(MemorySource(source_config))
Example #12
0
class DataFlowSourceConfig:
    # Source being wrapped
    source: BaseSource = field("Source to wrap")
    # DataFlow used for preprocessing
    dataflow: DataFlow = field("DataFlow to use for preprocessing")
    # Defaults to an empty Features instance
    features: Features = field(
        "Features to pass as definitions to each context from each "
        "record to be preprocessed",
        default=Features(),
    )
    # Handled by ParseInputsAction; defaults to a new empty list
    inputs: List[str] = field(
        "Other inputs to add under each ctx (record's key will "
        "be used as the context)",
        action=ParseInputsAction,
        default_factory=list,
    )
    # Optional; None unless set
    record_def: str = field(
        "Definition to be used for record.key."
        "If set, record.key will be added to the set of inputs "
        "under each context (which is also the record's key)",
        default=None,
    )
    # Optional; None unless set
    length: str = field(
        "Definition name to add as source length",
        default=None,
    )
    # Off by default
    all_for_single: bool = field(
        "Run all records through dataflow before grabing "
        "results of desired record on a call to record()",
        default=False,
    )
    # Off by default
    no_strict: bool = field(
        "Do not exit on operation exceptions, just log errors",
        default=False,
    )
    # Defaults to a MemoryOrchestrator built from an empty config
    orchestrator: BaseOrchestrator = MemoryOrchestrator.withconfig({})
Example #13
0
 def setUpClass(cls):
     """
     Create the shared fixtures: a DNNClassifierModel stored in a temporary
     directory and 2000 in-memory repos, 1000 for each group.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     model_config = DNNClassifierModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=30,
         hidden=[10, 20, 10],
         classification="string",
         classifications=["a", "not a"],
         clstype=str,
     )
     cls.model = DNNClassifierModel(model_config)
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     # (key prefix, feature value, "string" value) for each group of repos
     groups = (("a", 1, "a"), ("b", 0, "not a"))
     cls.repos = []
     for prefix, value, label in groups:
         cls.repos.extend(
             Repo(
                 prefix + str(random.random()),
                 data={"features": {
                     cls.feature.NAME: value,
                     "string": label
                 }},
             ) for _ in range(1000))
     source_config = MemorySourceConfig(repos=cls.repos)
     cls.sources = Sources(MemorySource(source_config))
Example #14
0
 async def test_one_applicable_other_not(self):
     """
     Evaluating self.two together with a TwoBFeatureTester yields exactly
     one result: True under self.two's name.
     """
     other = TwoBFeatureTester()
     pair = Features(self.two, other)
     async with pair:
         outcome = await pair.evaluate("test")
         name = self.two.NAME
         self.assertIn(name, outcome)
         self.assertEqual(len(outcome), 1)
         self.assertEqual(outcome[self.two.NAME], True)
Example #15
0
    def setUpClass(cls):
        """
        Build the features, one record per row of DATA, the in-memory
        source, the VWModel under test and the scorer.
        """
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        # A through F are float features; G and H are int features
        for name in "ABCDEF":
            cls.features.append(Feature(name, float, 1))
        for name in "GH":
            cls.features.append(Feature(name, int, 1))

        # Transpose DATA into one tuple per column
        A, B, C, D, E, F, G, H, X = list(zip(*DATA))
        columns = {
            "A": A,
            "B": B,
            "C": C,
            "D": D,
            "E": E,
            "F": F,
            "G": G,
            "H": H,
            "X": X,
        }
        cls.records = [
            Record(
                str(row),
                data={
                    "features": {
                        name: column[row]
                        for name, column in columns.items()
                    }
                },
            )
            for row in range(len(A))
        ]

        source_config = MemorySourceConfig(records=cls.records)
        cls.sources = Sources(MemorySource(source_config))
        model_config = VWConfig(
            location=cls.model_dir.name,
            features=cls.features,
            predict=Feature("X", float, 1),
            # A and B will be namespace n1
            # A and C will be in namespace n2
            namespace=["n1_A_B", "n2_A_C"],
            importance=Feature("H", int, 1),
            tag=Feature("G", int, 1),
            task="regression",
            vwcmd=[
                "l2",
                "0.1",
                "loss_function",
                "squared",
                "passes",
                "10",
            ],
        )
        cls.model = VWModel(model_config)
        cls.scorer = MeanSquaredErrorAccuracy()
Example #16
0
 def test_load_defs(self):
     """
     Features.load_defs returns the names that are not definitions and a
     feature for each "def:name:dtype:length" argument.
     """
     plain, (one, two) = Features.load_defs('na', 'def:one:float:10',
                                            'def:two:bool:1')
     self.assertEqual(plain, ['na'])
     # (feature, expected NAME, expected dtype, expected length)
     expectations = (
         (one, 'one', float, 10),
         (two, 'two', bool, 1),
     )
     for feature, name, dtype, length in expectations:
         self.assertEqual(feature.NAME, name)
         self.assertEqual(feature.dtype(), dtype)
         self.assertEqual(feature.length(), length)
Example #17
0
class TestFeatures(AsyncTestCase):
    """Tests for a Features collection built from three Feature objects."""

    def setUp(self):
        # Three features named "one", "two" and "three"
        self.one, self.two, self.three = (
            Feature("one", int, 1),
            Feature("two", float, 2),
            Feature("three", int, 1),
        )
        self.features = Features(self.one, self.two, self.three)

    async def test_names(self):
        """names() contains the name of every feature in the collection."""
        reported = self.features.names()
        for expected in ("one", "two", "three"):
            self.assertIn(expected, reported)
Example #18
0
class TestFeatures(AsyncTestCase):
    """Tests for a Features collection built from three feature testers."""

    def setUp(self):
        # One instance of each feature tester
        self.one, self.two, self.three = (
            OneFeatureTester(),
            TwoFeatureTester(),
            ThreeFeatureTester(),
        )
        self.features = Features(self.one, self.two, self.three)

    async def test_names(self):
        """names() contains "one", "two" and "three"."""
        reported = self.features.names()
        for expected in ("one", "two", "three"):
            self.assertIn(expected, reported)
Example #19
0
 async def test_monitor_progess(self):
     """
     Submitting 'test' to a single ProgessFeatureTester produces logs
     containing 'Hi' and exactly one result, True under the feature's name.
     """
     tracker = ProgessFeatureTester()
     collection = Features(tracker)
     async with collection:
         submitted = await collection.submit('test')
         log_output = await submitted.logs()
         outcome = await submitted.result()
         # Logs must be non-empty and contain the feature's message
         self.assertTrue(log_output)
         self.assertIn('Hi', log_output)
         # Only the submitted feature reports a result
         self.assertIn(tracker.NAME, outcome)
         self.assertEqual(len(outcome), 1)
         self.assertEqual(outcome[tracker.NAME], True)
Example #20
0
 async def _add_memory_source(self):
     """
     Register a MemorySource holding self.num_repos repos, and a context
     for it, with the app under self.slabel; yield while both are open.

     Repo i has the key str(i) and a "by_ten" feature of i * 10.
     """
     self.features = Features(DefFeature("by_ten", int, 1))
     repos = [
         Repo(str(number), data={"features": {
             "by_ten": number * 10
         }}) for number in range(self.num_repos)
     ]
     async with MemorySource(MemorySourceConfig(repos=repos)) as source:
         self.source = source
         self.cli.app["sources"][self.slabel] = source
         async with source() as sctx:
             self.sctx = sctx
             self.cli.app["source_contexts"][self.slabel] = sctx
             yield
Example #21
0
 def model_dir_path(self, features: Features):
     '''
     Return the path of the directory for a model using ``features``.

     The directory name is the sha256 hex digest of the feature names joined
     without a separator, and it is placed inside ``self.model_dir``.

     Returns None when ``self.model_dir`` is None. Raises
     NotADirectoryError when ``self.model_dir`` is not a directory.
     '''
     base_dir = self.model_dir
     if base_dir is None:
         return None
     joined_names = ''.join(features.names())
     digest = hashlib.sha256(joined_names.encode('utf-8')).hexdigest()
     if os.path.isdir(base_dir):
         return os.path.join(base_dir, digest)
     raise NotADirectoryError('%s is not a directory' % (base_dir))
Example #22
0
 def setUpClass(cls):
     """
     Create an SLR model predicting "Y" in a temporary directory and one
     in-memory repo per row of FEATURE_DATA.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     model_config = SLRConfig(directory=cls.model_dir.name, predict="Y")
     cls.model = SLR(model_config)
     cls.feature = DefFeature("X", float, 1)
     cls.features = Features(cls.feature)
     # Transpose FEATURE_DATA into its "X" column and its "Y" column
     x_values, y_values = list(zip(*FEATURE_DATA))
     cls.repos = [
         Repo(str(index), data={"features": {"X": x_value, "Y": y_value}})
         for index, (x_value, y_value) in enumerate(
             zip(x_values, y_values)
         )
     ]
     source_config = MemorySourceConfig(repos=cls.repos)
     cls.sources = Sources(MemorySource(source_config))
Example #23
0
class TestFeatures(AsyncTestCase):
    """Tests for a Features collection built from feature testers."""

    def setUp(self):
        # One instance of each feature tester
        self.one, self.two, self.three = (
            OneFeatureTester(),
            TwoFeatureTester(),
            ThreeFeatureTester(),
        )
        self.features = Features(self.one, self.two, self.three)

    async def test_names(self):
        """names() contains "one", "two" and "three"."""
        async with self.features:
            reported = self.features.names()
            for expected in ("one", "two", "three"):
                self.assertIn(expected, reported)

    async def test_applicable(self):
        """For "test", features one and two are applicable; three is not."""
        async with self.features:
            usable = await self.features.applicable("test")
            for feature in (self.one, self.two):
                self.assertIn(feature, usable)
            self.assertNotIn(self.three, usable)

    async def test_evaluate(self):
        """Evaluating "test" gives False for one, True for two, none for three."""
        async with self.features:
            outcome = await self.features.evaluate("test")
            for feature in (self.one, self.two):
                self.assertIn(feature.NAME, outcome)
            self.assertNotIn(self.three.NAME, outcome)
            self.assertEqual(outcome[self.one.NAME], False)
            self.assertEqual(outcome[self.two.NAME], True)

    async def test_one_applicable_other_not(self):
        """Of self.two and a TwoBFeatureTester, only self.two has a result."""
        other = TwoBFeatureTester()
        pair = Features(self.two, other)
        async with pair:
            outcome = await pair.evaluate("test")
            self.assertIn(self.two.NAME, outcome)
            self.assertEqual(len(outcome), 1)
            self.assertEqual(outcome[self.two.NAME], True)

    async def test_monitor_progess(self):
        """A submitted feature exposes logs containing "Hi" and one result."""
        tracker = ProgessFeatureTester()
        collection = Features(tracker)
        async with collection:
            submitted = await collection.submit("test")
            log_output = await submitted.logs()
            outcome = await submitted.result()
            self.assertTrue(log_output)
            self.assertIn("Hi", log_output)
            self.assertIn(tracker.NAME, outcome)
            self.assertEqual(len(outcome), 1)
            self.assertEqual(outcome[tracker.NAME], True)
Example #24
0
 def model_dir_path(self, features: Features):
     """
     Return the path of the directory for a model using ``features``.

     The directory name is the sha384 hex digest of the feature names joined
     without a separator, and it is placed inside the parent's configured
     ``directory``.

     Returns None when that directory is None. Raises NotADirectoryError
     when it is not a directory.
     """
     base_dir = self.parent.config.directory
     if base_dir is None:
         return None
     joined_names = "".join(features.names())
     digest = hashlib.sha384(joined_names.encode("utf-8")).hexdigest()
     if os.path.isdir(base_dir):
         return os.path.join(base_dir, digest)
     raise NotADirectoryError("%s is not a directory" % (base_dir))
Example #25
0
 def setUp(self):
     """
     Create one repo per calculator string and the OperationsAll command
     that maps the 'result' output onto the 'string_calculator' feature.
     """
     super().setUp()
     # Calculator input string -> value stored alongside it
     self.repo_keys = {'add 40 and 2': 42, 'multiply 42 and 10': 420}
     self.repos = [Repo(key) for key in self.repo_keys]
     self.sources = Sources(RepoSource(*self.repos))
     self.features = Features(DefFeature('string_calculator', int, 1))
     output_specs = [(
         ['result'],
         'get_single_spec',
     )]
     remap = [('get_single', 'result', 'string_calculator')]
     self.cli = OperationsAll(ops=OPERATIONS,
                              opimpn_memory_opimps=OPIMPS,
                              repo_def='calc_string',
                              output_specs=output_specs,
                              remap=remap,
                              sources=self.sources,
                              features=self.features)
Example #26
0
class TestFeatures(AsyncTestCase):
    """Tests for a Features collection built from feature testers."""

    def setUp(self):
        # One instance of each feature tester
        self.one, self.two, self.three = (
            OneFeatureTester(),
            TwoFeatureTester(),
            ThreeFeatureTester(),
        )
        self.features = Features(self.one, self.two, self.three)

    async def test_names(self):
        """names() contains "one", "two" and "three"."""
        async with self.features:
            reported = self.features.names()
            for expected in ("one", "two", "three"):
                self.assertIn(expected, reported)

    async def test_applicable(self):
        """For "test", features one and two are applicable; three is not."""
        async with self.features:
            usable = await self.features.applicable("test")
            for feature in (self.one, self.two):
                self.assertIn(feature, usable)
            self.assertNotIn(self.three, usable)

    async def test_evaluate(self):
        """Evaluating "test" gives False for one, True for two, none for three."""
        async with self.features:
            outcome = await self.features.evaluate("test")
            for feature in (self.one, self.two):
                self.assertIn(feature.NAME, outcome)
            self.assertNotIn(self.three.NAME, outcome)
            self.assertEqual(outcome[self.one.NAME], False)
            self.assertEqual(outcome[self.two.NAME], True)

    async def test_one_applicable_other_not(self):
        """Of self.two and a TwoBFeatureTester, only self.two has a result."""
        other = TwoBFeatureTester()
        pair = Features(self.two, other)
        async with pair:
            outcome = await pair.evaluate("test")
            self.assertIn(self.two.NAME, outcome)
            self.assertEqual(len(outcome), 1)
            self.assertEqual(outcome[self.two.NAME], True)
Example #27
0
    def setUpClass(cls):
        """
        Build one record per row of DATA, an in-memory source holding them,
        and the HFClassificationModel under test.
        """
        cls.features = Features()
        cls.features.append(Feature("A", str, 1))
        # Transpose DATA into its "A" column and its "X" column
        a_values, x_values = list(zip(*DATA))
        cls.records = [
            Record(str(index), data={"features": {
                "A": a_value,
                "X": x_value
            }}) for index, (a_value, x_value) in enumerate(
                zip(a_values, x_values))
        ]

        source_config = MemorySourceConfig(records=cls.records)
        cls.sources = Sources(MemorySource(source_config))
        cls.model_dir = tempfile.TemporaryDirectory()
        # The temporary directory serves as cache, logging and output dir
        model_config = HFClassificationModelConfig(
            model_name_or_path="bert-base-cased",
            cache_dir=cls.model_dir.name,
            logging_dir=cls.model_dir.name,
            output_dir=cls.model_dir.name,
            features=cls.features,
            predict=Feature("X", int, 1),
            label_list=["0", "1"],
        )
        cls.model = HFClassificationModel(model_config)
Example #28
0
    def setUpClass(cls):
        """
        Build the features, records, source, model and scorer for the model
        type under test.

        cls.MODEL_TYPE selects the data set (classification, regression or
        clustering) and which columns become input features. The estimator
        type of cls.MODEL.SCIKIT_MODEL selects the "predict" config value.
        """
        cls.is_multi = "MULTI_" in cls.MODEL_TYPE
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        if cls.MODEL_TYPE in classifier_types:
            # Transpose the rows into one tuple per column
            A, B, C, D, E, F, G, H, X, Y = list(
                zip(*FEATURE_DATA_CLASSIFICATION)
            )
            for name in "ABCDEFGH":
                cls.features.append(Feature(name, float, 1))
            if cls.MODEL_TYPE == "CLASSIFICATION":
                cls.features.append(Feature("X", float, 1))
            columns = {
                "A": A,
                "B": B,
                "C": C,
                "D": D,
                "E": E,
                "F": F,
                "G": G,
                "H": H,
                "X": X,
                "Y": Y,
            }
            cls.records = [
                Record(
                    str(row),
                    data={
                        "features": {
                            name: column[row]
                            for name, column in columns.items()
                        }
                    },
                )
                for row in range(len(A))
            ]

        elif cls.MODEL_TYPE in regressor_types:
            for name in "ABCD":
                cls.features.append(Feature(name, float, 1))
            if cls.MODEL_TYPE == "REGRESSION":
                cls.features.append(Feature("X", float, 1))
            # Transpose the rows into one tuple per column
            A, B, C, D, X, Y = list(zip(*FEATURE_DATA_REGRESSION))
            columns = {"A": A, "B": B, "C": C, "D": D, "X": X, "Y": Y}
            cls.records = [
                Record(
                    str(row),
                    data={
                        "features": {
                            name: column[row]
                            for name, column in columns.items()
                        }
                    },
                )
                for row in range(len(A))
            ]
        elif cls.MODEL_TYPE == "CLUSTERING":
            for name in "ABCD":
                cls.features.append(Feature(name, float, 1))
            # Transpose the rows into one tuple per column
            A, B, C, D, X = list(zip(*FEATURE_DATA_CLUSTERING))
            columns = {"A": A, "B": B, "C": C, "D": D, "X": X}
            cls.records = [
                Record(
                    str(row),
                    data={
                        "features": {
                            name: column[row]
                            for name, column in columns.items()
                        }
                    },
                )
                for row in range(len(A))
            ]

        source_config = MemorySourceConfig(records=cls.records)
        cls.sources = Sources(MemorySource(source_config))
        # Keyword arguments for cls.MODEL_CONFIG
        model_kwargs = {
            "location": cls.model_dir.name,
            "features": cls.features,
        }
        estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
        if estimator_type in supervised_estimators:
            # Multi-output models predict both X and Y, others only X
            model_kwargs["predict"] = (
                Features(Feature("X", float, 1), Feature("Y", float, 1))
                if cls.is_multi
                else Feature("X", float, 1)
            )
        elif estimator_type in unsupervised_estimators:
            # TODO If cls.TRUE_CLSTR_PRESENT then we want to use the
            # mutual_info_score scikit accuracy scorer. In this case we might
            # want to change tcluster to a boolean config property.
            # For more info see commit e4f523976bf37d3457cda140ceab7899420ae2c7
            model_kwargs["predict"] = Feature("X", float, 1)
        cls.model = cls.MODEL(cls.MODEL_CONFIG(**model_kwargs))
        cls.scorer = cls.SCORER()
Example #29
0
 def setUp(self):
     """
     Create three features named "one", "two" and "three" and a Features
     collection holding them.
     """
     self.one, self.two, self.three = (
         Feature("one", int, 1),
         Feature("two", float, 2),
         Feature("three", int, 1),
     )
     self.features = Features(self.one, self.two, self.three)
Example #30
0
    def setUpClass(cls):
        """
        Build the fixtures for the model type under test.

        Depending on cls.MODEL_TYPE ("CLASSIFICATION", "REGRESSION" or
        "CLUSTERING") this defines the input features and loads the matching
        feature data into in-memory repos. It then creates cls.MODEL
        (configured through cls.MODEL_CONFIG) in a temporary directory:
        supervised estimators predict "X"; unsupervised estimators get "X"
        as "tcluster" only when cls.TRUE_CLSTR_PRESENT is set.
        """
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        # Compare with == rather than `is`: whether two equal strings are
        # the same object depends on interning, so `is` against a literal
        # can be False for an equal value (and is a SyntaxWarning on
        # Python 3.8+).
        if cls.MODEL_TYPE == "CLASSIFICATION":
            for name in "ABCDEFGHI":
                cls.features.append(DefFeature(name, float, 1))
            # Transpose the rows into one tuple per column
            A, B, C, D, E, F, G, H, I, X = list(
                zip(*FEATURE_DATA_CLASSIFICATION))
            cls.repos = [
                Repo(
                    str(i),
                    data={
                        "features": {
                            "A": A[i],
                            "B": B[i],
                            "C": C[i],
                            "D": D[i],
                            "E": E[i],
                            "F": F[i],
                            "G": G[i],
                            "H": H[i],
                            "I": I[i],
                            "X": X[i],
                        }
                    },
                ) for i in range(len(A))
            ]
        elif cls.MODEL_TYPE == "REGRESSION":
            for name in "ABC":
                cls.features.append(DefFeature(name, float, 1))
            # Transpose the rows into one tuple per column
            A, B, C, X = list(zip(*FEATURE_DATA_REGRESSION))
            cls.repos = [
                Repo(
                    str(i),
                    data={
                        "features": {
                            "A": A[i],
                            "B": B[i],
                            "C": C[i],
                            "X": X[i],
                        }
                    },
                ) for i in range(len(A))
            ]
        elif cls.MODEL_TYPE == "CLUSTERING":
            for name in "ABCD":
                cls.features.append(DefFeature(name, float, 1))
            # Transpose the rows into one tuple per column
            A, B, C, D, X = list(zip(*FEATURE_DATA_CLUSTERING))
            cls.repos = [
                Repo(
                    str(i),
                    data={
                        "features": {
                            "A": A[i],
                            "B": B[i],
                            "C": C[i],
                            "D": D[i],
                            "X": X[i],
                        }
                    },
                ) for i in range(len(A))
            ]

        # NOTE(review): no branch above assigns cls.repos for any other
        # MODEL_TYPE - confirm only these three values are used.
        cls.sources = Sources(MemorySource(
            MemorySourceConfig(repos=cls.repos)))
        properties = {
            "directory": cls.model_dir.name,
            "features": cls.features,
        }
        # Extra config fields that depend on the kind of estimator
        config_fields = dict()
        estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
        if estimator_type in supervised_estimators:
            config_fields["predict"] = DefFeature("X", float, 1)
        elif estimator_type in unsupervised_estimators:
            if cls.TRUE_CLSTR_PRESENT:
                config_fields["tcluster"] = DefFeature("X", float, 1)
        cls.model = cls.MODEL(
            cls.MODEL_CONFIG(**{
                **properties,
                **config_fields
            }))