Beispiel #1
0
 def setUpClass(cls):
     """
     Build the fixtures shared by every test in the class.

     Creates a temporary model directory, the feature definitions, one
     in-memory repo per data row, the sources wrapping those repos, and the
     model under test.

     ``cls.MODEL_TYPE`` selects the data set: ``"CLASSIFICATION"`` defines
     the float features A through I and reads
     ``FEATURE_DATA_CLASSIFICATION``; ``"REGRESSION"`` defines the float
     features A through C and reads ``FEATURE_DATA_REGRESSION``. In both
     cases the last column is stored under ``"X"``, which is the name the
     model is configured to predict.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.features = Features()
     # Compare strings by value. Identity (``is``) against a literal only
     # works when the interpreter happens to intern both strings, and it
     # emits a SyntaxWarning on Python 3.8+.
     if cls.MODEL_TYPE == "CLASSIFICATION":
         for name in ("A", "B", "C", "D", "E", "F", "G", "H", "I"):
             cls.features.append(DefFeature(name, float, 1))
         # Transpose the rows into one tuple per column
         A, B, C, D, E, F, G, H, I, X = list(
             zip(*FEATURE_DATA_CLASSIFICATION))
         cls.repos = [
             Repo(
                 str(i),
                 data={
                     "features": {
                         "A": A[i],
                         "B": B[i],
                         "C": C[i],
                         "D": D[i],
                         "E": E[i],
                         "F": F[i],
                         "G": G[i],
                         "H": H[i],
                         "I": I[i],
                         "X": X[i],
                     }
                 },
             ) for i in range(len(A))
         ]
     elif cls.MODEL_TYPE == "REGRESSION":
         for name in ("A", "B", "C"):
             cls.features.append(DefFeature(name, float, 1))
         # Transpose the rows into one tuple per column
         A, B, C, X = list(zip(*FEATURE_DATA_REGRESSION))
         cls.repos = [
             Repo(
                 str(i),
                 data={
                     "features": {
                         "A": A[i],
                         "B": B[i],
                         "C": C[i],
                         "X": X[i],
                     }
                 },
             ) for i in range(len(A))
         ]
     cls.sources = Sources(MemorySource(
         MemorySourceConfig(repos=cls.repos)))
     cls.model = cls.MODEL(
         cls.MODEL_CONFIG(
             directory=cls.model_dir.name,
             predict="X",
             features=cls.features,
         ))
Beispiel #2
0
 async def update(self, repo: Repo):
     """
     Write a repo to the database.

     The feature row is always inserted or replaced. The prediction row is
     written only when ``repo.prediction()`` is truthy, and the
     classification row only when ``repo.classified()`` is truthy.
     """
     db = self.parent.db

     # Feature row: every configured column is present, defaulting to None
     columns = self.parent.FEATURE_COLS
     feature_row = OrderedDict.fromkeys(columns)
     feature_row.update(repo.features(columns))
     feature_sql = (
         "INSERT OR REPLACE INTO features (src_url, "
         + ", ".join(columns)
         + ") VALUES(?, "
         + ", ".join("?" * len(columns))
         + ")"
     )
     await db.execute(feature_sql, [repo.src_url, *feature_row.values()])

     # Prediction row
     prediction = repo.prediction()
     if prediction:
         columns = self.parent.PREDICTION_COLS
         prediction_row = OrderedDict.fromkeys(columns)
         prediction_row.update(prediction.dict())
         prediction_sql = (
             "INSERT OR REPLACE INTO prediction (src_url, "
             + ", ".join(columns)
             + ") VALUES(?, "
             + ", ".join("?" * len(columns))
             + ")"
         )
         await db.execute(
             prediction_sql, [repo.src_url, *prediction_row.values()]
         )

     # Classification row
     if repo.classified():
         classification_sql = (
             "INSERT OR REPLACE INTO classification "
             "(src_url, classification) VALUES(?, ?)"
         )
         await db.execute(
             classification_sql, [repo.src_url, repo.classification()]
         )
Beispiel #3
0
 def setUpClass(cls):
     """
     Create the model, feature set and 2000 in-memory repos for the tests.

     The first 1000 repos have keys starting with 'a', feature value 1 and
     classification 'a'; the next 1000 have keys starting with 'b', feature
     value 0 and classification 'not a'.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.model = Misc(ModelConfig(directory=cls.model_dir.name))
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     cls.classifications = ['a', 'not a']

     def make_repo(prefix, flag, label):
         # The key is the prefix followed by a random number
         return Repo(
             prefix + str(random.random()),
             data={
                 'features': {cls.feature.NAME: flag},
                 'classification': label,
             },
         )

     positives = [make_repo('a', 1, 'a') for _ in range(1000)]
     negatives = [make_repo('b', 0, 'not a') for _ in range(1000)]
     cls.repos = positives + negatives
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))
Beispiel #4
0
 async def test_update(self):
     """
     POST an updated repo to the source's update endpoint and check that the
     source context then returns the new feature value.
     """
     key = "1"
     replacement = Repo(key, data={"features": {"by_ten": 10}})
     url = f"/source/{self.slabel}/update/{key}"
     async with self.post(url, json=replacement.export()) as r:
         body = await r.json()
         self.assertEqual(body, OK)
     stored = await self.sctx.repo(key)
     self.assertEqual(stored.feature("by_ten"), 10)
Beispiel #5
0
    async def repo(self, key: str):
        """
        Fetch a single repo by running the configured ``repo_query``.

        Columns prefixed with ``feature_`` become features (prefix removed).
        For every column containing ``_value`` a prediction entry is built
        from the matching ``<target>_value`` and ``<target>_confidence``
        columns. When the query returns no row, an empty repo for ``key``
        is returned.
        """
        query = self.parent.config.repo_query
        repo = Repo(key)
        db = self.conn
        await db.execute(query, (key, ))
        row = await db.fetchone()

        # Nothing stored under this key
        if row is None:
            return repo

        features = {}
        predictions = {}
        for column, value in row.items():
            if column.startswith("feature_"):
                features[column.replace("feature_", "")] = value
                continue
            if "_value" not in column:
                continue
            target = column.replace("_value", "")
            predictions[target] = {
                "value": row[target + "_value"],
                "confidence": row[target + "_confidence"],
            }

        found = Repo(
            row["key"],
            data={
                "features": features,
                "prediction": predictions
            },
        )
        repo.merge(found)
        return repo
Beispiel #6
0
 def setUpClass(cls):
     """
     Create the feature set, the model and 2000 in-memory repos.

     The first 1000 repos have keys starting with "a", feature value 1 and
     a "string" feature of "a"; the next 1000 have keys starting with "b",
     feature value 0 and a "string" feature of "not a".
     """
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     cls.model_dir = tempfile.TemporaryDirectory()
     config = MiscModelConfig(
         directory=cls.model_dir.name,
         classifications=["not a", "a"],
         features=cls.features,
     )
     cls.model = MiscModel(config)

     def make_repo(prefix, flag, label):
         # The key is the prefix followed by a random number
         return Repo(
             prefix + str(random.random()),
             data={"features": {cls.feature.NAME: flag, "string": label}},
         )

     positives = [make_repo("a", 1, "a") for _ in range(1000)]
     negatives = [make_repo("b", 0, "not a") for _ in range(1000)]
     cls.repos = positives + negatives
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))
Beispiel #7
0
 def setUpClass(cls):
     """
     Create the DNN classifier, the feature set and 2000 in-memory repos.

     The first 1000 repos have keys starting with "a", feature value 1 and
     a "string" feature of "a"; the next 1000 have keys starting with "b",
     feature value 0 and a "string" feature of "not a".
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     config = DNNClassifierModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=30,
         hidden=[10, 20, 10],
         classification="string",
         classifications=["a", "not a"],
         clstype=str,
     )
     cls.model = DNNClassifierModel(config)
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)

     def make_repo(prefix, flag, label):
         # The key is the prefix followed by a random number
         return Repo(
             prefix + str(random.random()),
             data={"features": {cls.feature.NAME: flag, "string": label}},
         )

     positives = [make_repo("a", 1, "a") for _ in range(1000)]
     negatives = [make_repo("b", 0, "not a") for _ in range(1000)]
     cls.repos = positives + negatives
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))
Beispiel #8
0
 async def update(self, repo: Repo):
     """
     Write a repo's feature row and, when available, its prediction row.

     A ``KeyError`` raised anywhere while fetching or storing the
     "target_name" prediction is ignored, leaving only the feature row
     written.
     """
     db = self.parent.db

     # Feature row: every configured column is present, defaulting to None
     columns = self.parent.FEATURE_COLS
     feature_row = OrderedDict.fromkeys(columns)
     feature_row.update(repo.features(columns))
     feature_sql = (
         "INSERT OR REPLACE INTO features (key, "
         + ", ".join(columns)
         + ") VALUES(?, "
         + ", ".join("?" * len(columns))
         + ")"
     )
     await db.execute(feature_sql, [repo.key, *feature_row.values()])

     # Prediction row
     try:
         prediction = repo.prediction("target_name")
         columns = self.parent.PREDICTION_COLS
         prediction_row = OrderedDict.fromkeys(columns)
         prediction_row.update(prediction.dict())
         prediction_sql = (
             "INSERT OR REPLACE INTO prediction (key, "
             + ", ".join(columns)
             + ") VALUES(?, "
             + ", ".join("?" * len(columns))
             + ")"
         )
         await db.execute(
             prediction_sql, [repo.key, *prediction_row.values()]
         )
     except KeyError:
         # NOTE(review): presumably raised when the repo has no prediction
         # for "target_name" - confirm against Repo.prediction
         return
Beispiel #9
0
 async def test_label(self):
     """
     Store the same key through an unlabeled and a labeled source sharing
     one file, then check both the sources and the raw CSV keep the two
     repos apart by label.
     """
     with tempfile.TemporaryDirectory() as testdir:
         self.testfile = os.path.join(testdir, str(random.random()))
         unlabeled = await self.setUpSource()
         labeled = await self.setUpSource()
         labeled.config = labeled.config._replace(label="somelabel")
         async with unlabeled, labeled:
             # First pass: write one repo through each source
             async with unlabeled() as uctx, labeled() as lctx:
                 feed_repo = Repo("0", data={"features": {"feed": 1}})
                 await uctx.update(feed_repo)
                 face_repo = Repo("0", data={"features": {"face": 2}})
                 await lctx.update(face_repo)
             # Second pass: read the repos back through fresh contexts
             async with unlabeled() as uctx, labeled() as lctx:
                 repo = await uctx.repo("0")
                 self.assertIn("feed", repo.features())
                 repo = await lctx.repo("0")
                 self.assertIn("face", repo.features())
         # Inspect the file the sources wrote
         with open(self.testfile, "r") as fd:
             rows = {}
             for row in csv.DictReader(fd, dialect="strip"):
                 rows[row["label"]] = {row["src_url"]: row}
             expectations = (
                 ("unlabeled", "feed", "1"),
                 ("somelabel", "face", "2"),
             )
             for label, _, _ in expectations:
                 self.assertIn(label, rows)
             for label, _, _ in expectations:
                 self.assertIn("0", rows[label])
             for label, column, _ in expectations:
                 self.assertIn(column, rows[label]["0"])
             for label, column, value in expectations:
                 self.assertEqual(value, rows[label]["0"][column])
Beispiel #10
0
 def setUp(self):
     """
     Create two repos for the tests: ``self.null`` with no data and
     ``self.full`` with a feature, an "extra" data entry and an ``extra``
     keyword argument.
     """
     self.null = Repo("null")
     full_data = {
         "features": {"dead": "beef"},
         "extra": {"extra": "read all about it"},
     }
     self.full = Repo("full", data=full_data, extra={"half": True})
Beispiel #11
0
 async def test_02_predict(self):
     """
     Predict on two repos whose keys equal their expected classification
     and check that each prediction matches its repo's key.
     """
     positive = Repo("a", data={"features": {self.feature.NAME: 1}})
     negative = Repo("not a", data={"features": {self.feature.NAME: 0}})
     memory = Sources(
         MemorySource(MemorySourceConfig(repos=[positive, negative])))
     async with memory as sources, self.model as model:
         async with sources() as sctx, model() as mctx:
             seen = 0
             predictions = mctx.predict(sctx.repos())
             async for repo, prediction, confidence in predictions:
                 with self.subTest(repo=repo):
                     self.assertEqual(prediction, repo.key)
                 seen += 1
             # Both repos must have been yielded
             self.assertEqual(seen, 2)
Beispiel #12
0
 async def repo(self, src_url: str):
     """
     Load a repo from the database.

     Reads the feature row, the prediction row and the classification row
     for ``src_url`` and applies each one to the repo when it exists.
     """
     db = self.parent.db
     repo = Repo(src_url)

     # Features
     feature_sql = (
         "SELECT "
         + ", ".join(self.parent.FEATURE_COLS)
         + " FROM features WHERE src_url=?"
     )
     cursor = await db.execute(feature_sql, (repo.src_url,))
     feature_row = await cursor.fetchone()
     if feature_row is not None:
         repo.evaluated(feature_row)

     # Prediction
     cursor = await db.execute(
         "SELECT * FROM prediction WHERE src_url=?", (repo.src_url,)
     )
     prediction_row = await cursor.fetchone()
     if prediction_row is not None:
         repo.predicted(
             prediction_row["classification"], prediction_row["confidence"]
         )

     # Classification
     cursor = await db.execute(
         "SELECT * FROM classification WHERE src_url=?", (repo.src_url,)
     )
     classification_row = await cursor.fetchone()
     if classification_row is not None:
         repo.classify(classification_row["classification"])

     return repo
    async def model_predict(self, request, mctx):
        """
        Run the repos from the request body through the model's prediction.

        The JSON body maps ``src_url`` to repo data. Only a ``chunk_size``
        of 0 is accepted; any other value produces a 400 response. The
        response holds ``"iterkey": None`` and the exported repos keyed by
        ``src_url``.
        """
        # TODO Provide an iterkey method for model prediction
        if int(request.match_info["chunk_size"]) != 0:
            return web.json_response(
                {"error": "Multiple request iteration not yet supported"},
                status=HTTPStatus.BAD_REQUEST,
            )

        # Build the repos from the request body
        payload = await request.json()
        repos = {
            src_url: Repo(src_url, data=repo_data)
            for src_url, repo_data in payload.items()
        }

        # Async generator feeding the repos to the model
        async def repo_gen():
            for repo in repos.values():
                yield repo

        # Collect the exported repos as prediction yields them
        predicted = {}
        async for repo in mctx.predict(repo_gen()):
            predicted[repo.src_url] = repo.export()

        return web.json_response({"iterkey": None, "repos": predicted})
Beispiel #14
0
 async def setUp(self):
     """
     Write ten randomly keyed repos to a JSON source file, check that the
     file holds them, and patch the model, feature and operation loaders
     for the duration of the test.
     """
     await super().setUp()
     self.repos = [Repo(str(random.random())) for _ in range(10)]
     self.temp_filename = self.mktempfile()
     self.sconfig = FileSourceConfig(filename=self.temp_filename)
     async with JSONSource(self.sconfig) as source:
         async with source() as sctx:
             for repo in self.repos:
                 await sctx.update(repo)

     # Ensure there are repos in the file
     saved = json.loads(Path(self.sconfig.filename).read_text())
     self.assertEqual(
         len(saved.get(self.sconfig.label)),
         len(self.repos),
         "ReposTestCase JSON file erroneously initialized as empty",
     )

     # TODO(p3) For some reason patching Model.load doesn't work, so the
     # ``type`` of ModelCMD.arg_model is replaced instead
     model_arg = ModelCMD.arg_model.modify(type=model_load)
     self._stack.enter_context(
         patch.object(ModelCMD, "arg_model", new=model_arg))

     loader_patches = (
         ("dffml.feature.feature.Feature.load", feature_load),
         ("dffml.df.base.OperationImplementation.load", opimp_load),
         ("dffml.df.types.Operation.load", op_load),
     )
     for target, replacement in loader_patches:
         self._stack.enter_context(patch(target, new=replacement))
Beispiel #15
0
 def setUpClass(cls):
     """
     Create the DNN regression model and 2000 in-memory repos whose
     "TARGET" feature is ``2 * x1 + 3 * x2`` for two random inputs.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature1 = Feature_1()
     cls.feature2 = Feature_2()
     cls.features = Features(cls.feature1, cls.feature2)
     config = DNNRegressionModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=DefFeature("TARGET", float, 1),
         features=cls.features,
     )
     cls.model = DNNRegressionModel(config)

     # Generate data for f(x1, x2) = 2*x1 + 3*x2
     sample_count = 2000
     samples = np.random.rand(2, sample_count)
     first, second = samples[0], samples[1]
     cls.repos = []
     for i in range(sample_count):
         features = {
             cls.feature1.NAME: float(first[i]),
             cls.feature2.NAME: float(second[i]),
             # The target is stored as computed, without a float() cast
             "TARGET": 2 * first[i] + 3 * second[i],
         }
         cls.repos.append(
             Repo("x" + str(random.random()), data={"features": features}))
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))
Beispiel #16
0
 def setUpClass(cls):
     """
     Create the text classification model and one in-memory repo per row
     of ``DATA``, storing the first column as feature "A" and the second
     as "X", the value the model is configured to predict.
     """
     cls.features = Features()
     cls.features.append(DefFeature("A", str, 1))
     A, X = list(zip(*DATA))
     cls.repos = [
         Repo(str(i), data={"features": {"A": text, "X": label}})
         for i, (text, label) in enumerate(zip(A, X))
     ]
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))
     cls.model_dir = tempfile.TemporaryDirectory()
     # NOTE(review): the second layer passes ``relu`` unquoted, unlike the
     # other layers - confirm this is intended.
     extra_layers = [
         "Dense(units = 120, activation='relu')",
         "Dense(units = 64, activation=relu)",
         "Dense(units = 2, activation='softmax')",
     ]
     config = TextClassifierConfig(
         directory=cls.model_dir.name,
         classifications=[0, 1],
         features=cls.features,
         predict=DefFeature("X", int, 1),
         add_layers=True,
         layers=extra_layers,
         model_path=
         "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1",
         epochs=30,
     )
     cls.model = TextClassificationModel(config)
Beispiel #17
0
 async def test_02_predict(self):
     """
     Predict the target for one repo and check that the result is within
     the error threshold of ``2 * x1 + 3 * x2``.
     """
     x1, x2 = 1.5, 2
     # Should be the same function used to generate the training data
     test_target = 2 * x1 + 3 * x2
     a = Repo(
         "a",
         data={
             "features": {
                 self.feature1.NAME: x1,
                 self.feature2.NAME: x2,
             }
         },
     )
     memory = Sources(MemorySource(MemorySourceConfig(repos=[a])))
     async with memory as sources, self.model as model:
         target_name = model.config.predict.NAME
         async with sources() as sctx, model() as mctx:
             res = [repo async for repo in mctx.predict(sctx.repos())]
             self.assertEqual(len(res), 1)
         predicted = res[0]
         self.assertEqual(predicted.key, a.key)
         relative_error = (
             test_target - predicted.prediction(target_name).value
         ) / test_target
         # The small constant is added to the relative error itself
         test_error_norm = abs(relative_error + 1e-6)
         error_threshold = 0.3
         self.assertLess(test_error_norm, error_threshold)
Beispiel #18
0
 async def test_predict(self):
     """
     POST every repo from the source context to the predict endpoint and
     check key order, prediction value and confidence of each returned
     repo, as well as the number of repos returned.
     """
     payload = {
         repo.key: repo.export() async for repo in self.sctx.repos()
     }
     url = f"/model/{self.mlabel}/predict/0"
     async with self.post(url, json=payload) as r:
         response = await r.json()
         returned = response["repos"]
         for index, (key, repo_data) in enumerate(returned.items()):
             repo = Repo(key, data=repo_data)
             # Repos are expected back keyed 0, 1, 2, ... in order
             self.assertEqual(int(repo.key), index)
             self.assertEqual(
                 repo.feature("by_ten"), repo.prediction().value / 10
             )
             self.assertEqual(float(repo.key), repo.prediction().confidence)
         self.assertEqual(len(returned), self.num_repos)
Beispiel #19
0
 async def test_02_predict(self):
     """
     Predict on a single repo and check that the same repo comes back with
     a truthy prediction value.
     """
     a = Repo("a", data={"features": {self.feature.NAME: 1}})
     memory = Sources(MemorySource(MemorySourceConfig(repos=[a])))
     async with memory as sources, self.model as model:
         async with sources() as sctx, model() as mctx:
             res = [repo async for repo in mctx.predict(sctx.repos())]
             self.assertEqual(len(res), 1)
         predicted = res[0]
         self.assertEqual(predicted.src_url, a.src_url)
         self.assertTrue(predicted.prediction().value)
Beispiel #20
0
    async def test_update(self):
        """
        Save one repo with a prediction and one without to a CSV source,
        reopen the source and check the prediction fields of both.
        """
        full_src_url = '0'
        empty_src_url = '1'
        measurements = {
            "PetalLength": 3.9,
            "PetalWidth": 1.2,
            "SepalLength": 5.8,
            "SepalWidth": 2.7,
        }
        # Each repo gets its own copy of the feature values
        full_data = {
            "classification": "1",
            "features": dict(measurements),
            "prediction": {
                "classification": "feedface",
                "confidence": 0.42
            },
        }
        empty_data = {
            "classification": "1",
            "features": dict(measurements),
        }
        full_repo = Repo(full_src_url, data=full_data)
        empty_repo = Repo(empty_src_url, data=empty_data)

        with tempfile.NamedTemporaryFile() as csvfile:
            csvSource = CSVSource(csvfile.name)
            # Open, update, and close
            async with csvSource as source:
                for repo in (full_repo, empty_repo):
                    await source.update(repo)
            # Open and confirm we saved and loaded correctly
            async with csvSource as source:
                with self.subTest(src_url=full_src_url):
                    repo = await source.repo(full_src_url)
                    loaded = repo.data.prediction
                    self.assertEqual(loaded.classification, "feedface")
                    loaded = repo.data.prediction
                    self.assertEqual(loaded.confidence, 0.42)
                with self.subTest(src_url=empty_src_url):
                    repo = await source.repo(empty_src_url)
                    self.assertFalse(repo.data.prediction.classification)
                    self.assertFalse(repo.data.prediction.confidence)
Beispiel #21
0
 async def setUp(self):
     """
     Create ten randomly keyed repos and store them in a JSON source backed
     by a temporary file.
     """
     # NOTE(review): super().setUp() is not awaited here - if the parent's
     # setUp is a coroutine function this call does not run it; confirm.
     super().setUp()
     self.repos = [Repo(str(random.random())) for _ in range(10)]
     # The context manager is entered here and kept on the instance
     self.__temp_filename = non_existant_tempfile()
     self.temp_filename = self.__temp_filename.__enter__()
     self.sconfig = FileSourceConfig(filename=self.temp_filename)
     json_source = JSONSource(self.sconfig)
     async with json_source as source:
         async with source() as sctx:
             for repo in self.repos:
                 await sctx.update(repo)
Beispiel #22
0
 async def _add_memory_source(self):
     """
     Register an in-memory source and its context with the CLI app, then
     yield while both are open.

     The source holds ``self.num_repos`` repos keyed "0", "1", ... whose
     "by_ten" feature is ten times the key.
     """
     repos = [
         Repo(str(i), data={"features": {"by_ten": i * 10}})
         for i in range(self.num_repos)
     ]
     async with MemorySource(MemorySourceConfig(repos=repos)) as source:
         self.source = source
         self.cli.app["sources"][self.slabel] = source
         async with source() as sctx:
             self.sctx = sctx
             self.cli.app["source_contexts"][self.slabel] = sctx
             yield
Beispiel #23
0
 async def test_02_predict(self):
     """
     Predict on a single repo and check that the first result pairs the
     same repo with a truthy second element.
     """
     a = Repo('a', data={'features': {self.feature.NAME: 1}})
     repo_sources = Sources(RepoSource(a))
     async with repo_sources as sources, self.features as features:
         predictions = self.model.predict(
             sources.repos(), features, self.classifications)
         res = [result async for result in predictions]
         self.assertEqual(len(res), 1)
         first = res[0]
         self.assertEqual(first[0].src_url, a.src_url)
         self.assertTrue(first[1])
Beispiel #24
0
 async def test_02_predict(self):
     """
     Predict on a single repo through the source and model contexts and
     check that the first result pairs the same repo with a truthy second
     element.
     """
     a = Repo('a', data={'features': {self.feature.NAME: 1}})
     memory = Sources(MemorySource(MemorySourceConfig(repos=[a])))
     async with memory as sources, self.features as features, \
             self.model as model:
         async with sources() as sctx, model() as mctx:
             predictions = mctx.predict(
                 sctx.repos(), features, self.classifications)
             res = [result async for result in predictions]
             self.assertEqual(len(res), 1)
         first = res[0]
         self.assertEqual(first[0].src_url, a.src_url)
         self.assertTrue(first[1])
Beispiel #25
0
 async def update(self, repo: Repo):
     """
     Store the repo as a JSON dump in ``ml_data``, overwriting the stored
     JSON when a row for the same ``src_url`` already exists, then log the
     dump and the repo as read back.
     """
     db = self.conn
     # Just dump it. If you want to set up queries easily, massage the
     # columns in this table to your liking, and perhaps add more tables.
     marshall = json.dumps(repo.dict())
     statement = (
         "INSERT INTO ml_data (src_url, json) VALUES(%s, %s) "
         "ON DUPLICATE KEY UPDATE json = %s"
     )
     # The dump is passed twice: once to insert, once for the update
     params = (repo.src_url, marshall, marshall)
     await db.execute(statement, params)
     self.logger.debug("updated: %s", marshall)
     stored = await self.repo(repo.src_url)
     self.logger.debug("update: %s", stored)
Beispiel #26
0
 def setUpClass(cls):
     """
     Create the SLR model predicting "Y", the "X" feature and one in-memory
     repo per row of ``FEATURE_DATA``.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     config = SLRConfig(directory=cls.model_dir.name, predict="Y")
     cls.model = SLR(config)
     cls.feature = DefFeature("X", float, 1)
     cls.features = Features(cls.feature)
     # Transpose the rows into an X column and a Y column
     X, Y = list(zip(*FEATURE_DATA))
     cls.repos = []
     for i, (x, y) in enumerate(zip(X, Y)):
         cls.repos.append(Repo(str(i), data={"features": {"X": x, "Y": y}}))
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos))
     )
Beispiel #27
0
 async def repo(self, key: str):
     """
     Fetch a single repo by running the configured ``repo_query``.

     Columns prefixed with ``feature_`` become features and columns
     prefixed with ``prediction_`` become prediction fields, each with the
     prefix removed. When the query returns no row, an empty repo for
     ``key`` is returned.
     """
     query = self.parent.config.repo_query
     repo = Repo(key)
     db = self.conn
     await db.execute(query, (key, ))
     row = await db.fetchone()

     # Nothing stored under this key
     if row is None:
         return repo

     features = {
         column.replace("feature_", ""): value
         for column, value in row.items()
         if column.startswith("feature_")
     }
     prediction = {
         column.replace("prediction_", ""): value
         for column, value in row.items()
         if column.startswith("prediction_")
     }
     found = Repo(
         row["key"],
         data={
             "features": features,
             "prediction": prediction,
         },
     )
     repo.merge(found)
     return repo
Beispiel #28
0
 def convert_to_repo(self, result):
     """
     Build a Repo from a flat mapping of column names to values.

     Columns prefixed with ``feature_`` become features (prefix removed).
     Columns containing ``_value`` or ``_confidence`` produce a prediction
     entry read from the matching ``<target>_value`` and
     ``<target>_confidence`` columns. Every other column is stored at the
     top level, which is where the repo's ``key`` is taken from.
     """
     record = {"key": "", "data": {"features": {}, "prediction": {}}}
     for column, value in result.items():
         if column.startswith("feature_"):
             name = column.replace("feature_", "")
             record["data"]["features"][name] = value
         elif "_value" in column or "_confidence" in column:
             target = column.replace("_value", "")
             target = target.replace("_confidence", "")
             entry = {
                 "value": result[target + "_value"],
                 "confidence": result[target + "_confidence"],
             }
             record["data"]["prediction"][target] = entry
         else:
             record[column] = value
     return Repo(record["key"], data=record["data"])
Beispiel #29
0
 async def repo(self, key: str):
     """
     Load a repo from its JSON dump in ``ml_data`` and, when the ``status``
     table has a non-null ``maintained`` value for the key, record it as
     the "maintained" feature in string form.
     """
     repo = Repo(key)
     db = self.conn
     params = (key, )

     # Stored JSON dump
     await db.execute("SELECT json FROM ml_data WHERE key=%s", params)
     dump = await db.fetchone()
     if not (dump is None or dump[0] is None):
         stored = json.loads(dump[0])
         repo.merge(Repo(key, data=stored))

     # Maintained status
     await db.execute("SELECT maintained FROM `status` WHERE key=%s", params)
     maintained = await db.fetchone()
     if not (maintained is None or maintained[0] is None):
         repo.evaluated({"maintained": str(maintained[0])})

     return repo
Beispiel #30
0
 async def repo(self, src_url: str):
     """
     Load a repo from its JSON dump in ``ml_data`` and, when the ``status``
     table has a non-null ``maintained`` value for the URL, classify the
     repo with that value in string form.
     """
     repo = Repo(src_url)
     db = self.conn
     params = (src_url, )

     # Stored JSON dump
     await db.execute("SELECT json FROM ml_data WHERE src_url=%s", params)
     dump = await db.fetchone()
     if not (dump is None or dump[0] is None):
         stored = json.loads(dump[0])
         repo.merge(Repo(src_url, data=stored))

     # Classification comes from the maintained column
     await db.execute(
         "SELECT maintained FROM `status` WHERE src_url=%s", params)
     classification = await db.fetchone()
     if not (classification is None or classification[0] is None):
         repo.classify(str(classification[0]))

     return repo