Example #1
0
 async def model_predict(self, request, mctx):
     """
     Run the records in the request body through a model context.

     The JSON request body is read as an object whose keys are record keys
     and whose values are the data for each record. The records are loaded
     into an in-memory source and fed to ``mctx.predict``.

     Only a ``chunk_size`` of 0 (taken from the URL match info) is
     supported; any other value yields a 400 response with an ``error``
     message.

     Returns a JSON response with ``iterkey`` set to ``None`` and
     ``records`` mapping each predicted record's key to its exported form.
     """
     # TODO Provide an iterkey method for model prediction
     chunk_size = int(request.match_info["chunk_size"])
     if chunk_size != 0:
         return web.json_response(
             {"error": "Multiple request iteration not yet supported"},
             status=HTTPStatus.BAD_REQUEST,
         )
     # Decode the request body once, before any source is opened
     payload = await request.json()
     # Create a source which will provide the records
     async with Sources(
         MemorySource(records=[
             Record(key, data=record_data)
             for key, record_data in payload.items()
         ])) as source:
         async with source() as sctx:
             # Feed them through prediction
             return web.json_response({
                 "iterkey": None,
                 "records": {
                     record.key: record.export()
                     async for record in mctx.predict(sctx)
                 },
             })
Example #2
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Score the stored regression line against the records in ``sources``.

     Raises ``ModelNotTrained`` when no regression line has been saved.
     The freshly computed accuracy replaces the one held in storage and is
     returned wrapped in ``Accuracy``.
     """
     stored_line = self.storage.get("regression_line", None)
     # Nothing to evaluate if training never saved a line
     if stored_line is None:
         raise ModelNotTrained("Train model before assessing for accuracy")
     # The accuracy saved alongside the line came from the training data;
     # it is dropped here because it is recomputed for the test data
     slope, intercept, _ = stored_line
     feature_name = self.config.feature.name
     predict_name = self.config.predict.name
     inputs = []
     targets = []
     # Only records carrying both the input and the target feature are used
     async for record in sources.with_features([feature_name, predict_name]):
         inputs.append(record.feature(feature_name))
         targets.append(record.feature(predict_name))
     self.logger.debug("Number of test records: %d", len(inputs))
     # Evaluate the line at every test input and compare with the targets
     predicted = [slope * value + intercept for value in inputs]
     score = coeff_of_deter(targets, predicted)
     # Persist the line together with the accuracy measured on test data
     self.storage["regression_line"] = slope, intercept, score
     return Accuracy(score)
Example #3
0
 async def train(self, sources: Sources):
     """
     Accumulate training data from ``sources`` and fit the separating line.

     For every record that has all of ``self.features`` plus the feature
     being predicted, the value of the first feature is appended to
     ``self.xData`` and the predicted feature's value to ``self.yData``.
     Afterwards ``self.separating_line`` is set from
     ``self.best_separating_line()``.
     """
     target_name = self.config.predict.NAME
     wanted = self.features + [target_name]
     async for record in sources.with_features(wanted):
         values = record.features(wanted)
         # Only the first configured feature is used as the input value
         self.xData = np.append(self.xData, values[self.features[0]])
         self.yData = np.append(self.yData, values[target_name])
     self.separating_line = self.best_separating_line()
Example #4
0
class ServerConfig(TLSCMDConfig, MultiCommCMDConfig):
    """
    Configuration options for the HTTP server.

    Each attribute is declared with ``field``, whose first argument is the
    human readable description of the option.
    """

    # Network binding
    port: int = field(
        "Port to bind to", default=8080,
    )
    addr: str = field(
        "Address to bind to", default="127.0.0.1",
    )
    # Filesystem locations, unset by default
    upload_dir: str = field(
        "Directory to store uploaded files in", default=None,
    )
    static: str = field(
        "Directory to serve static content from", default=None,
    )
    # Boolean switches, all off by default
    js: bool = field(
        "Serve JavaScript API file at /api.js",
        default=False,
        action="store_true",
    )
    insecure: bool = field(
        "Start without TLS encryption", action="store_true", default=False,
    )
    cors_domains: List[str] = field(
        "Domains to allow CORS for (see keys in defaults dict for aiohttp_cors.setup)",
        default_factory=lambda: [],
    )
    allow_caching: bool = field(
        "Allow caching of HTTP responses", action="store_true", default=False,
    )
    # Labeled collections configured at startup; each defaults to a fresh,
    # empty container
    models: Model = field(
        "Models configured on start",
        default_factory=lambda: AsyncContextManagerList(),
        action=list_action(AsyncContextManagerList),
        labeled=True,
    )
    sources: Sources = field(
        "Sources configured on start",
        default_factory=lambda: Sources(),
        action=list_action(Sources),
        labeled=True,
    )
    scorers: AccuracyScorer = field(
        "Scorers configured on start",
        default_factory=lambda: AsyncContextManagerList(),
        action=list_action(AsyncContextManagerList),
        labeled=True,
    )
    # Flat list parsed by ParseRedirectsAction
    redirect: List[str] = field(
        "list of METHOD SOURCE_PATH DESTINATION_PATH pairs, number of elements must be divisible by 3",
        action=ParseRedirectsAction,
        default_factory=lambda: [],
    )
    portfile: pathlib.Path = field(
        "File to write bound port to when starting. Helpful when port 0 was requested to bind to any free port",
        default=None,
    )
Example #5
0
 async def train(self, sources: Sources) -> None:
     """
     Train the linear model one record at a time and save the result.

     Every record that has all of ``self.features`` plus the feature being
     predicted is turned into a single-row DataFrame and passed to
     ``self.lm.compute``. Once all records are consumed the finalized model
     is stored on ``self.lm_trained`` and dumped to ``self.path``.
     """
     predict_name = self.parent.config.predict.name
     async for record in sources.with_features(
             self.features + [predict_name]):
         feature_data = record.features(self.features + [predict_name])
         df = self.pd.DataFrame(feature_data, index=[0])
         # axis must be given by keyword: passing it positionally to
         # DataFrame.drop was deprecated in pandas 1.3 and raises a
         # TypeError from pandas 2.0 onwards
         xdata = df.drop([predict_name], axis=1)
         ydata = df[predict_name]
         self.lm.compute(xdata, ydata)
     self.lm_trained = self.lm.finalize().model
     self.joblib.dump(self.lm_trained, self.path)
Example #6
0
 async def train(self, sources: Sources):
     """
     Fit the model and its ensemble on the records in ``sources``.

     All features of every record that has ``self.features`` plus the
     feature being predicted are gathered into one DataFrame. The predicted
     feature's column is the target; the remaining columns are the inputs.
     The fitted model is dumped to ``self.path``.
     """
     target_name = self.parent.config.predict.name
     rows = [
         record.features()
         async for record in sources.with_features(
             self.features + [target_name])
     ]
     frame = pd.DataFrame(rows)
     # Double brackets keep the target as a one-column DataFrame
     targets = frame[[target_name]]
     inputs = frame.drop(columns=[target_name])
     self.model.fit(inputs, targets)
     self.model.fit_ensemble(
         targets, ensemble_size=self.parent.config.ensemble_size)
     joblib.dump(self.model, self.path)
Example #7
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Evaluate the model on the records in ``sources``.

     Raises ``ModelNotTrained`` when ``self.model`` is falsy. Otherwise the
     records are collected into a DataFrame, predictions are obtained for
     the input columns, and the result of ``self.accuracy_score`` is
     returned wrapped in ``Accuracy``.
     """
     if not self.model:
         raise ModelNotTrained("Train the model before assessing accuracy")
     target_name = self.parent.config.predict.name
     rows = [
         record.features()
         async for record in sources.with_features(
             self.features + [target_name])
     ]
     frame = pd.DataFrame(rows)
     # Double brackets keep the target as a one-column DataFrame
     expected = frame[[target_name]]
     inputs = frame.drop(columns=[target_name])
     predicted = await self.get_predictions(inputs)
     score = await self.accuracy_score(expected, predicted)
     return Accuracy(score)
Example #8
0
 async def train(self, sources: Sources) -> None:
     """
     Train the linear model one record at a time and save the result.

     Every record that has all of ``self.features`` plus the feature being
     predicted is turned into a two-row DataFrame (the same row twice) and
     passed to ``self.lm.compute``. Once all records are consumed the
     finalized model is stored on ``self.lm_trained`` and dumped to
     ``self.path``.
     """
     predict_name = self.parent.config.predict.name
     async for record in sources.with_features(
             self.features + [predict_name]):
         feature_data = record.features(self.features + [predict_name])
         # NOTE Duplicate feature data due to regression in oneDAL
         # See https://github.com/intel/dffml/issues/801
         df = self.pd.DataFrame([feature_data] * 2, index=[0, 1])
         # axis must be given by keyword: passing it positionally to
         # DataFrame.drop was deprecated in pandas 1.3 and raises a
         # TypeError from pandas 2.0 onwards
         xdata = df.drop([predict_name], axis=1)
         ydata = df[predict_name]
         self.lm.compute(xdata, ydata)
     self.lm_trained = self.lm.finalize().model
     self.joblib.dump(self.lm_trained, self.path)
Example #9
0
 async def train(self, sources: Sources) -> None:
     """
     Fit a best fit line to the records in ``sources`` and store it.

     Only records that have both the configured input feature and the
     feature being predicted are used. The value returned by
     ``best_fit_line`` is saved in storage under ``"regression_line"``.
     """
     feature_name = self.config.feature.name
     predict_name = self.config.predict.name
     # One (input, target) pair per usable record
     pairs = [
         (record.feature(feature_name), record.feature(predict_name))
         async for record in sources.with_features(
             [feature_name, predict_name])
     ]
     inputs = [pair[0] for pair in pairs]
     targets = [pair[1] for pair in pairs]
     # Report how many records are being used for training
     self.logger.debug("Number of training records: %d", len(inputs))
     # Save m, b, and accuracy
     self.storage["regression_line"] = best_fit_line(inputs, targets)
Example #10
0
 async def train(self, sources: Sources) -> None:
     """
     Fit a best fit line to the records in ``sources`` and store it.

     The model supports a single input feature, so only the first entry of
     ``self.features`` is read from each record. The value returned by
     ``best_fit_line`` is saved in storage under ``"regression_line"``.
     """
     target_name = self.config.predict.NAME
     inputs = []
     targets = []
     # Only records that have every listed feature and the target are used
     async for record in sources.with_features(self.features + [target_name]):
         inputs.append(record.feature(self.features[0]))
         targets.append(record.feature(target_name))
     # Report how many records are being used for training
     self.logger.debug("Number of input records: %d", len(inputs))
     # Save m, b, and accuracy
     self.storage["regression_line"] = best_fit_line(inputs, targets)
Example #11
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Measure how many predictions fall within 0.1 of the true value.

     Raises ``ModelNotTrained`` when ``self.lm_trained`` is ``None``.
     Records that have ``self.features`` plus the feature being predicted
     are collected into a DataFrame, predictions are computed with
     ``self.ac_predictor``, and the summed result of ``self.compare`` on
     the absolute errors, divided by the number of rows, is returned
     wrapped in ``Accuracy``.
     """
     if self.lm_trained is None:
         raise ModelNotTrained("Train model before assessing for accuracy.")
     predict_name = self.parent.config.predict.name
     feature_data = []
     async for record in sources.with_features(
             self.features + [predict_name]):
         feature_data.append(
             record.features(self.features + [predict_name]))
     df = self.pd.DataFrame(feature_data)
     # axis must be given by keyword: passing it positionally to
     # DataFrame.drop was deprecated in pandas 1.3 and raises a TypeError
     # from pandas 2.0 onwards
     xdata = df.drop([predict_name], axis=1)
     ydata = df[predict_name]
     preds = self.ac_predictor.compute(xdata, self.lm_trained)
     # Calculate accuracy with an error margin of 0.1
     accuracy_val = sum(
         self.compare(list(map(abs, map(sub, ydata, preds.prediction))),
                      0.1)) / len(ydata)
     return Accuracy(accuracy_val)
Example #12
0
class ServerConfig(TLSCMDConfig, MultiCommCMDConfig):
    """
    Configuration options for the HTTP server.

    Each attribute is declared with ``field``, whose first argument is the
    human readable description of the option.
    """

    # Network binding
    port: int = field(
        "Port to bind to",
        default=8080,
    )
    addr: str = field(
        "Address to bind to",
        default="127.0.0.1",
    )
    # Filesystem locations, unset by default
    upload_dir: str = field(
        "Directory to store uploaded files in",
        default=None,
    )
    static: str = field(
        "Directory to serve static content from",
        default=None,
    )
    # Boolean switches, all off by default
    js: bool = field(
        "Serve JavaScript API file at /api.js",
        default=False,
        action="store_true",
    )
    insecure: bool = field(
        "Start without TLS encryption",
        action="store_true",
        default=False,
    )
    # Defaults to a fresh empty list
    cors_domains: List[str] = field(
        "Domains to allow CORS for (see keys in defaults dict for aiohttp_cors.setup)",
        default_factory=lambda: [],
    )
    # Labeled collections configured at startup; each defaults to a fresh,
    # empty container
    models: Model = field(
        "Models configured on start",
        default_factory=lambda: AsyncContextManagerList(),
        action=list_action(AsyncContextManagerList),
        labeled=True,
    )
    sources: Sources = field(
        "Sources configured on start",
        default_factory=lambda: Sources(),
        action=list_action(Sources),
        labeled=True,
    )
Example #13
0
 async def train(self, sources: Sources):
     """
     Remember every record in ``sources``, indexed by its key, in
     ``self.trained_on``.
     """
     async for seen in sources.records():
         key = seen.key
         self.trained_on[key] = seen
Example #14
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Return an ``Accuracy`` holding the sum of every record key in
     ``sources``, with each key converted to ``int``.
     """
     key_values = [int(record.key) async for record in sources.records()]
     return Accuracy(sum(key_values))
Example #15
0
class Server(TLSCMD, MultiCommCMD, Routes):
    """
    HTTP server providing access to DFFML APIs
    """

    # Used for testing. When RUN_YIELD_START is not False, run() hands this
    # server to it via ``put()`` and then blocks on RUN_YIELD_FINISH.wait()
    RUN_YIELD_START = False
    RUN_YIELD_FINISH = False
    INSECURE_NO_TLS = False

    arg_port = Arg("-port", help="Port to bind to", type=int, default=8080)
    arg_addr = Arg("-addr", help="Address to bind to", default="127.0.0.1")
    arg_upload_dir = Arg(
        "-upload-dir",
        help="Directory to store uploaded files in",
        default=None,
    )
    arg_static = Arg("-static",
                     help="Directory to serve static content from",
                     default=None)
    arg_js = Arg(
        "-js",
        help="Serve JavaScript API file at /api.js",
        default=False,
        action="store_true",
    )
    arg_insecure = Arg(
        "-insecure",
        help="Start without TLS encryption",
        action="store_true",
        default=False,
    )
    arg_cors_domains = Arg(
        "-cors-domains",
        help=
        "Domains to allow CORS for (see keys in defaults dict for aiohttp_cors.setup)",
        nargs="+",
        default=[],
    )
    arg_models = Arg(
        "-models",
        help="Models configured on start",
        nargs="+",
        default=AsyncContextManagerList(),
        type=Model.load_labeled,
        action=list_action(AsyncContextManagerList),
    )
    arg_sources = Arg(
        "-sources",
        help="Sources configured on start",
        nargs="+",
        default=Sources(),
        type=BaseSource.load_labeled,
        action=list_action(Sources),
    )

    async def start(self):
        """
        Create the TCP site for ``self.runner`` and start listening.

        A plain TCP site is used when ``self.insecure`` is set; otherwise
        an SSL context is built from ``self.cert`` and ``self.key``. After
        the site has started, ``self.port`` is overwritten with the port
        reported by the listening socket.
        """
        if self.insecure:
            self.site = web.TCPSite(self.runner,
                                    host=self.addr,
                                    port=self.port)
        else:
            # NOTE(review): the ssl module documents Purpose.SERVER_AUTH as
            # the purpose for client-side sockets. Confirm it is intended
            # for this listening socket before changing it.
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=self.cert)
            ssl_context.load_cert_chain(self.cert, self.key)
            self.site = web.TCPSite(
                self.runner,
                host=self.addr,
                port=self.port,
                ssl_context=ssl_context,
            )
        await self.site.start()
        # Read the port back from the socket rather than trusting the
        # requested value
        self.port = self.site._server.sockets[0].getsockname()[1]
        self.logger.info(f"Serving on {self.addr}:{self.port}")

    async def run(self):
        """
        Binds to port and starts HTTP server

        Runs until cancelled (or, under test, until RUN_YIELD_FINISH is
        set). The application is cleaned up and the site stopped on the way
        out, including when loading ``self.mc_config`` fails.
        """
        # Create dictionaries to hold configured sources and models
        await self.setup()
        await self.start()
        try:
            # Load. This is done inside the try so that a failure here still
            # stops the already started site.
            if self.mc_config is not None:
                # Restore atomic after config is set, allow setting for now
                atomic = self.mc_atomic
                self.mc_atomic = False
                try:
                    await self.register_directory(self.mc_config)
                finally:
                    # Restored even if registration raises
                    self.mc_atomic = atomic
            # If we are testing then RUN_YIELD_START will be an object with
            # an awaitable put() and RUN_YIELD_FINISH one with an awaitable
            # wait() (presumably an asyncio.Queue and an asyncio.Event;
            # verify against the tests)
            if self.RUN_YIELD_START is not False:
                await self.RUN_YIELD_START.put(self)
                await self.RUN_YIELD_FINISH.wait()
            else:  # pragma: no cov
                # Wait for ctrl-c
                while True:
                    await asyncio.sleep(60)
        finally:
            await self.app.cleanup()
            await self.site.stop()