Example #1
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """Re-score the saved regression line against the records in ``sources``.

     Raises ModelNotTrained if no regression line has been stored yet.
     Updates the stored accuracy to the test-data accuracy and returns it.
     """
     # Storage holds (m, b, accuracy) once the model has been trained
     regression_line = self.storage.get("regression_line", None)
     if regression_line is None:
         raise ModelNotTrained("Train model before assessing for accuracy")
     # Discard the training-time accuracy; it is recomputed on this data
     m, b, _accuracy = regression_line
     inputs = []
     targets = []
     # Only records carrying both the input feature and the target
     # feature can be scored
     async for record in sources.with_features(
         [self.config.feature.name, self.config.predict.name]):
         inputs.append(record.feature(self.config.feature.name))
         targets.append(record.feature(self.config.predict.name))
     # Report the size of the test set for debugging
     self.logger.debug("Number of test records: %d", len(inputs))
     # Project the test inputs through the saved line, then score the fit
     predicted = [m * value + b for value in inputs]
     accuracy = coeff_of_deter(targets, predicted)
     # Persist the test-data accuracy alongside the line parameters
     self.storage["regression_line"] = m, b, accuracy
     return Accuracy(accuracy)
Example #2
0
 async def train(self, sources: Sources):
     """Accumulate training data and fit the separating line.

     Collects the single input feature and the target feature from every
     applicable record, appends them to the model's running data arrays,
     and recomputes the separating line.
     """
     x_values = []
     y_values = []
     async for record in sources.with_features(self.features +
                                               [self.config.predict.NAME]):
         feature_data = record.features(self.features +
                                        [self.config.predict.NAME])
         x_values.append(feature_data[self.features[0]])
         y_values.append(feature_data[self.config.predict.NAME])
     # np.append copies the whole array on every call, so calling it once
     # per array instead of once per record avoids quadratic behavior.
     # Skip the call entirely when no records matched so the existing
     # arrays (and their dtype) are left untouched, as before.
     if x_values:
         self.xData = np.append(self.xData, x_values)
         self.yData = np.append(self.yData, y_values)
     self.separating_line = self.best_separating_line()
Example #3
0
 async def train(self, sources: Sources) -> None:
     """Incrementally fit the linear model one record at a time, then
     finalize it and persist the trained model to disk.
     """
     async for record in sources.with_features(
             self.features + [self.parent.config.predict.name]):
         feature_data = record.features(self.features +
                                        [self.parent.config.predict.name])
         # One-row frame holding this record's features
         df = self.pd.DataFrame(feature_data, index=[0])
         # The positional axis argument to drop() was deprecated in
         # pandas 1.0 and removed in pandas 2.0; use columns= instead
         xdata = df.drop(columns=[self.parent.config.predict.name])
         ydata = df[self.parent.config.predict.name]
         self.lm.compute(xdata, ydata)
     self.lm_trained = self.lm.finalize().model
     self.joblib.dump(self.lm_trained, self.path)
Example #4
0
 async def train(self, sources: Sources):
     """Train the model on all matching records, then persist it."""
     rows = []
     # Gather every record that has the input features and the target
     async for record in sources.with_features(
             self.features + [self.parent.config.predict.name]):
         rows.append(record.features())
     df = pd.DataFrame(rows)
     target = self.parent.config.predict.name
     y_train = df[[target]]
     x_train = df.drop(columns=[target])
     # Fit the base model first, then build the ensemble on top of it
     self.model.fit(x_train, y_train)
     self.model.fit_ensemble(y_train,
                             ensemble_size=self.parent.config.ensemble_size)
     joblib.dump(self.model, self.path)
Example #5
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """Score the trained model against the records in ``sources``.

     Raises ModelNotTrained when no model has been loaded/trained yet.
     """
     # Refuse to score an untrained model
     if not self.model:
         raise ModelNotTrained("Train the model before assessing accuracy")
     rows = []
     async for record in sources.with_features(
             self.features + [self.parent.config.predict.name]):
         rows.append(record.features())
     df = pd.DataFrame(rows)
     target = self.parent.config.predict.name
     y_test = df[[target]]
     x_test = df.drop(columns=[target])
     # Delegate prediction and scoring to the model-specific helpers
     predictions = await self.get_predictions(x_test)
     accuracy = await self.accuracy_score(y_test, predictions)
     return Accuracy(accuracy)
Example #6
0
 async def train(self, sources: Sources) -> None:
     """Incrementally train the model one record at a time, then
     finalize it and persist the trained model to disk.
     """
     async for record in sources.with_features(
             self.features + [self.parent.config.predict.name]):
         feature_data = record.features(self.features +
                                        [self.parent.config.predict.name])
         # NOTE Duplicate feature data due to regression in oneDAL
         # See https://github.com/intel/dffml/issues/801
         df = self.pd.DataFrame([feature_data] * 2, index=[0, 1])
         # The positional axis argument to drop() was deprecated in
         # pandas 1.0 and removed in pandas 2.0; use columns= instead
         xdata = df.drop(columns=[self.parent.config.predict.name])
         ydata = df[self.parent.config.predict.name]
         self.lm.compute(xdata, ydata)
     self.lm_trained = self.lm.finalize().model
     self.joblib.dump(self.lm_trained, self.path)
Example #7
0
 async def train(self, sources: Sources) -> None:
     """Fit the best-fit regression line and store its parameters."""
     inputs = []
     targets = []
     # Only records carrying both the input feature and the target
     # feature can contribute to the fit
     async for record in sources.with_features(
         [self.config.feature.name, self.config.predict.name]):
         inputs.append(record.feature(self.config.feature.name))
         targets.append(record.feature(self.config.predict.name))
     # Report the size of the training set for debugging
     self.logger.debug("Number of training records: %d", len(inputs))
     # best_fit_line yields (m, b, accuracy); persist it as-is
     self.storage["regression_line"] = best_fit_line(inputs, targets)
Example #8
0
 async def train(self, sources: Sources) -> None:
     """Compute and save the best-fit line for the single input feature."""
     inputs = []
     targets = []
     # This model supports exactly one input feature, so self.features
     # holds a single name at index 0
     async for record in sources.with_features(self.features +
                                               [self.config.predict.NAME]):
         inputs.append(record.feature(self.features[0]))
         targets.append(record.feature(self.config.predict.NAME))
     # Report the size of the training set for debugging
     self.logger.debug("Number of input records: %d", len(inputs))
     # best_fit_line yields (m, b, accuracy); persist it as-is
     self.storage["regression_line"] = best_fit_line(inputs, targets)
Example #9
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """Assess accuracy of the trained model on the given ``sources``.

     A prediction counts as correct when it is within 0.1 of the true
     value; the returned accuracy is the fraction of correct predictions.
     Raises ModelNotTrained if the model has not been trained.
     """
     if self.lm_trained is None:
         raise ModelNotTrained("Train model before assessing for accuracy.")
     feature_data = []
     async for record in sources.with_features(
             self.features + [self.parent.config.predict.name]):
         feature_data.append(
             record.features(self.features +
                             [self.parent.config.predict.name]))
     df = self.pd.DataFrame(feature_data)
     # The positional axis argument to drop() was deprecated in
     # pandas 1.0 and removed in pandas 2.0; use columns= instead
     xdata = df.drop(columns=[self.parent.config.predict.name])
     ydata = df[self.parent.config.predict.name]
     preds = self.ac_predictor.compute(xdata, self.lm_trained)
     # Calculate accuracy with an error margin of 0.1
     accuracy_val = sum(
         self.compare(list(map(abs, map(sub, ydata, preds.prediction))),
                      0.1)) / len(ydata)
     return Accuracy(accuracy_val)