Example #1
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Assess the saved regression line against the records in ``sources``.

     The slope and intercept stored under ``"regression_line"`` are applied
     to the configured feature of every record that has both the feature
     and the value to predict. The result of ``coeff_of_deter`` on the
     actual and computed values replaces the stored accuracy and is
     returned.

     Raises ModelNotTrained when no regression line has been stored.
     """
     stored_line = self.storage.get("regression_line", None)
     if stored_line is None:
         raise ModelNotTrained("Train model before assessing for accuracy")
     # Third element is the previously stored accuracy; it is recomputed
     # below, so only the slope and intercept are kept.
     slope, intercept, _ = stored_line
     feature_name = self.config.feature.name
     predict_name = self.config.predict.name
     inputs = []
     targets = []
     # Only records carrying both features are yielded
     async for record in sources.with_features([feature_name, predict_name]):
         inputs.append(record.feature(feature_name))
         targets.append(record.feature(predict_name))
     self.logger.debug("Number of test records: %d", len(inputs))
     # Values the stored line yields for each test input
     fitted = [slope * value + intercept for value in inputs]
     score = coeff_of_deter(targets, fitted)
     # Persist the line together with its accuracy on the test data
     self.storage["regression_line"] = slope, intercept, score
     return Accuracy(score)
Example #2
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Return the accuracy saved alongside the stored regression line.

     ``sources`` is not read by this method.

     Raises ModelNotTrained when no regression line has been stored.
     """
     stored_line = self.storage.get("regression_line", None)
     if stored_line is not None:
         # Per the original comment the stored value is (m, b, accuracy),
         # so the accuracy sits at index 2.
         return Accuracy(stored_line[2])
     raise ModelNotTrained("Train model before assessing for accuracy.")
Example #3
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Score the model's predictions on the records in ``sources``.

     Features of every record that has all of ``self.features`` plus the
     feature to predict are loaded into a DataFrame. The predict column is
     split off as the expected values, the rest is passed to
     ``self.get_predictions``, and ``self.accuracy_score`` compares the two.

     Raises ModelNotTrained when ``self.model`` is falsy.
     """
     if not self.model:
         raise ModelNotTrained("Train the model before assessing accuracy")
     target = self.parent.config.predict.name
     rows = [
         record.features()
         async for record in sources.with_features(self.features + [target])
     ]
     frame = pd.DataFrame(rows)
     # Double brackets keep the expected values as a one-column DataFrame
     expected = frame[[target]]
     inputs = frame.drop(columns=[target])
     predicted = await self.get_predictions(inputs)
     score = await self.accuracy_score(expected, predicted)
     return Accuracy(score)
Example #4
0
    async def accuracy(self, sources: SourcesContext) -> Accuracy:
        """
        Score the saved pipeline on the examples derived from ``sources``.

        The pipeline is loaded from the configured output directory into
        ``self.nlp``. Each preprocessed example is run through it and scored
        against a GoldParse built from the example's ``"entities"``; the
        scorer's ``"tags_acc"`` value is returned.

        Raises ModelNotTrained when the output directory has no ``ner``
        sub-directory.
        """
        model_dir = self.parent.config.output_dir
        ner_dir = os.path.join(model_dir, "ner")
        if not os.path.isdir(ner_dir):
            raise ModelNotTrained("Train model before assessing for accuracy.")

        samples = await self._preprocess_data(sources)
        self.nlp = spacy.load(model_dir)

        scorer = Scorer()
        for text, annotations in samples:
            # Gold annotations are attached to an unprocessed doc of the text
            gold = GoldParse(
                self.nlp.make_doc(text), entities=annotations["entities"])
            scorer.score(self.nlp(text), gold)
        return Accuracy(scorer.scores["tags_acc"])
Example #5
0
    async def accuracy(self, sources: SourcesContext) -> Accuracy:
        """
        Score the saved anomaly-detection parameters on ``sources``.

        Records that have every feature in ``self.features`` plus the
        feature to predict are collected. ``multivariateGaussian`` is
        evaluated on them with the stored ``mu`` and ``sigma2``; records
        whose value is below the stored ``epsilon`` are predicted as 1, all
        others as 0. The value returned by ``getF1`` for those predictions
        replaces the stored F1 and is returned as the accuracy.

        Raises ModelNotTrained when nothing is stored under ``"anomalies"``.
        """
        anomalies = self.storage.get("anomalies", None)
        if anomalies is None:
            raise ModelNotTrained("Train model before assessing for accuracy.")

        # The stored F1 is ignored; it is recomputed on the test records
        epsilon, _F1val, mu, sigma2 = anomalies

        X = []
        Y = []
        # Go through all records that have the features we're testing on and
        # the feature we want to predict.
        async for record in sources.with_features(
                self.features + [self.parent.config.predict.name]):
            record_data = []
            for feature in record.features(self.features).values():
                # Scalars are appended, anything else is flattened in
                record_data.extend(
                    [feature] if np.isscalar(feature) else feature)

            X.append(record_data)
            Y.append(record.feature(self.parent.config.predict.name))

        self.logger.debug("Number of test records: %d", len(X))

        # One row per record, one column per configured feature
        X = np.reshape(X, (len(X), len(self.features)))
        Y = np.reshape(Y, (len(Y), 1))

        mu = np.array(mu)
        sigma2 = np.array(sigma2)
        p = multivariateGaussian(X, mu, sigma2)

        pred = (p < epsilon).astype(int)
        F1 = getF1(Y, pred)

        # Update the stored F1 with the value measured on the test records;
        # arrays are converted back to lists before being stored
        self.storage["anomalies"] = epsilon, F1, mu.tolist(), sigma2.tolist()
        return Accuracy(F1)
Example #6
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Measure how often the trained model predicts within 0.1 of the target.

     Features of every record that has all of ``self.features`` plus the
     feature to predict are loaded into a DataFrame. The predict column is
     the expected output; the remaining columns are passed to
     ``self.ac_predictor.compute`` with the trained model. The absolute
     errors are handed to ``self.compare`` with a margin of 0.1, and the sum
     of its result divided by the number of records is returned.

     Raises ModelNotTrained when ``self.lm_trained`` is None.
     """
     if self.lm_trained is None:
         raise ModelNotTrained("Train model before assessing for accuracy.")
     predict_name = self.parent.config.predict.name
     columns = self.features + [predict_name]
     feature_data = [
         record.features(columns)
         async for record in sources.with_features(columns)
     ]
     df = self.pd.DataFrame(feature_data)
     # Use the `columns` keyword: passing the axis positionally to drop()
     # was deprecated in pandas 1.3 and removed in pandas 2.0.
     xdata = df.drop(columns=[predict_name])
     ydata = df[predict_name]
     preds = self.ac_predictor.compute(xdata, self.lm_trained)
     # Calculate accuracy with an error margin of 0.1
     abs_errors = [
         abs(actual - predicted)
         for actual, predicted in zip(ydata, preds.prediction)
     ]
     accuracy_val = sum(self.compare(abs_errors, 0.1)) / len(ydata)
     return Accuracy(accuracy_val)
Example #7
0
    async def accuracy(self, sources: SourcesContext) -> Accuracy:
        """
        Score the saved pipeline on the examples derived from ``sources``.

        The pipeline is loaded from the configured directory into
        ``self.nlp``. One Example is built per preprocessed sample and the
        whole batch is scored at once; the ``"token_acc"`` score is returned.

        Raises ModelNotTrained when the configured directory has no ``ner``
        sub-directory.
        """
        model_dir = self.parent.config.directory
        if not os.path.isdir(os.path.join(model_dir, "ner")):
            raise ModelNotTrained("Train model before assessing for accuracy.")

        samples = await self._preprocess_data(sources)
        self.nlp = spacy.load(model_dir)

        scorer = Scorer()
        batch = []
        for text, annotations in samples:
            example = Example.from_dict(
                self.nlp(text), {"entities": annotations["entities"]})
            # NOTE(review): the reference built by from_dict is replaced
            # with an unprocessed doc of the same text - confirm this is
            # intended, since the entity annotations were passed above.
            example.reference = self.nlp.make_doc(text)
            batch.append(example)
        return Accuracy(scorer.score(batch)["token_acc"])
Example #8
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Return the value at index 2 of ``self.separating_line`` as the accuracy.

     ``sources`` is not read by this method.

     Raises ModelNotTrained when ``self.separating_line`` is None.
     """
     line = self.separating_line
     if line is not None:
         return Accuracy(line[2])
     raise ModelNotTrained("Train model before assessing for accuracy.")
Example #9
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Return the sum of every record's key, converted to int, as the accuracy.

     With no records the result is ``Accuracy(0)``.
     """
     keys = [int(record.key) async for record in sources.records()]
     return Accuracy(sum(keys))