コード例 #1
0
ファイル: anomalydetection.py プロジェクト: up1512001/dffml
    async def predict(self, sources: SourcesContext) -> AsyncIterator[Record]:
        """
        Label every input record as anomalous (1) or not (0) and yield it.

        The value stored under ``"anomalies"`` is unpacked as
        ``(epsilon, F1, mu, sigma2)``. The selected features of each record
        are flattened into one row, all rows are scored with
        ``multivariateGaussian`` and a row is labelled ``1`` when its score is
        below ``epsilon``. The stored ``F1`` is attached to every prediction as
        its confidence.

        Raises:
            ModelNotTrained: if nothing is stored under ``"anomalies"``.
        """
        stored = self.storage.get("anomalies", None)
        if stored is None:
            # Nothing was saved, so there is nothing to predict with
            raise ModelNotTrained("Train model before prediction")
        epsilon, F1, mu, sigma2 = stored
        mu, sigma2 = np.array(mu), np.array(sigma2)
        # The records are iterated twice below: once to build the rows and
        # once to attach the predictions
        records = await self.get_input_data(sources)
        # One flat row per record; scalar features contribute a single value,
        # non-scalar features contribute each of their elements
        rows = [
            [
                item
                for value in record.features(self.features).values()
                for item in ([value] if np.isscalar(value) else value)
            ]
            for record in records
        ]
        scores = multivariateGaussian(rows, mu, sigma2)
        labels = (scores < epsilon).astype(int)
        for record, label in zip(records, labels):
            record.predicted(
                self.config.predict.name,
                int(label),
                float(F1),
            )
            yield record
コード例 #2
0
ファイル: myslr.py プロジェクト: aburgool/dffml
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Re-assess the stored regression line against the given records.

     The slope and intercept are read from ``self.storage``; the accuracy
     saved alongside them is ignored and recomputed with ``coeff_of_deter``
     on the records supplied here. The new value replaces the stored one.

     Raises:
         ModelNotTrained: if no ``"regression_line"`` entry is stored.
     """
     stored_line = self.storage.get("regression_line", None)
     if stored_line is None:
         raise ModelNotTrained("Train model before assessing for accuracy")
     # Third element is the previously stored accuracy, which is replaced below
     slope, intercept, _ = stored_line
     x_name = self.config.feature.name
     y_name = self.config.predict.name
     xs, ys = [], []
     # Only records carrying both the input and the target feature are used
     async for record in sources.with_features([x_name, y_name]):
         xs.append(record.feature(x_name))
         ys.append(record.feature(y_name))
     self.logger.debug("Number of test records: %d", len(xs))
     # Values the stored line yields for each test input
     fitted = [slope * value + intercept for value in xs]
     score = coeff_of_deter(ys, fitted)
     # Persist the accuracy measured on this test data
     self.storage["regression_line"] = slope, intercept, score
     return Accuracy(score)
コード例 #3
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Return the accuracy saved with the stored regression line.

     ``sources`` is not read; the value comes from index 2 of the
     ``"regression_line"`` entry in ``self.storage``.

     Raises:
         ModelNotTrained: if no ``"regression_line"`` entry is stored.
     """
     stored_line = self.storage.get("regression_line", None)
     if stored_line is not None:
         # Index 2 holds the accuracy (after m and b)
         return Accuracy(stored_line[2])
     raise ModelNotTrained("Train model before assessing for accuracy.")
コード例 #4
0
ファイル: ner_model.py プロジェクト: mHash1m/dffml
    async def predict(
            self, sources: SourcesContext
    ) -> AsyncIterator[Tuple[Record, Any, float]]:
        """
        Attach the entities found in each record's ``"sentence"`` feature.

        Each record from ``sources.records()`` is run through
        ``self.parent.nlp``; the ``(text, label_)`` pairs of the resulting
        ``ents`` are stored as the prediction for ``"Tag"`` and the record is
        yielded.

        Raises:
            ModelNotTrained: if ``self.is_trained`` is falsy.
        """
        if not self.is_trained:
            raise ModelNotTrained("Train model before prediction.")

        async for record in sources.records():
            parsed = self.parent.nlp(record.feature("sentence"))
            entities = []
            for ent in parsed.ents:
                entities.append((ent.text, ent.label_))
            # NOTE(review): the confidence is the string "Nan", not a float,
            # although the return annotation mentions float - confirm intent
            record.predicted("Tag", entities, "Nan")
            yield record
コード例 #5
0
 async def predict(self, sources: SourcesContext) -> AsyncIterator[Record]:
     """
     Predict the configured target for every test record and yield it.

     The features of all records returned by ``self.get_test_records`` are
     placed in one DataFrame, which is passed to ``self.get_predictions`` and
     ``self.get_probabilities``. Each record receives its prediction with
     the largest of its probabilities as the confidence.

     Raises:
         ModelNotTrained: if ``self.model`` is falsy.
     """
     if not self.model:
         raise ModelNotTrained(
             "Train the model first before getting predictions")
     test_records = await self.get_test_records(sources)
     x_test = pd.DataFrame([record.features() for record in test_records])
     predictions = await self.get_predictions(x_test)
     probabilities = await self.get_probabilities(x_test)
     target = self.parent.config.predict.name
     # Local names deliberately differ from the method name ``predict``
     for record, predicted_value, class_probs in zip(
             test_records, predictions, probabilities):
         # Confidence is the highest probability reported for this record
         record.predicted(target, predicted_value, max(class_probs))
         yield record
コード例 #6
0
ファイル: ner_model.py プロジェクト: oliverob/dffml
    async def predict(
            self, sources: SourcesContext
    ) -> AsyncIterator[Tuple[Record, Any, float]]:
        """
        Load the saved spaCy pipeline and tag each record's ``"sentence"``.

        The pipeline is loaded from ``self.parent.config.output_dir`` into
        ``self.nlp``. For every record the ``(text, label_)`` pairs of the
        parsed document's ``ents`` are stored as the prediction for ``"Tag"``.

        Raises:
            ModelNotTrained: if ``output_dir`` has no ``ner`` sub-directory.
        """
        ner_dir = os.path.join(self.parent.config.output_dir, "ner")
        if not os.path.isdir(ner_dir):
            raise ModelNotTrained("Train model before prediction.")
        self.nlp = spacy.load(self.parent.config.output_dir)

        async for record in sources.records():
            sentence = record.feature("sentence")
            entities = [
                (ent.text, ent.label_) for ent in self.nlp(sentence).ents
            ]
            # NOTE(review): the confidence is the string "Nan", not a float,
            # although the return annotation mentions float - confirm intent
            record.predicted("Tag", entities, "Nan")
            yield record
コード例 #7
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Score the model on records that carry the features and the target.

     The feature dictionaries of the matching records form one DataFrame.
     The target column is split off as the expected values, the remaining
     columns go to ``self.get_predictions`` and both are handed to
     ``self.accuracy_score``.

     Raises:
         ModelNotTrained: if ``self.model`` is falsy.
     """
     if not self.model:
         raise ModelNotTrained("Train the model before assessing accuracy")
     target = self.parent.config.predict.name
     rows = [
         record.features()
         async for record in sources.with_features(self.features + [target])
     ]
     frame = pd.DataFrame(rows)
     # Double brackets keep the expected values as a one-column DataFrame
     y_test = frame[[target]]
     x_test = frame.drop(columns=[target])
     predicted = await self.get_predictions(x_test)
     score = await self.accuracy_score(y_test, predicted)
     return Accuracy(score)
コード例 #8
0
ファイル: daal4pylr.py プロジェクト: aburgool/dffml
 async def predict(
     self, records: AsyncIterator[Record]
 ) -> AsyncIterator[Tuple[Record, Any, float]]:
     """
     Predict the configured target for each incoming record.

     A record's selected features become a one-row DataFrame (index ``0``)
     that is passed to ``self.lm_predictor.compute`` together with
     ``self.lm_trained``. The result's ``prediction`` is stored on the record
     with a NaN confidence and the record is yielded.

     Raises:
         ModelNotTrained: if ``self.lm_trained`` is ``None``.
     """
     if self.lm_trained is None:
         raise ModelNotTrained("Train model before prediction.")
     async for record in records:
         row = self.pd.DataFrame(record.features(self.features), index=[0])
         result = self.lm_predictor.compute(row, self.lm_trained)
         record.predicted(
             self.parent.config.predict.name,
             result.prediction,
             float("nan"),
         )
         # Hand the updated record back to the caller
         yield record
コード例 #9
0
ファイル: ner_model.py プロジェクト: oliverob/dffml
    async def accuracy(self, sources: SourcesContext) -> Accuracy:
        """
        Score the saved spaCy pipeline on the preprocessed test examples.

        The pipeline is loaded from ``self.parent.config.output_dir`` into
        ``self.nlp``. Each ``(text, annotations)`` pair returned by
        ``self._preprocess_data`` is scored against a ``GoldParse`` built
        from ``annotations["entities"]``; the scorer's ``"tags_acc"`` entry
        is returned.

        Raises:
            ModelNotTrained: if ``output_dir`` has no ``ner`` sub-directory.
        """
        ner_dir = os.path.join(self.parent.config.output_dir, "ner")
        if not os.path.isdir(ner_dir):
            raise ModelNotTrained("Train model before assessing for accuracy.")

        test_examples = await self._preprocess_data(sources)
        self.nlp = spacy.load(self.parent.config.output_dir)

        scorer = Scorer()
        for text, annotations in test_examples:
            # Gold annotation is built on an unprocessed doc of the same text
            gold = GoldParse(
                self.nlp.make_doc(text),
                entities=annotations["entities"],
            )
            scorer.score(self.nlp(text), gold)
        return Accuracy(scorer.scores["tags_acc"])
コード例 #10
0
 async def predict(
     self, records: AsyncIterator[Record]
 ) -> AsyncIterator[Tuple[Record, Any, float]]:
     """
     Predict the configured target for each incoming record.

     Only the first entry of ``self.features`` is used as input to
     ``self.predict_input``. Index 2 of ``self.separating_line`` is attached
     to every prediction as its confidence.

     Raises:
         ModelNotTrained: if ``self.separating_line`` is ``None``.
     """
     if self.separating_line is None:
         raise ModelNotTrained("Train model before prediction.")
     target = self.config.predict.NAME
     async for record in records:
         selected = record.features(self.features)
         # Only the first configured feature feeds the prediction
         first_value = selected[self.features[0]]
         predicted_value = self.predict_input(first_value)
         record.predicted(target, predicted_value, self.separating_line[2])
         yield record
コード例 #11
0
ファイル: anomalydetection.py プロジェクト: up1512001/dffml
    async def accuracy(self, sources: SourcesContext) -> Accuracy:
        """
        Recompute the F1 score of the stored anomaly model on labelled records.

        The value stored under ``"anomalies"`` is unpacked as
        ``(epsilon, F1, mu, sigma2)``; the stored F1 is ignored. Records that
        carry the selected features and the target are scored with
        ``multivariateGaussian``, labelled ``1`` when the score is below
        ``epsilon``, and compared with the target values via ``getF1``. The
        new F1 replaces the stored one and is returned.

        Raises:
            ModelNotTrained: if nothing is stored under ``"anomalies"``.
        """
        anomalies = self.storage.get("anomalies", None)
        if anomalies is None:
            raise ModelNotTrained("Train model before assessing for accuracy.")

        # The previously stored F1 is discarded; it is recomputed below
        epsilon, _, mu, sigma2 = anomalies

        target = self.parent.config.predict.name
        X = []
        Y = []
        # Go through all records that have the features we're testing on and
        # the feature we want to predict.
        async for record in sources.with_features(self.features + [target]):
            record_data = []
            for feature in record.features(self.features).values():
                record_data.extend(
                    [feature] if np.isscalar(feature) else feature)
            X.append(record_data)
            Y.append(record.feature(target))

        self.logger.debug("Number of test records: %d", len(X))

        # NOTE(review): this reshape expects exactly one column per configured
        # feature, while the loop above may add several values for a
        # non-scalar feature - confirm non-scalar features are not used here
        X = np.reshape(X, (len(X), len(self.features)))
        Y = np.reshape(Y, (len(Y), 1))

        mu = np.array(mu)
        sigma2 = np.array(sigma2)
        p = multivariateGaussian(X, mu, sigma2)

        # 1 marks a record whose score falls below the threshold. The unused
        # outlier-index list built by the previous version has been dropped.
        pred = (p < epsilon).astype(int)
        F1 = getF1(Y, pred)

        # Update the stored accuracy
        self.storage["anomalies"] = epsilon, F1, mu.tolist(), sigma2.tolist()
        return Accuracy(F1)
コード例 #12
0
 async def predict(
         self, sources: SourcesContext
 ) -> AsyncIterator[Tuple[Record, Any, float]]:
     """
     Predict the configured target for each record that has the features.

     Records are drawn from ``sources`` using the feature names of the parent
     config. Only the first entry of ``self.features`` is passed to
     ``self.predict_input``; index 2 of ``self.separating_line`` is attached
     to every prediction as its confidence.

     Raises:
         ModelNotTrained: if ``self.separating_line`` is ``None``.
     """
     if self.separating_line is None:
         raise ModelNotTrained("Train model before prediction.")
     target = self.config.predict.name
     wanted = self.parent.config.features.names()
     async for record in sources.with_features(wanted):
         selected = record.features(self.features)
         # Only the first configured feature feeds the prediction
         first_value = selected[self.features[0]]
         predicted_value = self.predict_input(first_value)
         record.predicted(target, predicted_value, self.separating_line[2])
         yield record
コード例 #13
0
    async def accuracy(self, sources: SourcesContext) -> Accuracy:
        """
        Score the saved spaCy pipeline on the preprocessed test examples.

        The pipeline is loaded from ``self.parent.config.directory`` into
        ``self.nlp``. One ``Example`` is built per ``(text, annotations)``
        pair returned by ``self._preprocess_data`` and the ``"token_acc"``
        entry of ``Scorer().score(examples)`` is returned.

        Raises:
            ModelNotTrained: if ``directory`` has no ``ner`` sub-directory.
        """
        ner_dir = os.path.join(self.parent.config.directory, "ner")
        if not os.path.isdir(ner_dir):
            raise ModelNotTrained("Train model before assessing for accuracy.")

        test_examples = await self._preprocess_data(sources)
        self.nlp = spacy.load(self.parent.config.directory)

        def build_example(text, annotations):
            # Pipeline output paired with the annotated entities
            built = Example.from_dict(
                self.nlp(text),
                {"entities": annotations["entities"]},
            )
            # The reference is then replaced by an unprocessed doc of the text
            built.reference = self.nlp.make_doc(text)
            return built

        scorer = Scorer()
        examples = [
            build_example(text, annotations)
            for text, annotations in test_examples
        ]
        return Accuracy(scorer.score(examples)["token_acc"])
コード例 #14
0
ファイル: daal4pylr.py プロジェクト: aburgool/dffml
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Return the share of predictions within 0.1 of the expected value.

     Records carrying the selected features and the target are gathered into
     one DataFrame. The target column is split off, the remaining columns
     are passed to ``self.ac_predictor.compute`` with ``self.lm_trained``,
     and the absolute errors are handed to ``self.compare`` with a margin of
     ``0.1``. The sum of that result divided by the number of records is
     returned.

     Raises:
         ModelNotTrained: if ``self.lm_trained`` is ``None``.
     """
     if self.lm_trained is None:
         raise ModelNotTrained("Train model before assessing for accuracy.")
     target = self.parent.config.predict.name
     wanted = self.features + [target]
     feature_data = []
     async for record in sources.with_features(wanted):
         feature_data.append(record.features(wanted))
     # assumes self.pd is the pandas module - TODO confirm
     df = self.pd.DataFrame(feature_data)
     # Select the column axis by keyword: passing the axis positionally
     # (``drop(labels, 1)``) is rejected by pandas 2.0 and later
     xdata = df.drop(columns=[target])
     ydata = df[target]
     preds = self.ac_predictor.compute(xdata, self.lm_trained)
     # Calculate accuracy with an error margin of 0.1
     abs_errors = list(map(abs, map(sub, ydata, preds.prediction)))
     accuracy_val = sum(self.compare(abs_errors, 0.1)) / len(ydata)
     return Accuracy(accuracy_val)
コード例 #15
0
ファイル: myslr.py プロジェクト: oliverob/dffml
 async def predict(self, sources: SourcesContext) -> AsyncIterator[Record]:
     """
     Apply the stored regression line to every record that has the feature.

     The ``"regression_line"`` entry of ``self.storage`` is unpacked as
     ``(m, b, accuracy)``. For each record the prediction ``m * x + b`` is
     stored with the saved accuracy as its confidence.

     Raises:
         ModelNotTrained: if no ``"regression_line"`` entry is stored.
     """
     stored_line = self.storage.get("regression_line", None)
     if stored_line is None:
         raise ModelNotTrained("Train model before prediction")
     slope, intercept, confidence = stored_line
     feature_name = self.config.feature.name
     async for record in sources.with_features([feature_name]):
         # Evaluate the line at this record's input value
         estimate = slope * record.feature(feature_name) + intercept
         record.predicted(self.config.predict.name, estimate, confidence)
         yield record
コード例 #16
0
    async def score(self, mctx: ModelContext, sources: SourcesContext,
                    *features: Features):
        """
        Return the ``"token_acc"`` score of the model context's pipeline.

        The pipeline is loaded from ``mctx.parent.model_path`` into
        ``mctx.nlp``. One ``Example`` is built per ``(text, annotations)``
        pair returned by ``mctx._preprocess_data`` and all examples are passed
        to ``Scorer().score``. ``features`` is not read.

        Raises:
            ModelNotTrained: if ``mctx.is_trained`` is falsy.
        """
        if not mctx.is_trained:
            raise ModelNotTrained("Train model before assessing for accuracy.")

        test_examples = await mctx._preprocess_data(sources)
        mctx.nlp = spacy.load(mctx.parent.model_path)

        scorer = Scorer()
        collected = []
        for text, annotations in test_examples:
            gold_entities = {"entities": annotations["entities"]}
            # Pipeline output paired with the annotated entities
            item = Example.from_dict(mctx.nlp(text), gold_entities)
            # The reference is then replaced by an unprocessed doc of the text
            item.reference = mctx.nlp.make_doc(text)
            collected.append(item)
        return scorer.score(collected)["token_acc"]
コード例 #17
0
ファイル: daal4pylr.py プロジェクト: mrinath123/dffml
 async def predict(
         self, sources: SourcesContext
 ) -> AsyncIterator[Tuple[Record, Any, float]]:
     """
     Predict the configured target for each record that has the features.

     A record's selected features become a one-row DataFrame (index ``0``)
     that is passed to ``self.lm_predictor.compute`` together with
     ``self.lm_trained``. A result of size 1 is unwrapped to its single
     element; otherwise the whole ``prediction`` is stored. The confidence is
     always NaN.

     Raises:
         ModelNotTrained: if ``self.lm_trained`` is ``None``.
     """
     if self.lm_trained is None:
         raise ModelNotTrained("Train model before prediction.")
     wanted = self.parent.config.features.names()
     async for record in sources.with_features(wanted):
         row = self.pd.DataFrame(record.features(self.features), index=[0])
         result = self.lm_predictor.compute(row, self.lm_trained)
         target = self.parent.config.predict.name
         # Unwrap a single-element result so one value is stored, not an array
         value = (
             result.prediction.flat[0]
             if result.prediction.size == 1
             else result.prediction
         )
         record.predicted(target, value, float("nan"))
         # Hand the updated record back to the caller
         yield record
コード例 #18
0
 async def predict(
     self, records: AsyncIterator[Record]
 ) -> AsyncIterator[Tuple[Record, Any, float]]:
     """
     Apply the stored regression line to each incoming record.

     The ``"regression_line"`` entry of ``self.storage`` is unpacked as
     ``(m, b, accuracy)``. The input value is the first entry of
     ``self.features``; the prediction ``m * x + b`` is stored with the saved
     accuracy as its confidence.

     Raises:
         ModelNotTrained: if no ``"regression_line"`` entry is stored.
     """
     stored_line = self.storage.get("regression_line", None)
     if stored_line is None:
         raise ModelNotTrained("Train model before prediction.")
     slope, intercept, confidence = stored_line
     async for record in records:
         # Evaluate the line at the value of the first configured feature
         estimate = slope * record.feature(self.features[0]) + intercept
         record.predicted(self.config.predict.NAME, estimate, confidence)
         yield record
コード例 #19
0
 async def accuracy(self, sources: Sources) -> Accuracy:
     """
     Return the accuracy held at index 2 of ``self.separating_line``.

     ``sources`` is not read.

     Raises:
         ModelNotTrained: if ``self.separating_line`` is ``None``.
     """
     if self.separating_line is not None:
         return Accuracy(self.separating_line[2])
     raise ModelNotTrained("Train model before assessing for accuracy.")
コード例 #20
0
ファイル: logisticregression.py プロジェクト: jacob771/dffml
 async def predict(
     self, sources: SourcesContext
 ) -> AsyncIterator[Tuple[Record, Any, float]]:
     # Ensure the model has been trained before we try to make a prediction
     if self.separating_line is None:
         raise ModelNotTrained("Train model before prediction.")