async def predict(self, sources: SourcesContext) -> AsyncIterator[Record]:
    """
    Label every input record with a 0/1 prediction.

    The stored "anomalies" entry is unpacked into ``epsilon``, ``F1``,
    ``mu`` and ``sigma2``. Each record's selected features are flattened
    into one row, the rows are passed to ``multivariateGaussian`` and a
    record is labelled 1 when its value is below ``epsilon``, else 0.
    ``F1`` is attached to every prediction as the confidence.

    Raises:
        ModelNotTrained: if nothing is stored under "anomalies".
    """
    stored = self.storage.get("anomalies", None)
    if stored is None:
        raise ModelNotTrained("Train model before prediction")

    epsilon, F1, mu, sigma2 = stored
    mu = np.array(mu)
    sigma2 = np.array(sigma2)

    input_data = await self.get_input_data(sources)

    # One flat row per record: scalars are appended as-is, sequence-valued
    # features are spliced in element by element.
    X = []
    for record in input_data:
        row = []
        for value in record.features(self.features).values():
            if np.isscalar(value):
                row.append(value)
            else:
                row.extend(value)
        X.append(row)

    p = multivariateGaussian(X, mu, sigma2)
    flags = (p < epsilon).astype(int)

    for record, flag in zip(input_data, flags):
        record.predicted(self.config.predict.name, int(flag), float(F1))
        yield record
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Re-score the stored regression line against the records in ``sources``.

    The slope and intercept come from the stored "regression_line" entry;
    the accuracy stored with them is ignored and replaced by the value
    ``coeff_of_deter`` returns for the test data.

    Raises:
        ModelNotTrained: if nothing is stored under "regression_line".
    """
    stored = self.storage.get("regression_line", None)
    if stored is None:
        raise ModelNotTrained("Train model before assessing for accuracy")

    # The third element is the training-time accuracy; it is recomputed below.
    m, b, _accuracy = stored

    feature_name = self.config.feature.name
    target_name = self.config.predict.name

    # Only records carrying both the input feature and the target are used.
    xs = []
    ys = []
    async for record in sources.with_features([feature_name, target_name]):
        xs.append(record.feature(feature_name))
        ys.append(record.feature(target_name))

    self.logger.debug("Number of test records: %d", len(xs))

    # Values the stored line gives for each test input.
    fitted = [m * value + b for value in xs]
    score = coeff_of_deter(ys, fitted)

    # Persist the test-data accuracy alongside the unchanged line parameters.
    self.storage["regression_line"] = m, b, score
    return Accuracy(score)
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Return the accuracy saved with the regression line.

    ``sources`` is not read; the value comes from the stored
    "regression_line" entry, whose third element is the accuracy
    (the first two being m and b).

    Raises:
        ModelNotTrained: if nothing is stored under "regression_line".
    """
    stored = self.storage.get("regression_line", None)
    if stored is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")

    saved_accuracy = stored[2]
    return Accuracy(saved_accuracy)
async def predict(
    self, sources: SourcesContext
) -> AsyncIterator[Tuple[Record, Any, float]]:
    """
    Tag the "sentence" feature of every record with its named entities.

    For each record from ``sources.records()`` the sentence is run through
    ``self.parent.nlp`` and the resulting ``(text, label)`` pairs are stored
    as the "Tag" prediction with the string "Nan" as confidence. The record
    itself is yielded after being updated.

    Raises:
        ModelNotTrained: if ``self.is_trained`` is falsy.
    """
    if not self.is_trained:
        raise ModelNotTrained("Train model before prediction.")

    async for record in sources.records():
        doc = self.parent.nlp(record.feature("sentence"))

        entities = []
        for ent in doc.ents:
            entities.append((ent.text, ent.label_))

        record.predicted("Tag", entities, "Nan")
        yield record
async def predict(self, sources: SourcesContext) -> AsyncIterator[Record]:
    """
    Predict the target for every test record and yield the updated records.

    All test records are gathered first, their features are placed in one
    DataFrame, and predictions plus per-record probabilities are requested
    for that frame. Each record receives its prediction with the largest
    of its probabilities as confidence.

    Raises:
        ModelNotTrained: if ``self.model`` is falsy.
    """
    if not self.model:
        # Message previously read "preictions".
        raise ModelNotTrained(
            "Train the model first before getting predictions"
        )

    test_records = await self.get_test_records(sources)
    x_test = pd.DataFrame([record.features() for record in test_records])

    predictions = await self.get_predictions(x_test)
    probability = await self.get_probabilities(x_test)
    target = self.parent.config.predict.name

    for record, predicted_value, class_probs in zip(
        test_records, predictions, probability
    ):
        record.predicted(target, predicted_value, max(class_probs))
        yield record
async def predict(
    self, sources: SourcesContext
) -> AsyncIterator[Tuple[Record, Any, float]]:
    """
    Load the saved pipeline and tag each record's "sentence" feature.

    The pipeline is loaded from ``self.parent.config.output_dir`` into
    ``self.nlp``. Every record from ``sources.records()`` gets a "Tag"
    prediction made of ``(text, label)`` pairs, with the string "Nan" as
    confidence, and is then yielded.

    Raises:
        ModelNotTrained: if ``output_dir`` has no "ner" sub-directory.
    """
    ner_dir = os.path.join(self.parent.config.output_dir, "ner")
    if not os.path.isdir(ner_dir):
        raise ModelNotTrained("Train model before prediction.")

    self.nlp = spacy.load(self.parent.config.output_dir)

    async for record in sources.records():
        sentence = record.feature("sentence")
        doc = self.nlp(sentence)

        entities = []
        for ent in doc.ents:
            entities.append((ent.text, ent.label_))

        record.predicted("Tag", entities, "Nan")
        yield record
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Score the model on records that carry all features and the target.

    The features of every matching record go into one DataFrame; the target
    column is split off as ``y_test`` and the remaining columns are used to
    obtain predictions, which ``self.accuracy_score`` then compares with
    ``y_test``.

    Raises:
        ModelNotTrained: if ``self.model`` is falsy.
    """
    if not self.model:
        raise ModelNotTrained("Train the model before assessing accuracy")

    target = self.parent.config.predict.name

    rows = []
    async for record in sources.with_features(self.features + [target]):
        rows.append(record.features())

    frame = pd.DataFrame(rows)
    # Double brackets keep the target as a one-column DataFrame.
    y_test = frame[[target]]
    x_test = frame.drop(columns=[target])

    predictions = await self.get_predictions(x_test)
    score = await self.accuracy_score(y_test, predictions)
    return Accuracy(score)
async def predict(
    self, records: AsyncIterator[Record]
) -> AsyncIterator[Tuple[Record, Any, float]]:
    """
    Run the trained model on each record and yield the updated record.

    Each record's selected features become a one-row DataFrame that is
    handed to ``self.lm_predictor.compute`` together with the trained
    model. The resulting ``prediction`` is stored on the record with NaN
    as confidence.

    Raises:
        ModelNotTrained: if ``self.lm_trained`` is None.
    """
    if self.lm_trained is None:
        raise ModelNotTrained("Train model before prediction.")

    async for record in records:
        # index=[0] builds a single-row frame from the feature mapping.
        frame = self.pd.DataFrame(record.features(self.features), index=[0])
        result = self.lm_predictor.compute(frame, self.lm_trained)

        record.predicted(
            self.parent.config.predict.name,
            result.prediction,
            float("nan"),
        )
        yield record
async def accuracy(self, sources: SourcesContext) -> Accuracy:
    """
    Score the saved pipeline on the examples derived from ``sources``.

    The examples come from ``self._preprocess_data``; the pipeline is loaded
    from ``self.parent.config.output_dir`` into ``self.nlp``. Every text is
    scored against a ``GoldParse`` built from its annotated entities, and
    the scorer's "tags_acc" value is returned.

    Raises:
        ModelNotTrained: if ``output_dir`` has no "ner" sub-directory.
    """
    ner_dir = os.path.join(self.parent.config.output_dir, "ner")
    if not os.path.isdir(ner_dir):
        raise ModelNotTrained("Train model before assessing for accuracy.")

    test_examples = await self._preprocess_data(sources)
    self.nlp = spacy.load(self.parent.config.output_dir)

    scorer = Scorer()
    for text, annotations in test_examples:
        # Gold standard: an unprocessed doc plus the annotated entities.
        gold = GoldParse(
            self.nlp.make_doc(text), entities=annotations["entities"]
        )
        parsed = self.nlp(text)
        scorer.score(parsed, gold)

    return Accuracy(scorer.scores["tags_acc"])
async def predict(
    self, records: AsyncIterator[Record]
) -> AsyncIterator[Tuple[Record, Any, float]]:
    """
    Predict the target of each record from its first configured feature.

    The value of ``self.features[0]`` is passed to ``self.predict_input``;
    the result is stored on the record with the third element of
    ``self.separating_line`` as confidence, and the record is yielded.

    Raises:
        ModelNotTrained: if ``self.separating_line`` is None.
    """
    if self.separating_line is None:
        raise ModelNotTrained("Train model before prediction.")

    target = self.config.predict.NAME

    async for record in records:
        feature_data = record.features(self.features)
        first_value = feature_data[self.features[0]]
        label = self.predict_input(first_value)
        confidence = self.separating_line[2]
        record.predicted(target, label, confidence)
        yield record
async def accuracy(self, sources: SourcesContext) -> Accuracy:
    """
    Compute the F1 value of the stored parameters on labelled test records.

    ``epsilon``, ``mu`` and ``sigma2`` are read from the stored "anomalies"
    entry. Records carrying all features and the target are flattened into
    rows, passed to ``multivariateGaussian``, and labelled 1 where the
    result is below ``epsilon``. ``getF1`` compares these labels with the
    true targets; its result replaces the stored F1 and is returned.

    Raises:
        ModelNotTrained: if nothing is stored under "anomalies".
    """
    anomalies = self.storage.get("anomalies", None)
    if anomalies is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")

    # The stored F1 is ignored; a fresh one is computed on the test data.
    epsilon, _F1val, mu, sigma2 = anomalies

    target = self.parent.config.predict.name

    X = []
    Y = []
    async for record in sources.with_features(self.features + [target]):
        # Scalars are appended, sequence-valued features are spliced in.
        row = []
        for feature in record.features(self.features).values():
            row.extend([feature] if np.isscalar(feature) else feature)
        X.append(row)
        Y.append(record.feature(target))

    self.logger.debug("Number of test records: %d", len(X))

    # One column per configured feature, one row per record.
    X = np.reshape(X, (len(X), len(self.features)))
    Y = np.reshape(Y, (len(Y), 1))
    mu = np.array(mu)
    sigma2 = np.array(sigma2)

    p = multivariateGaussian(X, mu, sigma2)
    pred = (p < epsilon).astype(int)
    F1 = getF1(Y, pred)

    # The outlier-index list that used to be built here was never read,
    # so that computation has been dropped.

    # Lists rather than arrays are written back to storage.
    self.storage["anomalies"] = epsilon, F1, mu.tolist(), sigma2.tolist()
    return Accuracy(F1)
async def predict(
    self, sources: SourcesContext
) -> AsyncIterator[Tuple[Record, Any, float]]:
    """
    Predict the target of each matching record from its first feature.

    Records are taken from ``sources.with_features`` using the feature
    names of the parent config. The value of ``self.features[0]`` is passed
    to ``self.predict_input``; the result is stored with the third element
    of ``self.separating_line`` as confidence, and the record is yielded.

    Raises:
        ModelNotTrained: if ``self.separating_line`` is None.
    """
    if self.separating_line is None:
        raise ModelNotTrained("Train model before prediction.")

    target = self.config.predict.name
    wanted = self.parent.config.features.names()

    async for record in sources.with_features(wanted):
        feature_data = record.features(self.features)
        first_value = feature_data[self.features[0]]
        label = self.predict_input(first_value)
        confidence = self.separating_line[2]
        record.predicted(target, label, confidence)
        yield record
async def accuracy(self, sources: SourcesContext) -> Accuracy:
    """
    Score the saved pipeline on the examples derived from ``sources``.

    The examples come from ``self._preprocess_data``; the pipeline is loaded
    from ``self.parent.config.directory`` into ``self.nlp``. One ``Example``
    is built per text and the scorer's "token_acc" value is returned.

    Raises:
        ModelNotTrained: if ``directory`` has no "ner" sub-directory.
    """
    ner_dir = os.path.join(self.parent.config.directory, "ner")
    if not os.path.isdir(ner_dir):
        raise ModelNotTrained("Train model before assessing for accuracy.")

    test_examples = await self._preprocess_data(sources)
    self.nlp = spacy.load(self.parent.config.directory)
    scorer = Scorer()

    examples = []
    for text, annotations in test_examples:
        parsed = self.nlp(text)
        entities = {"entities": annotations["entities"]}
        example = Example.from_dict(parsed, entities)
        # NOTE(review): the reference is replaced with a plain make_doc()
        # result right after from_dict(); confirm this is intended.
        example.reference = self.nlp.make_doc(text)
        examples.append(example)

    scores = scorer.score(examples)
    return Accuracy(scores["token_acc"])
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Score the trained model with an absolute error margin of 0.1.

    Records carrying all features and the target are collected into a
    DataFrame. The target column is separated from the inputs, predictions
    are computed for the inputs, and the absolute differences between
    targets and predictions are passed to ``self.compare`` with the margin.
    The sum of what ``compare`` returns, divided by the number of records,
    is the accuracy.

    Raises:
        ModelNotTrained: if ``self.lm_trained`` is None.
    """
    if self.lm_trained is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")

    target = self.parent.config.predict.name

    feature_data = []
    async for record in sources.with_features(self.features + [target]):
        feature_data.append(record.features(self.features + [target]))

    df = self.pd.DataFrame(feature_data)
    # Use the ``columns`` keyword: passing the axis positionally is
    # deprecated in pandas 1.3 and rejected by pandas 2.0.
    xdata = df.drop(columns=[target])
    ydata = df[target]

    preds = self.ac_predictor.compute(xdata, self.lm_trained)

    # Absolute error of each prediction against its true value.
    abs_errors = list(map(abs, map(sub, ydata, preds.prediction)))
    # Calculate accuracy with an error margin of 0.1
    accuracy_val = sum(self.compare(abs_errors, 0.1)) / len(ydata)
    return Accuracy(accuracy_val)
async def predict(self, sources: SourcesContext) -> AsyncIterator[Record]:
    """
    Apply the stored regression line to every record with the input feature.

    The stored "regression_line" entry supplies ``m``, ``b`` and an
    accuracy. For each record, ``m * x + b`` is stored as the prediction
    with that accuracy as confidence, and the record is yielded.

    Raises:
        ModelNotTrained: if nothing is stored under "regression_line".
    """
    stored = self.storage.get("regression_line", None)
    if stored is None:
        raise ModelNotTrained("Train model before prediction")

    m, b, accuracy = stored
    feature_name = self.config.feature.name

    async for record in sources.with_features([feature_name]):
        x = record.feature(feature_name)
        y = m * x + b
        record.predicted(self.config.predict.name, y, accuracy)
        yield record
async def score(self, mctx: ModelContext, sources: SourcesContext,
                *features: Features):
    """
    Return the "token_acc" score of the model context's saved pipeline.

    The examples come from ``mctx._preprocess_data``; the pipeline is loaded
    from ``mctx.parent.model_path`` into ``mctx.nlp``. One ``Example`` is
    built per text and scored. ``features`` is accepted but not read.

    Raises:
        ModelNotTrained: if ``mctx.is_trained`` is falsy.
    """
    if not mctx.is_trained:
        raise ModelNotTrained("Train model before assessing for accuracy.")

    test_examples = await mctx._preprocess_data(sources)
    mctx.nlp = spacy.load(mctx.parent.model_path)
    scorer = Scorer()

    examples = []
    for text, annotations in test_examples:
        parsed = mctx.nlp(text)
        entities = {"entities": annotations["entities"]}
        example = Example.from_dict(parsed, entities)
        # NOTE(review): the reference is replaced with a plain make_doc()
        # result right after from_dict(); confirm this is intended.
        example.reference = mctx.nlp.make_doc(text)
        examples.append(example)

    scores = scorer.score(examples)
    return scores["token_acc"]
async def predict(
    self, sources: SourcesContext
) -> AsyncIterator[Tuple[Record, Any, float]]:
    """
    Run the trained model on each matching record and yield it.

    Records are taken from ``sources.with_features`` using the feature
    names of the parent config. Each record's selected features become a
    one-row DataFrame for ``self.lm_predictor.compute``. A result holding
    exactly one element is unwrapped to that element; otherwise the whole
    ``prediction`` is stored. Confidence is NaN.

    Raises:
        ModelNotTrained: if ``self.lm_trained`` is None.
    """
    if self.lm_trained is None:
        raise ModelNotTrained("Train model before prediction.")

    wanted = self.parent.config.features.names()

    async for record in sources.with_features(wanted):
        # index=[0] builds a single-row frame from the feature mapping.
        frame = self.pd.DataFrame(record.features(self.features), index=[0])
        result = self.lm_predictor.compute(frame, self.lm_trained)
        target = self.parent.config.predict.name

        raw = result.prediction
        prediction = raw.flat[0] if raw.size == 1 else raw

        record.predicted(target, prediction, float("nan"))
        yield record
async def predict(
    self, records: AsyncIterator[Record]
) -> AsyncIterator[Tuple[Record, Any, float]]:
    """
    Apply the stored regression line to every record and yield it.

    The stored "regression_line" entry supplies ``m``, ``b`` and an
    accuracy. For each record the value of ``self.features[0]`` is read as
    ``x``; ``m * x + b`` is stored as the prediction with that accuracy as
    confidence.

    Raises:
        ModelNotTrained: if nothing is stored under "regression_line".
    """
    stored = self.storage.get("regression_line", None)
    if stored is None:
        raise ModelNotTrained("Train model before prediction.")

    m, b, accuracy = stored

    async for record in records:
        x = record.feature(self.features[0])
        y = m * x + b
        record.predicted(self.config.predict.NAME, y, accuracy)
        yield record
async def accuracy(self, sources: Sources) -> Accuracy:
    """
    Return the accuracy held in the third slot of ``self.separating_line``.

    ``sources`` is not read.

    Raises:
        ModelNotTrained: if ``self.separating_line`` is None.
    """
    line = self.separating_line
    if line is None:
        raise ModelNotTrained("Train model before assessing for accuracy.")

    return Accuracy(line[2])
async def predict( self, sources: SourcesContext ) -> AsyncIterator[Tuple[Record, Any, float]]: # Ensure the model has been trained before we try to make a prediction if self.separating_line is None: raise ModelNotTrained("Train model before prediction.")