def wrap_high_level_accuracy(state):
    """Train a small SLRModel on four records, then yield once.

    The model maps the ``Years`` feature to a ``Salary`` prediction and is
    configured with ``location="tempdir"``. ``state`` is accepted but not
    used in this function.
    """
    years_feature = Feature("Years", int, 1)
    salary_feature = Feature("Salary", int, 1)
    model = SLRModel(
        features=Features(years_feature),
        predict=salary_feature,
        location="tempdir",
    )
    # Four training records: each extra year adds 10 to the salary.
    training_records = [
        {"Years": 0, "Salary": 10},
        {"Years": 1, "Salary": 20},
        {"Years": 2, "Salary": 30},
        {"Years": 3, "Salary": 40},
    ]
    train(model, *training_records)
    # Hand control back to the caller once training has completed.
    yield
def test_predict(self):
    """Train, assess and predict with the scikit linear regression model.

    Checks that the first two predicted ``Salary`` values round to 70 and 80.
    """
    self.required_plugins("dffml-model-scikit")
    # Import SciKit modules
    dffml_model_scikit = importlib.import_module("dffml_model_scikit")
    model_cls = dffml_model_scikit.LinearRegressionModel
    # Instantiate the model (arguments are built in the original order)
    model_directory = self.mktempdir()
    salary = Feature("Salary", int, 1)
    inputs = Features(
        Feature("Years", int, 1),
        Feature("Expertise", int, 1),
        Feature("Trust", float, 1),
    )
    model = model_cls(
        directory=model_directory, predict=salary, features=inputs
    )
    # One CSV source per stage
    training_data = CSVSource(filename=self.train_filename)
    test_data = CSVSource(filename=self.test_filename)
    predict_data = CSVSource(filename=self.predict_filename)
    # Train the model
    train(model, training_data)
    # Assess accuracy
    accuracy(model, test_data)
    # Make prediction
    predictions = list(predict(model, predict_data))
    # Element 2 of each prediction tuple holds the prediction mapping.
    for index, expected_salary in enumerate((70, 80)):
        predicted = predictions[index][2]["Salary"]["value"]
        self.assertEqual(round(predicted), expected_salary)
def wrap_noasync_accuracy(state):
    """Train a small SLRModel on four records, then yield once.

    The model maps the ``Years`` feature to a ``Salary`` prediction and is
    configured with ``directory="tempdir"``. ``state`` is accepted but not
    used in this function.
    """
    model = SLRModel(
        features=Features(Feature("Years", int, 1)),
        predict=Feature("Salary", int, 1),
        directory="tempdir",
    )
    # (years, salary) pairs used as the training set.
    samples = ((0, 10), (1, 20), (2, 30), (3, 40))
    train(
        model,
        *[{"Years": years, "Salary": salary} for years, salary in samples],
    )
    # Hand control back to the caller once training has completed.
    yield
def main():
    """Train ``model``, print its accuracy, then print each prediction.

    Reads ``train.csv``, ``test.csv`` and ``predict.csv``. Each printed
    record has its ``TARGET`` entry set to the predicted value.
    """
    # Train the model
    train(model, "train.csv")

    # Assess accuracy
    test_accuracy = accuracy(model, "test.csv")
    print("Accuracy:", test_accuracy)

    # Make prediction
    for _, record_features, result in predict(model, "predict.csv"):
        predicted_target = result["TARGET"]["value"]
        record_features["TARGET"] = predicted_target
        print(record_features)
def main():
    """Train ``model``, print its score, then print each prediction.

    Scoring uses ``MeanSquaredErrorAccuracy`` on the ``TARGET`` feature of
    ``test.csv``. Each printed record has its ``TARGET`` entry set to the
    predicted value.
    """
    # Train the model
    train(model, "train.csv")

    # Assess accuracy
    scorer = MeanSquaredErrorAccuracy()
    target_feature = Feature("TARGET", float, 1)
    test_score = score(model, scorer, target_feature, "test.csv")
    print("Accuracy:", test_score)

    # Make prediction
    for _, record_features, result in predict(model, "predict.csv"):
        predicted_target = result["TARGET"]["value"]
        record_features["TARGET"] = predicted_target
        print(record_features)
from dffml import CSVSource, Features, Feature
from dffml.noasync import train, accuracy, predict
from dffml_model_scratch.logisticregression import LogisticRegression

# Model with a single float input feature "f1" and an integer prediction "ans"
model = LogisticRegression(
    features=Features(Feature("f1", float, 1)),
    predict=Feature("ans", int, 1),
)

# Train the model
train(model, "dataset.csv")

# Assess accuracy (alternate way of specifying data source)
dataset_source = CSVSource(filename="dataset.csv")
print("Accuracy:", accuracy(model, dataset_source))

# Make prediction
sample = {"f1": 0.8, "ans": 0}
for i, features, prediction in predict(model, sample):
    # Overwrite the placeholder "ans" with the predicted value before printing
    predicted_answer = prediction["ans"]["value"]
    features["ans"] = predicted_answer
    print(features)
from dffml import CSVSource, DefFeature
from dffml.noasync import train, accuracy, predict
from dffml_model_transformers.ner.ner_model import NERModel

# NER model: "SentenceId" identifies the sentence, "Words" is the input text
# and "Tag" is the predicted feature.
model = NERModel(
    sid=DefFeature("SentenceId", int, 1),
    words=DefFeature("Words", str, 1),
    predict=DefFeature("Tag", str, 1),
    model_architecture_type="distilbert",
    model_name_or_path="distilbert-base-cased",
    epochs=1,
    no_cuda=True,
)

# Train the model
train(model, "train.csv")

# Assess accuracy (alternate way of specifying data source)
print("Accuracy:", accuracy(model, CSVSource(filename="train.csv")))

# Make prediction
# The record keys must match the feature names declared on the model above;
# the sentence id key is therefore "SentenceId" (it was "SentenceID", which
# does not match the declared ``sid`` feature).
for i, features, prediction in predict(
    model,
    {"SentenceId": 1, "Words": "DFFML models can do NER"},
    {"SentenceId": 2, "Words": "DFFML models can do regression"},
):
    # Attach the predicted tag to the input features before printing.
    features["Tag"] = prediction["Tag"]["value"]
    print(features)
# Train the model
training_records = [
    {"Years": 0, "Expertise": 1, "Trust": 0.1, "Salary": 10},
    {"Years": 1, "Expertise": 3, "Trust": 0.2, "Salary": 20},
    {"Years": 2, "Expertise": 5, "Trust": 0.3, "Salary": 30},
    {"Years": 3, "Expertise": 7, "Trust": 0.4, "Salary": 40},
]
# Each record is passed to train() as its own positional argument.
train(model, *training_records)
# Assess accuracy
from dffml import CSVSource, Features, Feature from dffml.noasync import train, accuracy, predict from dffml_model_scikit import LinearRegressionModel model = LinearRegressionModel( features=Features( Feature("Years", int, 1), Feature("Expertise", int, 1), Feature("Trust", float, 1), ), predict=Feature("Salary", int, 1), directory="tempdir", ) # Train the model train(model, "training.csv") # Assess accuracy (alternate way of specifying data source) print("Accuracy:", accuracy(model, CSVSource(filename="test.csv"))) # Make prediction for i, features, prediction in predict( model, { "Years": 6, "Expertise": 13, "Trust": 0.7 }, { "Years": 7, "Expertise": 15,
float, )), predict=Feature("target", float, 1), directory="model", max_depth=3, learning_rate=0.01, n_estimators=200, reg_lambda=1, reg_alpha=0, gamma=0, colsample_bytree=0, subsample=1, )) # Train the model train(model, *[{"data": x, "target": y} for x, y in zip(trainX, trainy)]) # Assess accuracy print( "Test accuracy:", accuracy(model, *[{ "data": x, "target": y } for x, y in zip(testX, testy)]), ) print( "Training accuracy:", accuracy(model, *[{ "data": x, "target": y } for x, y in zip(trainX, trainy)]),
model = DNNClassifierModel( features=Features( DefFeature("SepalLength", float, 1), DefFeature("SepalWidth", float, 1), DefFeature("PetalLength", float, 1), DefFeature("PetalWidth", float, 1), ), predict=DefFeature("classification", int, 1), epochs=3000, steps=20000, classifications=[0, 1, 2], clstype=int, ) # Train the model train(model, "iris_training.csv") # Assess accuracy (alternate way of specifying data source) print("Accuracy:", accuracy(model, CSVSource(filename="iris_test.csv"))) # Make prediction for i, features, prediction in predict( model, { "PetalLength": 4.2, "PetalWidth": 1.5, "SepalLength": 5.9, "SepalWidth": 3.0, }, { "PetalLength": 5.4,
from dffml_model_scratch.anomalydetection import AnomalyModel
from dffml_model_scratch.anomaly_detection_scorer import (
    AnomalyDetectionAccuracy,
)

# Configure the model
model = AnomalyModel(
    features=Features(Feature("A", int, 2)),
    predict=Feature("Y", int, 1),
    location="model",
)

# Train the model
train(model, "trainex.csv")

# Assess accuracy: first for the test set, then for the training set
scorer = AnomalyDetectionAccuracy()
for label, dataset in (
    ("Test set F1 score :", "testex.csv"),
    ("Training set F1 score :", "trainex.csv"),
):
    print(label, score(model, scorer, Feature("Y", int, 1), dataset))