def test_predict(self):
    """Train, score, and predict with the scikit LinearRegressionModel.

    Checks that the first two predicted "Salary" values round to 70 and 80.
    """
    self.required_plugins("dffml-model-scikit")
    # Load the plugin by name at call time rather than at module import
    scikit_plugin = importlib.import_module("dffml_model_scikit")
    # Configure the model: three input features, "Salary" as the target
    model_cls = scikit_plugin.LinearRegressionModel
    workdir = self.mktempdir()
    target = Feature("Salary", int, 1)
    inputs = Features(
        Feature("Years", int, 1),
        Feature("Expertise", int, 1),
        Feature("Trust", float, 1),
    )
    model = model_cls(directory=workdir, predict=target, features=inputs)
    # One CSV source per phase
    train_source = CSVSource(filename=self.train_filename)
    test_source = CSVSource(filename=self.test_filename)
    predict_source = CSVSource(filename=self.predict_filename)
    # Fit the model
    train(model, train_source)
    # Score the model (the result is not checked here)
    accuracy(model, test_source)
    # Collect every prediction so they can be indexed below
    results = list(predict(model, predict_source))
    # Element [2] of each result holds the prediction mapping
    for index, expected in enumerate((70, 80)):
        self.assertEqual(
            round(results[index][2]["Salary"]["value"]), expected
        )
def main():
    """Train the model, print its accuracy, then print each prediction.

    Reads "train.csv", "test.csv" and "predict.csv", and relies on a
    ``model`` object defined outside this function.
    """
    # Fit on the training file
    train(model, "train.csv")
    # Score against the test file and report the result
    score = accuracy(model, "test.csv")
    print("Accuracy:", score)
    # Write each predicted value back into its feature row before printing
    for _, row, result in predict(model, "predict.csv"):
        row["TARGET"] = result["TARGET"]["value"]
        print(row)
from dffml import CSVSource, Features, Feature
from dffml.noasync import train, accuracy, predict
from dffml_model_scratch.logisticregression import LogisticRegression

# Example: logistic regression with input feature "f1" and target "ans"
input_features = Features(Feature("f1", float, 1))
target_feature = Feature("ans", int, 1)
model = LogisticRegression(features=input_features, predict=target_feature)

# Fit the model; the data source is given as a bare filename
train(model, "dataset.csv")

# Score the model; here the same file is wrapped in an explicit CSVSource
score = accuracy(model, CSVSource(filename="dataset.csv"))
print("Accuracy:", score)

# Predict for a single in-memory record and print it with the predicted value
record = {"f1": 0.8, "ans": 0}
for i, features, prediction in predict(model, record):
    predicted_value = prediction["ans"]["value"]
    features["ans"] = predicted_value
    print(features)
from dffml import CSVSource, DefFeature
from dffml.noasync import train, accuracy, predict
from dffml_model_transformers.ner.ner_model import NERModel

# Example: train a NER model, report its accuracy, and tag two sentences.
# The feature names declared here ("SentenceId", "Words", "Tag") are the
# keys that records are expected to use.
model = NERModel(
    sid=DefFeature("SentenceId", int, 1),
    words=DefFeature("Words", str, 1),
    predict=DefFeature("Tag", str, 1),
    model_architecture_type="distilbert",
    model_name_or_path="distilbert-base-cased",
    epochs=1,
    no_cuda=True,
)

# Train the model
train(model, "train.csv")

# Assess accuracy (alternate way of specifying data source)
# NOTE: this scores against the same train.csv that was used for training
print("Accuracy:", accuracy(model, CSVSource(filename="train.csv")))

# Make prediction
# The sentence-id key is spelled "SentenceId" so it matches the name given
# to the ``sid`` feature above (it was previously "SentenceID").
for i, features, prediction in predict(
    model,
    {"SentenceId": 1, "Words": "DFFML models can do NER"},
    {"SentenceId": 2, "Words": "DFFML models can do regression"},
):
    features["Tag"] = prediction["Tag"]["value"]
    print(features)
from dffml import Features, DefFeature
from dffml.noasync import train, accuracy, predict
from dffml.model.slr import SLRModel

# Example: SLRModel with input feature "f1" and target "ans"
input_features = Features(DefFeature("f1", float, 1))
target_feature = DefFeature("ans", int, 1)
model = SLRModel(features=input_features, predict=target_feature)

# Fit the model on the CSV file
train(model, "dataset.csv")

# Score the model against the same CSV file, again given as a filename
score = accuracy(model, "dataset.csv")
print("Accuracy:", score)

# Predict for a single in-memory record and print it with the predicted value
record = {"f1": 0.8, "ans": 0}
for i, features, prediction in predict(model, record):
    predicted_value = prediction["ans"]["value"]
    features["ans"] = predicted_value
    print(features)
"Trust": 0.4, "Salary": 40 }, ) # Assess accuracy print( "Accuracy:", accuracy( model, { "Years": 4, "Expertise": 9, "Trust": 0.5, "Salary": 50 }, { "Years": 5, "Expertise": 11, "Trust": 0.6, "Salary": 60 }, ), ) # Make prediction for i, features, prediction in predict( model, { "Years": 6, "Expertise": 13,
from dffml import Feature
from dffml.noasync import train, accuracy, predict

from myslr import MySLRModel

# Example: MySLRModel with input feature "Years" and target "Salary",
# configured with the directory "model"
years = Feature("Years", int, 1)
salary = Feature("Salary", int, 1)
model = MySLRModel(feature=years, predict=salary, directory="model")

# Fit the model
train(model, "train.csv")

# Score the model and report the result
score = accuracy(model, "test.csv")
print("Accuracy:", score)

# Write each predicted salary back into its feature row and print the row
for i, features, prediction in predict(model, "predict.csv"):
    predicted_salary = prediction["Salary"]["value"]
    features["Salary"] = predicted_salary
    print(features)
predict=Feature("target", float, 1), directory="model", max_depth=3, learning_rate=0.01, n_estimators=200, reg_lambda=1, reg_alpha=0, gamma=0, colsample_bytree=0, subsample=1, )) # Train the model train(model, *[{"data": x, "target": y} for x, y in zip(trainX, trainy)]) # Assess accuracy print( "Test accuracy:", accuracy(model, *[{ "data": x, "target": y } for x, y in zip(testX, testy)]), ) print( "Training accuracy:", accuracy(model, *[{ "data": x, "target": y } for x, y in zip(trainX, trainy)]), )
DefFeature("SepalWidth", float, 1), DefFeature("PetalLength", float, 1), DefFeature("PetalWidth", float, 1), ), predict=DefFeature("classification", int, 1), epochs=3000, steps=20000, classifications=[0, 1, 2], clstype=int, ) # Train the model train(model, "iris_training.csv") # Assess accuracy (alternate way of specifying data source) print("Accuracy:", accuracy(model, CSVSource(filename="iris_test.csv"))) # Make prediction for i, features, prediction in predict( model, { "PetalLength": 4.2, "PetalWidth": 1.5, "SepalLength": 5.9, "SepalWidth": 3.0, }, { "PetalLength": 5.4, "PetalWidth": 2.1, "SepalLength": 6.9, "SepalWidth": 3.1,
from dffml import Feature, Features
from dffml.noasync import accuracy, predict, train
from dffml_model_scratch.anomalydetection import AnomalyModel

# Example: AnomalyModel with input feature "A" and target "Y",
# configured with the directory "model"
input_features = Features(Feature("A", int, 2),)
target_feature = Feature("Y", int, 1)
model = AnomalyModel(
    features=input_features, predict=target_feature, directory="model",
)

# Fit the model
train(model, "trainex.csv")

# Report the score on the test set
test_score = accuracy(model, "testex.csv")
print("Test set F1 score :", test_score)

# Report the score on the training set
training_score = accuracy(model, "trainex.csv")
print("Training set F1 score :", training_score)