def test_predict(self):
    # End-to-end check: train the scikit linear regression model on the
    # training CSV, score it on the test CSV, then verify the rounded
    # "Salary" predictions for the first two records of the predict CSV.
    # (Kept as comments rather than a docstring so unittest's verbose
    # test description stays unchanged.)
    self.required_plugins("dffml-model-scikit")

    # Resolve the plugin package by name at run time
    scikit_plugin = importlib.import_module("dffml_model_scikit")

    # Build the model: three input features, "Salary" as the target
    model = scikit_plugin.LinearRegressionModel(
        location=self.mktempdir(),
        predict=Feature("Salary", int, 1),
        features=Features(
            Feature("Years", int, 1),
            Feature("Expertise", int, 1),
            Feature("Trust", float, 1),
        ),
    )

    # One CSV source per phase
    train_source = CSVSource(filename=self.train_filename)
    test_source = CSVSource(filename=self.test_filename)
    predict_source = CSVSource(filename=self.predict_filename)

    # Fit the model
    train(model, train_source)

    # Run scoring; the returned value is not inspected by this test
    score(
        model,
        MeanSquaredErrorAccuracy(),
        Feature("Salary", int, 1),
        test_source,
    )

    # Collect every prediction, then check the first two. Indexing (rather
    # than zip) keeps the IndexError if fewer than two results come back.
    results = list(predict(model, predict_source))
    for position, expected_salary in enumerate((70, 80)):
        self.assertEqual(
            round(results[position][2]["Salary"]["value"]),
            expected_salary,
        )
def main():
    """Train the enclosing-scope ``model``, report accuracy, print predictions.

    Trains on ``train.csv``, scores against ``test.csv`` with
    ``MeanSquaredErrorAccuracy`` and prints the result, then prints each
    record from ``predict.csv`` with its ``TARGET`` field set to the
    predicted value.
    """
    # Fit the model
    train(model, "train.csv")

    # Score it and report the result
    accuracy = score(
        model,
        MeanSquaredErrorAccuracy(),
        Feature("TARGET", float, 1),
        "test.csv",
    )
    print("Accuracy:", accuracy)

    # Write each predicted value back into its record before printing it
    for _, record, result in predict(model, "predict.csv"):
        record["TARGET"] = result["TARGET"]["value"]
        print(record)
from dffml import CSVSource, Features, Feature
from dffml.noasync import train, score, predict
from dffml.accuracy import MeanSquaredErrorAccuracy
from dffml_model_scratch.logisticregression import LogisticRegression

# Example script: train, score and predict with LogisticRegression,
# using one input feature "f1" and the target "ans".
model = LogisticRegression(
    features=Features(Feature("f1", float, 1)),
    predict=Feature("ans", int, 1),
    location="tempdir",
)

# Fit the model; the data source is given as a plain filename
train(model, "dataset.csv")

# Score the model; here the data source is an explicit CSVSource instead
scorer = MeanSquaredErrorAccuracy()
accuracy = score(
    model,
    scorer,
    Feature("ans", int, 1),
    CSVSource(filename="dataset.csv"),
)
print("Accuracy:", accuracy)

# Predict for a single in-memory record and print it with the predicted
# value written into "ans"
for _, record, result in predict(model, {"f1": 0.8, "ans": 0}):
    record["ans"] = result["ans"]["value"]
    print(record)
}, ) # Assess accuracy scorer = MeanSquaredErrorAccuracy() print( "Accuracy:", score( model, scorer, Feature("Salary", int, 1), { "Years": 4, "Expertise": 9, "Trust": 0.5, "Salary": 50 }, { "Years": 5, "Expertise": 11, "Trust": 0.6, "Salary": 60 }, ), ) # Make prediction for i, features, prediction in predict( model, { "Years": 6,
Feature("Trust", float, 1), ), predict=Feature("Salary", int, 1), location="tempdir", ) # Train the model train(model, "train.csv") # Assess accuracy (alternate way of specifying data source) scorer = MeanSquaredErrorAccuracy() print( "Accuracy:", score( model, scorer, Feature("Salary", int, 1), CSVSource(filename="test.csv"), ), ) # Make prediction for i, features, prediction in predict( model, { "Years": 6, "Expertise": 13, "Trust": 0.7 }, { "Years": 7, "Expertise": 15,
from dffml import Feature, Features
from dffml.noasync import train, score, predict
from dffml.accuracy import MeanSquaredErrorAccuracy

from REPLACE_IMPORT_PACKAGE_NAME.myslr import MySLRModel

# Example script: train, score and predict with MySLRModel,
# using one input feature "x" and the target "y".
model = MySLRModel(
    features=Features(Feature("x", float, 1)),
    predict=Feature("y", int, 1),
    location="tempdir",
)

# Fit the model
train(model, "train.csv")

# Score the model; the data source is given as a plain filename
scorer = MeanSquaredErrorAccuracy()
accuracy = score(model, scorer, Feature("y", int, 1), "test.csv")
print("Accuracy:", accuracy)

# Print each record with the predicted value written into "y"
for _, record, result in predict(model, "predict.csv"):
    record["y"] = result["y"]["value"]
    print(record)
from dffml_model_scratch.anomalydetection import AnomalyModel
from dffml_model_scratch.anomaly_detection_scorer import AnomalyDetectionAccuracy

# NOTE(review): Features, Feature, train and score are used below but are
# not imported in this part of the file -- presumably imported earlier;
# confirm.

# Set up the model: one input feature "A" and the target "Y"
model = AnomalyModel(
    features=Features(Feature("A", int, 2)),
    predict=Feature("Y", int, 1),
    location="model",
)

# Fit the model
train(model, "trainex.csv")

# The same scorer instance is used for both evaluations below
scorer = AnomalyDetectionAccuracy()

# Score against the test set
test_score = score(model, scorer, Feature("Y", int, 1), "testex.csv")
print("Test set F1 score :", test_score)

# Score against the data the model was trained on
training_score = score(model, scorer, Feature("Y", int, 1), "trainex.csv")
print("Training set F1 score :", training_score)
colsample_bytree=0, subsample=1, )) # Train the model train(model, *[{"data": x, "target": y} for x, y in zip(trainX, trainy)]) # Assess accuracy scorer = ClassificationAccuracy() print( "Test accuracy:", score( model, scorer, Feature("target", float, 1), *[{ "data": x, "target": y } for x, y in zip(testX, testy)], ), ) print( "Training accuracy:", score( model, scorer, Feature("target", float, 1), *[{ "data": x, "target": y } for x, y in zip(trainX, trainy)],
steps=20000, classifications=[0, 1, 2], clstype=int, location="tempdir", ) # Train the model train(model, "iris_training.csv") # Assess accuracy (alternate way of specifying data source) scorer = ClassificationAccuracy() print( "Accuracy:", score( model, scorer, Feature("classification", int, 1), CSVSource(filename="iris_test.csv"), ), ) # Make prediction for i, features, prediction in predict( model, { "PetalLength": 4.2, "PetalWidth": 1.5, "SepalLength": 5.9, "SepalWidth": 3.0, }, { "PetalLength": 5.4,
predict=Feature("TARGET", float, 1), epochs=300, steps=2000, hidden=[8, 16, 8], location="tempdir", ) # Train the model train(model, "train.csv") # Assess accuracy (alternate way of specifying data source) scorer = MeanSquaredErrorAccuracy() print( "Accuracy:", score( model, scorer, Feature("TARGET", float, 1), CSVSource(filename="test.csv"), ), ) # Make prediction for i, features, prediction in predict(model, { "Feature1": 0.21, "Feature2": 0.18, "TARGET": 0.84 }): features["TARGET"] = prediction["TARGET"]["value"] print(features)
from dffml import Features, Feature, SLRModel
from dffml.noasync import score, train
from dffml.accuracy import MeanSquaredErrorAccuracy

# Example script: train SLRModel and print its accuracy,
# using one input feature "f1" and the target "ans".
model = SLRModel(
    features=Features(Feature("f1", float, 1)),
    predict=Feature("ans", int, 1),
    location="tempdir",
)

# Fit the model
train(model, "dataset.csv")

# Accuracy plugin used for scoring
mse_accuracy = MeanSquaredErrorAccuracy()

# Score against the same file that was used for training and print it
accuracy = score(model, mse_accuracy, Feature("ans", int, 1), "dataset.csv")
print("Accuracy:", accuracy)
from dffml import CSVSource, Features, Feature
from dffml.noasync import train, score, predict
from dffml_model_daal4py.daal4pylr import DAAL4PyLRModel
from dffml.accuracy import MeanSquaredErrorAccuracy

# Example script: train, score and predict with DAAL4PyLRModel,
# using one input feature "f1" and the target "ans".
model = DAAL4PyLRModel(
    features=Features(Feature("f1", float, 1)),
    predict=Feature("ans", int, 1),
    location="tempdir",
)

# Fit the model; the data source is given as a plain filename
train(model, "train.csv")

# Score the model; here the data source is an explicit CSVSource instead
scorer = MeanSquaredErrorAccuracy()
accuracy = score(
    model,
    scorer,
    Feature("ans", int, 1),
    CSVSource(filename="test.csv"),
)
print("Accuracy:", accuracy)

# Predict for a single in-memory record and print it with the predicted
# value written into "ans"
for _, record, result in predict(model, {"f1": 0.8, "ans": 0}):
    record["ans"] = result["ans"]["value"]
    print(record)
# NOTE(review): the names used below (TextClassificationModel, Features,
# Feature, CSVSource, train, score, predict, TextClassifierAccuracy) are not
# imported in this part of the file -- presumably imported earlier; confirm.

# Set up the model: input feature "sentence", target "sentiment",
# with the integer classes 0, 1 and 2
model = TextClassificationModel(
    features=Features(Feature("sentence", str, 1)),
    predict=Feature("sentiment", int, 1),
    classifications=[0, 1, 2],
    clstype=int,
    location="tempdir",
)

# Fit the model; the data source is given as a plain filename
train(model, "train.csv")

# Score the model; here the data source is an explicit CSVSource instead
scorer = TextClassifierAccuracy()
accuracy = score(
    model,
    scorer,
    Feature("sentiment", int, 1),
    CSVSource(filename="test.csv"),
)
print("Accuracy:", accuracy)

# Predict for a single in-memory record and print it with the predicted
# value written into "sentiment"
for _, record, result in predict(
    model, {"sentence": "This track is horrible"}
):
    record["sentiment"] = result["sentiment"]["value"]
    print(record)
from dffml import Features, Feature, SLRModel
from dffml.noasync import train, score, predict
from dffml.accuracy import MeanSquaredErrorAccuracy

# Example script: train, score and predict with SLRModel,
# using one input feature "f1" and the target "ans".
model = SLRModel(
    features=Features(Feature("f1", float, 1)),
    predict=Feature("ans", int, 1),
    location="tempdir",
)

# Fit the model
train(model, "dataset.csv")

# Score against the same file that was used for training and print it
scorer = MeanSquaredErrorAccuracy()
accuracy = score(model, scorer, Feature("ans", int, 1), "dataset.csv")
print("Accuracy:", accuracy)

# Predict for a single in-memory record and print it with the predicted
# value written into "ans"
for _, record, result in predict(model, {"f1": 0.8, "ans": 0}):
    record["ans"] = result["ans"]["value"]
    print(record)