Exemple #1
0
    def test_predict(self):
        """
        Train, score and predict with the scikit linear regression model.

        The model is configured to predict ``Salary`` from ``Years``,
        ``Expertise`` and ``Trust``. The first two predictions are expected
        to round to 70 and 80 respectively.
        """
        self.required_plugins("dffml-model-scikit")
        # Load the SciKit plugin package by name at run time
        scikit_plugin = importlib.import_module("dffml_model_scikit")
        # Build the model, storing its state in a temporary directory
        model = scikit_plugin.LinearRegressionModel(
            directory=self.mktempdir(),
            predict=Feature("Salary", int, 1),
            features=Features(
                Feature("Years", int, 1),
                Feature("Expertise", int, 1),
                Feature("Trust", float, 1),
            ),
        )

        # One CSV source per phase: training, scoring, predicting
        train_source = CSVSource(filename=self.train_filename)
        test_source = CSVSource(filename=self.test_filename)
        predict_source = CSVSource(filename=self.predict_filename)

        # Fit, then score (the score itself is not checked here)
        train(model, train_source)
        accuracy(model, test_source)

        # Collect every prediction so they can be checked by position
        results = list(predict(model, predict_source))
        for position, expected_salary in enumerate((70, 80)):
            # Index 2 of each result holds the prediction mapping
            predicted = results[position][2]["Salary"]["value"]
            self.assertEqual(round(predicted), expected_salary)
Exemple #2
0
def main():
    """
    Train ``model``, print its accuracy, then print each predicted record.

    ``model`` is not defined here; it is looked up from the enclosing
    scope. Every record yielded by ``predict`` has its ``"TARGET"`` entry
    overwritten with the predicted value before it is printed.
    """
    # Fit on the training file
    train(model, "train.csv")

    # Score against the test file and report the result
    score = accuracy(model, "test.csv")
    print("Accuracy:", score)

    # The first element of each yielded triple is not used
    for _, record, result in predict(model, "predict.csv"):
        record["TARGET"] = result["TARGET"]["value"]
        print(record)
Exemple #3
0
from dffml import CSVSource, Features, Feature
from dffml.noasync import train, accuracy, predict
from dffml_model_scratch.logisticregression import LogisticRegression

# Describe the data: one float input "f1" and an integer "ans" to predict
input_features = Features(Feature("f1", float, 1))
target_feature = Feature("ans", int, 1)
model = LogisticRegression(features=input_features, predict=target_feature)

# Fit the model; the data source is given directly as a file path
train(model, "dataset.csv")

# Score the model; here the same file is wrapped in an explicit CSVSource
score = accuracy(model, CSVSource(filename="dataset.csv"))
print("Accuracy:", score)

# Predict for a single in-memory record and print it with the predicted
# value written over its "ans" entry
for _, record, result in predict(model, {"f1": 0.8, "ans": 0}):
    record["ans"] = result["ans"]["value"]
    print(record)
Exemple #4
0
from dffml import CSVSource, DefFeature
from dffml.noasync import train, accuracy, predict
from dffml_model_transformers.ner.ner_model import NERModel

# Configure the NER model. The names given to sid / words / predict are the
# record keys the rest of this script has to use.
# NOTE(review): presumably train.csv has columns with these same names;
# confirm against the data file.
model = NERModel(
    sid=DefFeature("SentenceId", int, 1),
    words=DefFeature("Words", str, 1),
    predict=DefFeature("Tag", str, 1),
    model_architecture_type="distilbert",
    model_name_or_path="distilbert-base-cased",
    epochs=1,
    no_cuda=True,
)

# Train the model
train(model, "train.csv")

# Assess accuracy (alternate way of specifying data source). Note that this
# scores against the same file the model was trained on.
print("Accuracy:", accuracy(model, CSVSource(filename="train.csv")))

# Make prediction on two in-memory records. The sentence id key is spelled
# exactly like the feature configured above ("SentenceId", not "SentenceID")
# so the records actually carry the feature the model was configured with.
for i, features, prediction in predict(
    model,
    {"SentenceId": 1, "Words": "DFFML models can do NER"},
    {"SentenceId": 2, "Words": "DFFML models can do regression"},
):
    features["Tag"] = prediction["Tag"]["value"]
    print(features)
Exemple #5
0
from dffml import Features, DefFeature
from dffml.noasync import train, accuracy, predict
from dffml.model.slr import SLRModel

# Describe the data: one float input "f1" and an integer "ans" to predict
input_features = Features(DefFeature("f1", float, 1))
target_feature = DefFeature("ans", int, 1)
model = SLRModel(features=input_features, predict=target_feature)

# Fit the model on the CSV file
train(model, "dataset.csv")

# Score the model against the same file, again given as a plain path
score = accuracy(model, "dataset.csv")
print("Accuracy:", score)

# Predict for a single in-memory record and print it with the predicted
# value written over its "ans" entry
for _, record, result in predict(model, {"f1": 0.8, "ans": 0}):
    record["ans"] = result["ans"]["value"]
    print(record)
Exemple #6
0
        "Trust": 0.4,
        "Salary": 40
    },
)

# Assess accuracy against two in-memory records
test_records = (
    {"Years": 4, "Expertise": 9, "Trust": 0.5, "Salary": 50},
    {"Years": 5, "Expertise": 11, "Trust": 0.6, "Salary": 60},
)
print("Accuracy:", accuracy(model, *test_records))

# Make prediction
for i, features, prediction in predict(
        model,
    {
        "Years": 6,
        "Expertise": 13,
Exemple #7
0
from dffml import Feature
from dffml.noasync import train, accuracy, predict
from myslr import MySLRModel

# Configure the model: predict "Salary" from "Years", keeping model state
# in the "model" directory
years = Feature("Years", int, 1)
salary = Feature("Salary", int, 1)
model = MySLRModel(feature=years, predict=salary, directory="model")

# Fit on the training file
train(model, "train.csv")

# Score against the test file and report the result
score = accuracy(model, "test.csv")
print("Accuracy:", score)

# Print every record from predict.csv with the predicted salary filled in
for _, record, result in predict(model, "predict.csv"):
    record["Salary"] = result["Salary"]["value"]
    print(record)
Exemple #8
0
        predict=Feature("target", float, 1),
        directory="model",
        max_depth=3,
        learning_rate=0.01,
        n_estimators=200,
        reg_lambda=1,
        reg_alpha=0,
        gamma=0,
        colsample_bytree=0,
        subsample=1,
    ))

def _as_records(inputs, targets):
    """Pair each input with its target as a ``{"data", "target"}`` dict.

    A new list of new dicts is built on every call.
    """
    return [{"data": x, "target": y} for x, y in zip(inputs, targets)]


# Train the model on the training split
train(model, *_as_records(trainX, trainy))

# Assess accuracy on the test split, then on the training split
print("Test accuracy:", accuracy(model, *_as_records(testX, testy)))
print("Training accuracy:", accuracy(model, *_as_records(trainX, trainy)))
Exemple #9
0
        DefFeature("SepalWidth", float, 1),
        DefFeature("PetalLength", float, 1),
        DefFeature("PetalWidth", float, 1),
    ),
    predict=DefFeature("classification", int, 1),
    epochs=3000,
    steps=20000,
    classifications=[0, 1, 2],
    clstype=int,
)

# Fit the model; the data source is given directly as a file path
train(model, "iris_training.csv")

# Score on the separate test file, this time through an explicit CSVSource
test_source = CSVSource(filename="iris_test.csv")
print("Accuracy:", accuracy(model, test_source))

# Make prediction
for i, features, prediction in predict(
        model,
    {
        "PetalLength": 4.2,
        "PetalWidth": 1.5,
        "SepalLength": 5.9,
        "SepalWidth": 3.0,
    },
    {
        "PetalLength": 5.4,
        "PetalWidth": 2.1,
        "SepalLength": 6.9,
        "SepalWidth": 3.1,
Exemple #10
0
from dffml import Feature, Features
from dffml.noasync import accuracy, predict, train

from dffml_model_scratch.anomalydetection import AnomalyModel

# Configure the model: input feature "A" (declared with length 2), integer
# "Y" to predict, model state kept in the "model" directory
input_features = Features(Feature("A", int, 2),)
target_feature = Feature("Y", int, 1)
model = AnomalyModel(
    features=input_features, predict=target_feature, directory="model",
)

# Fit on the training file
train(model, "trainex.csv")

# Report the score for the test set first, then for the training set
for label, filename in (
    ("Test set F1 score :", "testex.csv"),
    ("Training set F1 score :", "trainex.csv"),
):
    print(label, accuracy(model, filename))