Esempio n. 1
0
def plot_importances():
    """Save a bar chart of the trained model's feature importances.

    Loads the fitted model from ``tasks.TaskTrain`` and the training frame
    from ``tasks.TaskPreprocess`` (both with default parameters), labels each
    value of ``model.feature_importances_`` with the matching column name,
    and writes the chart, sorted from most to least important, to
    ``reports/plot.png``.

    The chart is drawn on its own figure, which is closed after saving, so
    repeated calls neither draw on top of an already-open axes nor pile up
    open figures. The output directory is created if it does not exist.
    """
    # Local imports, as in the original: plotting dependencies are only
    # loaded when this function is actually called.
    from pathlib import Path

    import matplotlib.pyplot as plt

    model = tasks.TaskTrain().output().load()
    df_train = tasks.TaskPreprocess().output().load()

    # Every column except the last one is used as a feature label.
    # NOTE(review): presumably the last column is the target 'y' -- confirm
    # against the preprocessing task.
    feature_names = df_train.iloc[:, :-1].columns
    df_importance = pd.Series(model.feature_importances_, index=feature_names)

    out_path = Path('reports/plot.png')
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # An explicit axes keeps pandas from reusing whatever axes happens to be
    # current in the pyplot state machine.
    fig, ax = plt.subplots()
    try:
        df_importance.sort_values(ascending=False).plot.bar(ax=ax)
        fig.savefig(out_path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
Esempio n. 2
0
def accuracy(do_preprocess=cfg.do_preprocess):
    """Print the accuracy of the trained model on the training data.

    The model is loaded from the output of ``tasks.TaskTrain`` built with the
    given ``do_preprocess`` value; the data is loaded from the output of
    ``tasks.TaskPreprocess`` built with default parameters. The column ``'y'``
    supplies the true labels and every column except the last is fed to
    ``model.predict``. The resulting score is printed, not returned.

    Args:
        do_preprocess: Parameter forwarded to ``tasks.TaskTrain`` to select
            which trained model output to load. Defaults to
            ``cfg.do_preprocess`` as evaluated at import time.
    """
    trained_model = tasks.TaskTrain(do_preprocess=do_preprocess).output().load()

    # NOTE(review): the data task is created without ``do_preprocess`` while
    # the model task receives it -- confirm this asymmetry is intended.
    frame = tasks.TaskPreprocess().output().load()

    y_true = frame['y']
    y_pred = trained_model.predict(frame.iloc[:, :-1])
    score = sklearn.metrics.accuracy_score(y_true, y_pred)
    print(score)
Esempio n. 3
0
import d6tflow
import cfg, tasks, visualize

# Check the task dependencies of TaskTrain and their execution status.
d6tflow.preview(tasks.TaskTrain())

# Execute the model training task, including its dependencies.
# See https://d6tflow.readthedocs.io/en/latest/run.html
d6tflow.run(tasks.TaskTrain())

# Use the task outputs through the helpers in the visualize module.
visualize.accuracy()
visualize.plot_importances()

# Change a parameter and rerun the flow with it.
# See https://d6tflow.readthedocs.io/en/latest/advparam.html
d6tflow.run(tasks.TaskTrain(do_preprocess=False))
visualize.accuracy(do_preprocess=False)  # task output is parameter-specific, so pass the same value here

# Rerun the flow after code changes: reload the edited modules so their new
# definitions are used without restarting the interpreter.
# NOTE(review): visualize is not reloaded here -- confirm that is intended.
import importlib
importlib.reload(cfg)
importlib.reload(tasks)

# Say you changed TaskGetData: reset all tasks depending on TaskGetData.
# NOTE(review): presumably TaskTrain marks the downstream end of the flow
# to scan -- confirm against the d6tflow documentation.
d6tflow.invalidate_downstream(tasks.TaskGetData(), tasks.TaskTrain())

# Preview again to see the status after the reset, then rerun the flow.
d6tflow.preview(tasks.TaskTrain())
d6tflow.run(tasks.TaskTrain())