def plot_importances():
    """Save a bar chart of the trained model's feature importances.

    Loads the output of ``tasks.TaskTrain()`` (the model) and of
    ``tasks.TaskPreprocess()`` (the training frame), both with their default
    parameters, pairs ``model.feature_importances_`` with the names of every
    column except the last one, and plots the values in descending order.

    The chart is written to ``reports/plot.png`` relative to the current
    working directory; the ``reports`` directory is created if it is missing.
    """
    import os

    import matplotlib.pyplot as plt

    model = tasks.TaskTrain().output().load()
    df_train = tasks.TaskPreprocess().output().load()

    # The last column is excluded from the feature names; accuracy() in this
    # module slices the frame the same way before calling model.predict.
    feature_names = df_train.iloc[:, :-1].columns
    df_importance = pd.Series(model.feature_importances_, index=feature_names)

    df_importance.sort_values(ascending=False).plot.bar()

    # savefig does not create parent directories, so make sure the target
    # folder exists instead of failing with FileNotFoundError on a fresh
    # checkout.
    os.makedirs('reports', exist_ok=True)
    plt.savefig('reports/plot.png')
def accuracy(do_preprocess=cfg.do_preprocess):
    """Print the accuracy score of the trained model on the preprocessed data.

    Args:
        do_preprocess: Forwarded to ``tasks.TaskTrain`` to select which
            trained model output is loaded. Defaults to ``cfg.do_preprocess``
            as evaluated when this function is defined.

    The evaluation frame comes from ``tasks.TaskPreprocess()`` built with its
    default parameters. Column ``'y'`` supplies the true labels and every
    column except the last is passed to ``model.predict``.

    NOTE(review): ``do_preprocess`` is not forwarded to ``TaskPreprocess``, so
    the evaluation data may differ from the data the selected model was
    trained on -- confirm whether ``TaskPreprocess`` accepts that parameter.
    """
    trained_model = tasks.TaskTrain(do_preprocess=do_preprocess).output().load()
    frame = tasks.TaskPreprocess().output().load()

    y_true = frame['y']
    y_pred = trained_model.predict(frame.iloc[:, :-1])

    score = sklearn.metrics.accuracy_score(y_true, y_pred)
    print(score)
import importlib

import d6tflow

import cfg
import tasks
import visualize

# Preview the training task: its dependencies and their execution status.
d6tflow.preview(tasks.TaskTrain())

# Run the model training task, including its dependencies.
# See https://d6tflow.readthedocs.io/en/latest/run.html
d6tflow.run(tasks.TaskTrain())

# Use the task output.
visualize.accuracy()
visualize.plot_importances()

# Change a parameter and rerun.
# See https://d6tflow.readthedocs.io/en/latest/advparam.html
d6tflow.run(tasks.TaskTrain(do_preprocess=False))
# Task output is parameter specific, so pass the same parameter here.
visualize.accuracy(do_preprocess=False)

# Rerun the flow after code changes: reload the edited modules first.
for module in (cfg, tasks):
    importlib.reload(module)

# Say TaskGetData was changed: reset all tasks depending on TaskGetData.
d6tflow.invalidate_downstream(tasks.TaskGetData(), tasks.TaskTrain())

d6tflow.preview(tasks.TaskTrain())
d6tflow.run(tasks.TaskTrain())