-
        order: [3, 4] #overall avg recall at 10%
    -
        order: [5, 6] #overall_groups at 10%
    -
        order: [7, 8] #overall_groups at 30%
    -
        order: [9, 10] #director3 at 30%
    -
        order: [11, 12] #director2 at 30%
    -
        order: [13, 14] #director1 at 30%
"""

import yaml
# Run model selection for the experiment on every fold except "2017-12-26".
# `yaml.safe_load` replaces the bare `yaml.load(selector)`: calling `yaml.load`
# without a Loader is deprecated since PyYAML 5.1 and raises TypeError on
# PyYAML >= 6. The visible selector text only holds plain mappings, lists and
# scalars, which the safe loader parses to the same structure.
res = select_model.run(args['experiment_id'], yaml.safe_load(selector),
                       data.query('fold != "2017-12-26"'))

# +
import seaborn as sns

# Work with the first selector result only.
# NOTE(review): this rebinds `selector`, which the code above passes to
# `yaml.load` as YAML text; from here on it is an entry of `res['selectors']`.
selector = res['selectors'][0]

# Single-panel figure.
fig, axis = plt.subplots(figsize=(15, 10), ncols=1, nrows=1)

# `explode` is defined outside this view; presumably it expands the selector's
# data into a flat DataFrame -- TODO confirm.
df = explode(selector['data'])
# First distinct value of the `name` column, read before the baseline rows
# are appended below.
model_name = df['name'].unique()[0]
# Append the rows of `data` whose name is "higher_than_x" or "random".
df = pd.concat([df, data.query('name in ("higher_than_x", "random")')])
# Row-wise label of the form "<name>_<learner_id>".
df['new_name'] = df.apply(lambda x: x['name'] + '_' + str(x['learner_id']), 1)
translate = {
    'higher_than_x': 'Ordered by Value Baseline',
    'random': 'Random Baseline',
from model_selection import plotting, select_model
from utils.utils import connect_to_database

# Database handle returned by the project's `connect_to_database` helper.
con = connect_to_database()

# Add here your parameters:
experiment_id = 1129688681
model_selector = 'good_selector'

args = {'experiment_id': experiment_id}

# Per-fold performance plot for the experiment, with thresh=0.
plotting.overall_performance_per_fold(args, thresh=0)

# The selector definition is read from
# <parent of the current working directory>/model_selectors/<model_selector>.yaml
res = select_model.run(
    args['experiment_id'],
    Path.cwd().parent / 'model_selectors' / (model_selector + '.yaml'),
)

# Print the learner statistics of every selector result.
for df in res['selectors']:
    print(df['learner_stats'])

# Learner ids kept by the fourth selector result (index 3).
good_learners = res['selectors'][3]['learner_ids']

# ## Feature Importance

query = """
select feature, avg(abs(importance)) importance
from experiments.feature_importances
where learner_id = {learner_id}
and importance != 0
group by feature
import sys
from pathlib import Path
import os

# Resolve <two directories above this notebook file>/src, where the notebook
# file name is resolved against the current working directory, and put it at
# the front of the import path unless it is already listed.
notebook_file = Path(os.path.abspath('joaoc-model-selector.py'))
source_path = str(notebook_file.parent.parent / 'src')
if source_path not in sys.path:
    sys.path.insert(0, source_path)
# sys.path.insert(0, '../utils')

import model_selection.select_model as sm

# !pwd

# Run the model selector for one experiment using the template selector file.
# NOTE(review): both the experiment id and the absolute, user-specific path
# are hard-coded, so this cell only runs on the original author's machine.
res = sm.run(3124936745,
             '/home/joao.carabetta/Documents/dncp/model_selectors/template.yaml')

# +
import pickle
# Packages
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import seaborn as sns
# from pipeline.data_persistence import persist_local
from itertools import cycle

# Endless iterator over the 20 colours of Matplotlib's 'tab20' palette.
# `plt.get_cmap` is used instead of `cm.get_cmap`, which was deprecated in
# Matplotlib 3.7 and removed in 3.9; both return the same resampled colormap.
color = cycle(plt.get_cmap('tab20', 20).colors)
Beispiel #4
0
            type: top
            value: 10
        statistic: std
        higher_is_better: false
        

selectors:
    -
        order: [1] 
    -
        order: [2] 
    -
        order: [3, 4]
""")

# Run the selector on the macro experiment; the result is unpacked into
# `metric_selected`, `data` and `max_fold`, which the plots below consume.
metric_selected, data, max_fold = select_model.run(
    macro_experiment_id, selector)

# ## Recall by Quality Reviews
#
# <div class="alert alert-block alert-info">
# The <b>Selected Models</b> line has an interval. The interval boundaries are the minimum and maximum of all selected models.
# </div>

# Plot the recall metric for the selected models together with the baselines.
plotting.plot_metrics(
    metric_selected, data, max_fold, baselines, metric='recall')

# ## Precision by Quality Reviews
            type: threshold
            value: 0.01
        statistic: mean
        higher_is_better: true
        

selectors:
    -
        order: [1]
    -
        order: [2]
    -
        order: [3]
"""

# Run model selection with the YAML selector definition on the full `data`.
# `yaml.safe_load` replaces the bare `yaml.load(selector)`: calling `yaml.load`
# without a Loader is deprecated since PyYAML 5.1 and raises TypeError on
# PyYAML >= 6. The visible selector text only holds plain mappings, lists and
# scalars, which the safe loader parses to the same structure.
res = select_model.run(args['experiment_id'], yaml.safe_load(selector), data)


# +
# One vertically stacked panel per selector result.
nrows = len(res['selectors'])

# squeeze=False makes `plt.subplots` always return a 2-D array of Axes; taking
# its single column keeps `ax` a 1-D array even when there is exactly one
# selector, so positional indexing `ax[i]` works for any nrows >= 1.
# (With the default squeeze=True a 1x1 grid yields a bare Axes object, which
# cannot be indexed.)
fig, ax = plt.subplots(figsize=(15, nrows * 10),
                       ncols=1, nrows=nrows, squeeze=False)
ax = ax[:, 0]

for i, selector in enumerate(res['selectors']):
    
    axis = ax[i]

    df = explode(selector['data'])
    df = pd.concat([df, data.query('name in ("higher_than_x", "random")')])
    df['new_name'] = df.apply(lambda x: x['name'] + '_' + str(x['learner_id']), 1)