예제 #1
0
def test_flow_graph_elements(dash_br):
    """Check that the flow page plot lists the datasets of the top evaluations.

    Opens the dashboard page of flow 405, waits for it to render, and then
    verifies that the ``data_name`` of each of the 10 best
    ``area_under_roc_curve`` evaluations of that flow occurs in the text of
    the ``#flowplot`` element.

    :param dash_br: browser fixture exposing ``server_url`` and
        ``find_element``
    """
    flow_id = 405
    dash_br.server_url = f"{BASE_URL}flow/{flow_id}"
    # Fixed wait so the page has time to fetch its data and draw the plot.
    time.sleep(15)
    plot_element = dash_br.find_element("#flowplot")

    top_evaluations = evaluations.list_evaluations(
        function='area_under_roc_curve',
        flows=[flow_id],
        sort_order='desc',
        size=10,
        output_format='dataframe',
    )

    # Every dataset name of the first 10 evaluations must show up in the plot
    for dataset_name in list(top_evaluations['data_name'].values):
        assert dataset_name in plot_element.text
예제 #2
0
def test_task_graph_elements(dash_br):
    """Check that the task page renders its evaluations tab.

    Opens the dashboard page of task 2, waits for it to render, and verifies
    that the ``#tab1`` element exposes text whenever the evaluation listing
    for that task is available.

    :param dash_br: browser fixture exposing ``server_url`` and
        ``find_element``
    """
    task_id = 2
    dash_br.server_url = f"{BASE_URL}task/{task_id}"
    # Fixed wait so the page has time to fetch its data and draw the plot.
    time.sleep(10)
    task_plot = dash_br.find_element("#tab1")
    evals = evaluations.list_evaluations(
        function="area_under_roc_curve",
        size=10,
        sort_order="desc",
        tasks=[task_id],
        output_format="dataframe",
    )
    # The former one-liner ``assert <check> if evals is not None else None``
    # evaluated to ``assert None`` (always failing) when there were no
    # evaluations; an explicit guard skips the check in that case instead.
    if evals is not None:
        assert task_plot.text is not None
예제 #3
0
def get_layout_from_study(study_id):
    """Build the study page layout and fetch evaluations for its runs.

    :param study_id: id of the study; converted with ``int`` before lookup
    :return: tuple ``(layout, item)`` where ``layout`` is an ``html.Div``
        holding the ``dropdown-study`` selector and the (initially empty)
        ``scatterplot-study`` container, and ``item`` is the dataframe of
        ``predictive_accuracy`` evaluations for the selected runs
    """
    study_info = study.get_study(int(study_id))
    # Only runs at positions 1..9 are used; position 0 is skipped.
    # NOTE(review): confirm that leaving out the first run is intended.
    selected_runs = study_info.runs[1:10]
    run_evaluations = evaluations.list_evaluations(
        'predictive_accuracy',
        id=selected_runs,
        output_format='dataframe',
        per_fold=False,
    )

    # Selector between the two views of the scatter plot
    view_options = [
        {'label': 'mean-value', 'value': '0'},
        {'label': 'folded', 'value': '1'},
    ]
    view_selector = dcc.Dropdown(
        id='dropdown-study',
        options=view_options,
        value='0',
    )
    plot_container = html.Div(id='scatterplot-study')

    page = html.Div([view_selector, plot_container],
                    style={"fontFamily": font})
    return page, run_evaluations
예제 #4
0
    def update_task_plots(pathname, metric, n_clicks):
        """
        Build the evaluations plot, contributions plot and leaderboard of a
        task.

        :param pathname: str
            The url pathname which contains task id
        :param metric: str
            Metric for plotting evaluations, ex: accuracy
        :param n_clicks: int
            No of clicks of the "fetch next runs" button; ``None`` counts as
            0. It selects which page of ``n_runs`` evaluations is fetched.
        :return:
            Three components: a hidden copy of the evaluations graph, the
            interactive evaluations graph (Evaluations tab), and the
            contributions graph with the leaderboard table (People tab).
            Three empty ``html.Div`` are returned when no task id can be
            read from ``pathname`` or when there are no evaluations at all.
        """
        n_runs = 100

        # extract task id; every exit path returns three components so the
        # number of returned values matches the main path below
        if pathname is None or '/dashboard/task' not in pathname:
            return html.Div(), html.Div(), html.Div()
        id_match = re.search(r'task/(\d+)', pathname)
        if id_match is None:
            # e.g. ".../dashboard/task" without a numeric id
            return html.Div(), html.Div(), html.Div()
        task_id = int(id_match.group(1))

        if n_clicks is None:
            n_clicks = 0

        # pickle file which caches previous evaluations
        # df_old holds what earlier requests cached, df_new is the page
        # requested now
        # NOTE(review): the cache is keyed by task id only, so rows cached by
        # earlier requests (repeated pages, possibly another metric) are
        # mixed into the result - confirm this is intended.
        # NOTE(review): unpickling runs arbitrary code; this is only safe
        # while the cache directory is written exclusively by this app.
        cache_file = 'cache/task' + str(task_id) + '.pkl'
        try:
            df_old = pd.read_pickle(cache_file)
        except OSError:
            df_old = pd.DataFrame()

        df_new = evaluations.list_evaluations(function=metric,
                                              tasks=[task_id],
                                              sort_order="desc",
                                              offset=n_clicks * n_runs,
                                              size=n_runs,
                                              output_format='dataframe')

        if df_new.empty and df_old.empty:
            return html.Div(), html.Div(), html.Div()

        # DataFrame.append was removed in pandas 2.0; concat with default
        # arguments produces the same frame (original index labels kept)
        df = pd.concat([df_old, df_new])
        df.to_pickle(cache_file)

        # Plotly hack to add href to each data point
        run_link = [
            "<a href=\"https://www.openml.org/r/" + str(run_id) + "/\"> "
            for run_id in df["run_id"].values
        ]
        # Plotly hack to link flow names
        tick_text = [
            "<a href=\"https://www.openml.org/f/" + str(flow_id) + "/\">"
            for flow_id in df["flow_id"].values
        ]
        # Shorten flow names for display
        df['flow_name'] = [
            SklearnExtension.trim_flow_name(flow)
            for flow in df['flow_name'].values
        ]
        # list + Series concatenates element-wise: link prefix + flow name.
        # Both figures use the same labels, so they are computed once.
        linked_flow_names = tick_text + df["flow_name"]

        # Figure 1 - Evaluations
        data = [
            go.Scatter(
                y=df["flow_name"],
                x=df["value"],
                mode='text+markers',
                text=run_link,
                hoverlabel=dict(bgcolor="white",
                                bordercolor="black",
                                namelength=-1),
                marker=dict(
                    opacity=0.5,
                    symbol='diamond',
                    color=df["run_id"],  # set color equal to a variable
                    colorscale='RdBu',
                ))
        ]
        layout = go.Layout(
            autosize=False,
            margin={'l': 400},
            # grow the figure with the number of distinct flows on the y axis
            height=500 + 15 * (df['flow_name'].nunique()),
            title='Every point is a run, click for details <br>'
            'Every y label is a flow, click for details <br>'
            'Top ' + str(n_runs) + ' runs shown<br>',
            font=dict(size=11),
            width=1000,
            xaxis=go.layout.XAxis(side='top'),
            yaxis=go.layout.YAxis(autorange="reversed",
                                  ticktext=linked_flow_names,
                                  tickvals=df["flow_name"]))
        fig = go.Figure(data, layout)

        # Figure 2 People
        # keep only the calendar date of each upload for the x axis
        df['upload_time'] = pd.to_datetime(df['upload_time'])
        df['upload_time'] = df['upload_time'].dt.date

        data = [
            go.Scatter(
                y=df["value"],
                x=df["upload_time"],
                mode='text+markers',
                text=run_link,
                hovertext=df["uploader_name"],
                hoverlabel=dict(bgcolor="white", bordercolor="black"),
                marker=dict(
                    opacity=0.5,
                    symbol='diamond',
                    color=df["uploader"],  # set color equal to a variable
                    colorscale='Rainbow',
                ))
        ]
        layout = go.Layout(
            title='Contributions over time,<br>every point is a run, '
            'click for details',
            autosize=True,
            margin={'l': 100},
            hovermode='y',
            font=dict(size=11),
            xaxis=go.layout.XAxis(showgrid=False),
            yaxis=go.layout.YAxis(
                showgrid=True,
                title=go.layout.yaxis.Title(text=str(metric)),
                ticktext=linked_flow_names,
                showticklabels=True))
        fig1 = go.Figure(data, layout)

        # Leaderboard table
        # Rows are sorted by value (best first) and grouped by uploader
        # without re-sorting the groups; ranks are then assigned 1..N in the
        # order in which the groups come out.
        top_uploader = df.sort_values('value', ascending=False).groupby(
            ['uploader_name'], sort=False)
        name = top_uploader['uploader_name'].unique()
        rank = list(range(1, len(name) + 1))
        entries = top_uploader['uploader_name'].value_counts().values
        # reset_index turns the uploader_name index into a column, which
        # makes the separate 'Name' column redundant
        leaderboard = pd.DataFrame({
            'Rank': rank,
            'Name': name,
            'Entries': entries
        }).reset_index()
        leaderboard.drop('Name', axis=1, inplace=True)

        # NOTE(review): df.shape[1] is the number of columns, so this keeps
        # at most that many rows per uploader; df.shape[0] may have been
        # intended - confirm before changing.
        df = top_uploader.head(df.shape[1])
        # attach to every remaining run the rank of its uploader
        ranks = [
            leaderboard[leaderboard['uploader_name'] ==
                        uploader].Rank.values[0]
            for uploader in df['uploader_name']
        ]
        df['Rank'] = ranks

        # Sort by time
        df.sort_values(by=['upload_time'], inplace=True)
        # Get highest score
        leaderboard = get_highest_rank(df, leaderboard)

        # Create table
        table = html.Div(
            dt.DataTable(data=leaderboard.to_dict('records'),
                         columns=[{
                             "name": i,
                             "id": i
                         } for i in leaderboard.columns],
                         sort_action="native",
                         row_deletable=False,
                         style_cell={
                             'textAlign': 'left',
                             'backgroundColor': 'white',
                             'minWidth': '100px',
                             'width': '150px',
                             'maxWidth': '300px',
                             "fontFamily": font,
                             'textOverflow': 'ellipsis',
                             "fontSize": 14
                         },
                         style_header={
                             'backgroundColor': 'white',
                             'fontWeight': 'bold'
                         },
                         selected_rows=[0],
                         id='tasktable'), )
        # The evaluations figure is returned twice: once hidden, once visible
        dummy_fig = html.Div(dcc.Graph(figure=fig), style={'display': 'none'})
        eval_div = html.Div(dcc.Graph(figure=fig))
        return dummy_fig, eval_div, html.Div(
            [dcc.Graph(figure=fig1),
             html.Div('Leaderboard'), table])