Ejemplo n.º 1
0
def test_all_tasks(dash_br):
    """Open every task page in the browser and save the ids that produced logs.

    All tasks are listed as a dataframe.  For each task id the browser's
    ``server_url`` is pointed at that task's page, the test waits five
    seconds, and the id is kept when ``dash_br.get_logs()`` is anything other
    than an empty list.  The kept ids are written to ``task_ids.npy``.

    :param dash_br: browser object exposing ``server_url`` and ``get_logs()``
    """
    task_table = tasks.list_tasks(output_format='dataframe')
    flagged = []
    for task_id in task_table['task_id'].values:
        dash_br.server_url = f"{BASE_URL}task/{task_id}"
        # Wait before reading the logs (presumably so the page can load).
        time.sleep(5)
        page_logs = dash_br.get_logs()
        if page_logs != []:
            flagged.append(task_id)
    np.save('task_ids.npy', np.asarray(flagged))
Ejemplo n.º 2
0
    def get_task_overview(size):
        """Build the overview page for all tasks on OpenML.

        The page holds a pie chart with the number of tasks per task type and
        a horizontal bar chart with the number of tasks per estimation
        procedure, each preceded by a title paragraph.

        :param size: not read inside this function
        :return: tuple of (html.Div with both titled graphs, the string "done")
        """
        all_tasks = tasks.list_tasks(output_format="dataframe")
        type_heading = "Task types on OpenML"
        procedure_heading = "Estimation procedure used across tasks"

        # 1. Pie chart: task count per task type
        type_counts = all_tasks.groupby("task_type").size()
        type_counts = type_counts.reset_index(name="counts")
        pie = go.Pie(
            labels=type_counts["task_type"],
            values=type_counts["counts"],
            marker={
                "colors": ["gold", "mediumturquoise", "darkorange", "lightgreen"]
            },
            showlegend=True,
        )
        type_fig = go.Figure(data=[pie])
        type_fig.update_layout(height=400)

        # 2. Horizontal bar chart: task count per estimation procedure,
        #    rows ordered from most to least used
        procedure_counts = (
            all_tasks.groupby("estimation_procedure")
            .size()
            .reset_index(name="counts")
            .sort_values(by="counts", ascending=False)
        )
        bars = go.Bar(
            x=procedure_counts["counts"],
            y=procedure_counts["estimation_procedure"],
            orientation="h",
            marker_color="#330C73",
            showlegend=False,
        )
        procedure_fig = go.Figure(data=bars)
        procedure_fig.update_layout(bargap=0.4, width=900, height=400)
        procedure_fig.update_xaxes(
            tickfont={"size": 10}, categoryorder="total descending"
        )

        page = html.Div(
            [
                html.P(type_heading),
                dcc.Graph(figure=type_fig, id="task_type"),
                html.P(procedure_heading),
                dcc.Graph(figure=procedure_fig, id="ep"),
            ]
        )
        return page, "done"
Ejemplo n.º 3
0
def get_task_overview():
    """Build the overview page for all tasks on OpenML.

    Draws two stacked histograms, one per row: the task types and the
    estimation procedures found in the task listing.

    :return: html.Div wrapping a single graph with both histograms
    """
    task_table = tasks.list_tasks(output_format='dataframe')
    # (dataframe column, subplot title), in top-to-bottom order
    panels = (
        ("task_type", "Types of tasks on OpenML"),
        ("estimation_procedure", "Estimation procedure across tasks"),
    )

    figure = plotly.subplots.make_subplots(
        rows=2,
        cols=1,
        subplot_titles=tuple(heading for _, heading in panels),
    )
    # Subplot rows are 1-based.
    for row, (column, _) in enumerate(panels, start=1):
        histogram = go.Histogram(x=task_table[column], showlegend=False)
        figure.add_trace(histogram, row=row, col=1)
    figure.update_layout(height=1000)

    return html.Div(dcc.Graph(figure=figure))
Ejemplo n.º 4
0
    def get_task_overview(size):
        """Render the overview page for all tasks on OpenML.

        Shows how the listed tasks are spread over task types (pie chart) and
        over estimation procedures (horizontal bar chart).

        :param size: not read inside this function
        :return: tuple of (html.Div with both titled graphs, the string "done")
        """
        task_table = tasks.list_tasks(output_format='dataframe')

        def count_by(column):
            # Number of tasks per distinct value of `column`, with the count
            # stored in a 'counts' column.
            return task_table.groupby(column).size().reset_index(name='counts')

        # 1. Task type
        per_type = count_by("task_type")
        palette = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']
        pie_trace = go.Pie(
            labels=per_type["task_type"],
            values=per_type['counts'],
            marker={'colors': palette},
            showlegend=True,
        )
        pie_figure = go.Figure(data=[pie_trace])
        pie_figure.update_layout(height=400)

        # 2. Estimation procedure, most frequent first
        per_procedure = count_by("estimation_procedure")
        per_procedure = per_procedure.sort_values(by='counts', ascending=False)
        bar_trace = go.Bar(
            x=per_procedure['counts'],
            y=per_procedure['estimation_procedure'],
            orientation='h',
            marker_color='#330C73',
            showlegend=False,
        )
        bar_figure = go.Figure(data=bar_trace)
        bar_figure.update_layout(bargap=0.4, width=900, height=400)
        bar_figure.update_xaxes(
            tickfont={'size': 10},
            categoryorder='total descending',
        )

        children = [
            html.P('Task types on OpenML'),
            dcc.Graph(figure=pie_figure, id='task_type'),
            html.P('Estimation procedure used across tasks'),
            dcc.Graph(figure=bar_figure, id='ep'),
        ]
        return html.Div(children), "done"
Ejemplo n.º 5
0
    def update_flow_plots(pathname, metric, tasktype, parameter):
        """Plot the evaluations of one flow across datasets.

        Every marker is one run of the flow, placed at its evaluation value
        (x) and dataset name (y).  Markers can be coloured by the value of a
        single parameter of the run's setup.

        :param pathname: url path; expected to contain
            ``/dashboard/flow/<flow id>``
        :param metric: dropdown to choose function/metric
        :param tasktype: drop down to choose task type; must be one of the
            labels listed in ``task_types`` below
        :param parameter: dropdown to choose parameter, or the string 'None'
            to give every run the same colour
        :return: an empty list when the path does not identify a flow, an
            empty figure when there is nothing to plot, otherwise the scatter
            figure
        """
        if pathname is None or '/dashboard/flow' not in pathname:
            return []
        flow_match = re.search(r'flow/(\d+)', pathname)
        if flow_match is None:
            # Flow url without a numeric id: treat it like any other page that
            # does not identify a flow instead of failing on ``None.group``.
            return []
        flow_id = int(flow_match.group(1))

        # Get all tasks of selected task type.  The position in this list
        # (plus one) is passed on as the task type id.
        task_types = [
            "Supervised classification", "Supervised regression",
            "Learning curve", "Supervised data stream classification",
            "Clustering", "Machine Learning Challenge", "Survival Analysis",
            "Subgroup Discovery"
        ]
        tlist = tasks.list_tasks(task_type_id=task_types.index(tasktype) + 1)
        task_id = [task['tid'] for task in tlist.values()]

        # Get all evaluations of selected metric and flow
        df = evaluations.list_evaluations_setups(function=metric,
                                                 flow=[flow_id],
                                                 sort_order='desc',
                                                 size=10000,
                                                 output_format='dataframe')
        if df.empty:
            return go.Figure()

        # Filter type of task
        df = df[df['task_id'].isin(task_id)]
        if df.empty:
            # No run of the requested task type is left; the colour handling
            # below needs at least one row.
            return go.Figure()

        # Set clickable labels
        run_link = [
            "<a href=\"https://www.openml.org/r/" + str(run_id) + "/\"> "
            for run_id in df["run_id"].values
        ]
        tick_text = [
            "<a href=\"https://www.openml.org/d/" + str(data_id) + "/\">"
            for data_id in df["data_id"].values
        ]

        if parameter == 'None':
            color = [1] * len(df['data_name'])
            hover_text = df["value"]
            marker = dict(
                opacity=0.8,
                symbol='diamond',
                color=color,  # set color equal to a variable
                colorscale='Jet')
        else:
            # Runs whose setup does not define the parameter get this
            # placeholder, so colours and hover texts keep exactly one entry
            # per row of df and stay aligned with the plotted points.
            missing = '0'
            color = [
                param_dict.get(parameter, missing)
                for param_dict in df.parameters
            ]
            hover_text = list(color)
            # Convert to integers only when every value is a digit string; a
            # leading placeholder must not force int() onto categorical values.
            if all(value.isdigit() for value in color):
                color = list(map(int, color))
            else:
                color = pd.DataFrame(color)[0].astype('category').cat.codes
            marker = dict(
                opacity=0.8,
                symbol='diamond',
                color=color,  # set color equal to a variable
                colorscale='Jet',
                colorbar=dict(title='Colorbar'))
        data = [
            go.Scatter(x=df["value"],
                       y=df["data_name"],
                       mode='text+markers',
                       text=run_link,
                       hovertext=hover_text,
                       hoverlabel=dict(bgcolor="white", bordercolor="black"),
                       marker=marker)
        ]
        layout = go.Layout(hovermode='closest',
                           title='Every point is a run, click for details <br>'
                           'Every y label is a dataset, click for details',
                           autosize=False,
                           width=1000,
                           # grow the plot with the number of datasets shown
                           height=500 + 15 * df['data_name'].nunique(),
                           xaxis=go.layout.XAxis(showgrid=False),
                           yaxis=go.layout.YAxis(showgrid=True,
                                                 # link prefix + dataset name
                                                 ticktext=tick_text +
                                                 df["data_name"],
                                                 tickvals=df["data_name"],
                                                 showticklabels=True))
        fig = go.Figure(data, layout)
        return fig
Ejemplo n.º 6
0
    def update_flow_plots(pathname, metric, tasktype, parameter):
        """Plot the evaluations of one flow across datasets.

        Every marker is one run of the flow, placed at its evaluation value
        (x) and dataset name (y).  Markers can be coloured by the value of a
        single parameter of the run's setup.  At most 1000 evaluations are
        requested.

        :param pathname: url path; expected to contain
            ``/dashboard/flow/<flow id>``
        :param metric: dropdown to choose function/metric
        :param tasktype: drop down to choose task type; must be one of the
            labels listed in ``task_types`` below
        :param parameter: dropdown to choose parameter, or the string "None"
            to give every run the same colour
        :return: an empty list when the path does not identify a flow, an
            empty figure when there is nothing to plot, otherwise the scatter
            figure
        """
        if pathname is None or "/dashboard/flow" not in pathname:
            return []
        flow_match = re.search(r"flow/(\d+)", pathname)
        if flow_match is None:
            # Flow url without a numeric id: treat it like any other page that
            # does not identify a flow instead of failing on ``None.group``.
            return []
        flow_id = int(flow_match.group(1))

        # Get all tasks of selected task type.
        # `task_type_enum` and `task_types` are parallel lists: the label at
        # position i selects the enum member at position i, so both must hold
        # exactly one entry per task type, in the same order.
        task_type_enum = [
            TaskType.SUPERVISED_CLASSIFICATION,
            TaskType.SUPERVISED_REGRESSION,
            TaskType.LEARNING_CURVE,
            TaskType.SUPERVISED_DATASTREAM_CLASSIFICATION,
            TaskType.CLUSTERING,
            TaskType.MACHINE_LEARNING_CHALLENGE,
            TaskType.SURVIVAL_ANALYSIS,
            TaskType.SUBGROUP_DISCOVERY,
        ]
        task_types = [
            "Supervised classification",
            "Supervised regression",
            "Learning curve",
            "Supervised data stream classification",
            "Clustering",
            "Machine Learning Challenge",
            "Survival Analysis",
            "Subgroup Discovery",
        ]
        t_list = tasks.list_tasks(
            task_type=task_type_enum[task_types.index(tasktype)])
        task_id = [task["tid"] for task in t_list.values()]

        # Get all evaluations of selected metric and flow
        import time

        # Time the listing call and report it on stdout.
        start = time.time()
        df = evaluations.list_evaluations_setups(
            function=metric,
            flows=[flow_id],
            size=1000,
            output_format="dataframe",
            sort_order="desc",
        )
        end = time.time()
        print("list flow evals took ", end - start, " sec")
        if df.empty:
            return go.Figure()

        # Filter type of task
        df = df[df["task_id"].isin(task_id)]
        if df.empty:
            # No run of the requested task type is left; the colour handling
            # below needs at least one row.
            return go.Figure()

        # Set clickable labels
        run_link = [
            '<a href="https://www.openml.org/r/' + str(run_id) + '/"> '
            for run_id in df["run_id"].values
        ]
        tick_text = [
            '<a href="https://www.openml.org/d/' + str(data_id) + '/">'
            for data_id in df["data_id"].values
        ]

        if parameter == "None":
            color = [1] * len(df["data_name"])
            hover_text = df["value"]
            marker = dict(
                opacity=0.8,
                symbol="diamond",
                color=color,  # set color equal to a variable
                colorscale="Jet",
            )
        else:
            # Runs whose setup does not define the parameter get this
            # placeholder, so colours and hover texts keep exactly one entry
            # per row of df and stay aligned with the plotted points.
            missing = "0"
            color = [
                param_dict.get(parameter, missing)
                for param_dict in df.parameters
            ]
            hover_text = list(color)
            # Convert to integers only when every value is a digit string; a
            # leading placeholder must not force int() onto categorical values.
            if all(value.isdigit() for value in color):
                color = list(map(int, color))
            else:
                color = pd.DataFrame(color)[0].astype("category").cat.codes
            marker = dict(
                opacity=0.8,
                symbol="diamond",
                color=color,  # set color equal to a variable
                colorscale="Jet",
                colorbar=dict(title="Colorbar"),
            )
        data = [
            go.Scatter(
                x=df["value"],
                y=df["data_name"],
                mode="text+markers",
                text=run_link,
                hovertext=hover_text,
                hoverlabel=dict(bgcolor="white", bordercolor="black"),
                marker=marker,
            )
        ]
        layout = go.Layout(
            hovermode="closest",
            title="Every point is a run, click for details <br>"
            "Every y label is a dataset, click for details <br>"
            "Top 1000 runs shown",
            font=dict(size=11),
            autosize=True,
            # grow the plot with the number of datasets shown
            height=500 + 15 * df["data_name"].nunique(),
            xaxis=go.layout.XAxis(showgrid=False),
            yaxis=go.layout.YAxis(
                showgrid=True,
                # link prefix + dataset name
                ticktext=tick_text + df["data_name"],
                tickvals=df["data_name"],
                showticklabels=True,
            ),
        )
        fig = go.Figure(data, layout)
        return fig