def test_all_tasks(dash_br):
    """Visit every task page and record the tasks whose browser log is not empty.

    For each task id returned by ``tasks.list_tasks`` the browser fixture is
    pointed at ``BASE_URL + 'task/' + <id>``, the test sleeps for five
    seconds, and the id is kept when ``dash_br.get_logs()`` differs from an
    empty list. The collected ids are written to ``task_ids.npy``.

    :param dash_br: browser fixture exposing ``server_url`` and ``get_logs()``
    """
    task_frame = tasks.list_tasks(output_format='dataframe')
    flagged_ids = []
    for task_id in task_frame['task_id'].values:
        page_url = BASE_URL + 'task/' + str(task_id)
        dash_br.server_url = page_url
        # Fixed pause before the browser log is inspected.
        time.sleep(5)
        browser_logs = dash_br.get_logs()
        if browser_logs != []:
            flagged_ids.append(task_id)
    np.save('task_ids.npy', np.asarray(flagged_ids))
def get_task_overview(size):
    """Build the overview page for all tasks on OpenML.

    :param size: not read by this function
    :return: tuple of an ``html.Div`` (task-type pie chart followed by an
        estimation-procedure bar chart, each preceded by a heading) and the
        string ``"done"``
    """
    task_frame = tasks.list_tasks(output_format="dataframe")
    type_heading = "Task types on OpenML"
    procedure_heading = "Estimation procedure used across tasks"

    # Pie chart: number of tasks per task type.
    type_counts = task_frame.groupby("task_type").size().reset_index(name="counts")
    pie = go.Pie(
        labels=type_counts["task_type"],
        values=type_counts["counts"],
        marker={"colors": ["gold", "mediumturquoise", "darkorange", "lightgreen"]},
        showlegend=True,
    )
    type_figure = go.Figure(data=[pie])
    type_figure.update_layout(height=400)

    # Horizontal bar chart: number of tasks per estimation procedure,
    # sorted from most to least frequent.
    procedure_counts = (
        task_frame.groupby("estimation_procedure")
        .size()
        .reset_index(name="counts")
        .sort_values(by="counts", ascending=False)
    )
    bars = go.Bar(
        x=procedure_counts["counts"],
        y=procedure_counts["estimation_procedure"],
        orientation="h",
        marker_color="#330C73",
        showlegend=False,
    )
    procedure_figure = go.Figure(data=bars)
    procedure_figure.update_layout(bargap=0.4, width=900, height=400)
    procedure_figure.update_xaxes(
        tickfont={"size": 10}, categoryorder="total descending"
    )

    page = html.Div(
        [
            html.P(type_heading),
            dcc.Graph(figure=type_figure, id="task_type"),
            html.P(procedure_heading),
            dcc.Graph(figure=procedure_figure, id="ep"),
        ]
    )
    return page, "done"
def get_task_overview():
    """Build the overview page for all tasks on OpenML.

    Draws one histogram per column (``task_type`` and
    ``estimation_procedure``) stacked vertically in a single figure.

    :return: ``html.Div`` wrapping a ``dcc.Graph`` with the two histograms
    """
    task_frame = tasks.list_tasks(output_format='dataframe')
    columns = ["task_type", "estimation_procedure"]
    headings = ("Types of tasks on OpenML", "Estimation procedure across tasks")

    figure = plotly.subplots.make_subplots(
        rows=2, cols=1, subplot_titles=headings
    )
    # Subplot rows are 1-based: first column goes to row 1, second to row 2.
    for row_number, column in enumerate(columns, start=1):
        histogram = go.Histogram(x=task_frame[column], showlegend=False)
        figure.add_trace(histogram, row=row_number, col=1)
    figure.update_layout(height=1000)

    graph = dcc.Graph(figure=figure)
    return html.Div(graph)
def get_task_overview(size):
    """Build the overview page for all tasks on OpenML.

    :param size: not read by this function
    :return: tuple of an ``html.Div`` holding two headed charts (task types
        as a pie chart, estimation procedures as a horizontal bar chart) and
        the string ``"done"``
    """
    all_tasks = tasks.list_tasks(output_format='dataframe')

    def count_by(column):
        # Number of tasks for every distinct value in ``column``.
        return all_tasks.groupby(column).size().reset_index(name='counts')

    # 1. Task type
    per_type = count_by("task_type")
    palette = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']
    type_figure = go.Figure(data=[
        go.Pie(labels=per_type["task_type"],
               values=per_type['counts'],
               marker=dict(colors=palette),
               showlegend=True)
    ])
    type_figure.update_layout(height=400)

    # 2. Estimation procedure, most frequent first
    per_procedure = count_by("estimation_procedure")
    per_procedure = per_procedure.sort_values(by='counts', ascending=False)
    procedure_figure = go.Figure(
        data=go.Bar(x=per_procedure['counts'],
                    y=per_procedure['estimation_procedure'],
                    orientation='h',
                    marker_color='#330C73',
                    showlegend=False))
    procedure_figure.update_layout(bargap=0.4, width=900, height=400)
    procedure_figure.update_xaxes(tickfont=dict(size=10),
                                  categoryorder='total descending')

    children = [
        html.P('Task types on OpenML'),
        dcc.Graph(figure=type_figure, id='task_type'),
        html.P('Estimation procedure used across tasks'),
        dcc.Graph(figure=procedure_figure, id='ep'),
    ]
    return html.Div(children), "done"
def update_flow_plots(pathname, metric, tasktype, parameter):
    """Plot every evaluation of a flow as a scatter of score versus dataset.

    :param pathname: url path; a flow is identified by ``/dashboard/flow``
        followed by ``flow/<digits>``
    :param metric: dropdown to choose function/metric
    :param tasktype: drop down to choose task type; must be one of the task
        type names listed in the body
    :param parameter: dropdown to choose the parameter used to colour the
        markers, or the string ``'None'`` for a single colour
    :return: ``[]`` when the path does not identify a flow, an empty
        ``go.Figure`` when there is nothing to plot, otherwise the scatter
        figure
    :raises ValueError: if ``tasktype`` is not a known task type name
    """
    if pathname is None or '/dashboard/flow' not in pathname:
        return []
    flow_match = re.search(r'flow/(\d+)', pathname)
    if flow_match is None:
        # Flow page without a numeric id in the url: nothing to plot.
        return []
    flow_id = int(flow_match.group(1))

    # Get all tasks of selected task type. The position in this list plus
    # one is passed on as the task type id.
    task_types = [
        "Supervised classification", "Supervised regression", "Learning curve",
        "Supervised data stream classification", "Clustering",
        "Machine Learning Challenge", "Survival Analysis", "Subgroup Discovery"
    ]
    tlist = tasks.list_tasks(task_type_id=task_types.index(tasktype) + 1)
    task_id = [value['tid'] for value in tlist.values()]

    # Get all evaluations of selected metric and flow
    df = evaluations.list_evaluations_setups(function=metric,
                                             flow=[flow_id],
                                             sort_order='desc',
                                             size=10000,
                                             output_format='dataframe')
    if df.empty:
        return go.Figure()

    # Filter type of task; the filter itself can leave nothing to plot.
    df = df[df['task_id'].isin(task_id)]
    if df.empty:
        return go.Figure()

    # Set clickable labels
    run_link = [
        '<a href="https://www.openml.org/r/' + str(run_id) + '/"> '
        for run_id in df["run_id"].values
    ]
    tick_text = [
        '<a href="https://www.openml.org/d/' + str(data_id) + '/">'
        for data_id in df["data_id"].values
    ]

    if parameter == 'None':
        color = [1] * len(df['data_name'])
        hover_text = df["value"]
        marker = dict(
            opacity=0.8,
            symbol='diamond',
            color=color,  # set color equal to a variable
            colorscale='Jet')
    else:
        # One entry per run so colours and hover text stay aligned with the
        # plotted points; runs without the parameter fall back to '0'.
        color = [
            param_dict.get(parameter, '0') for param_dict in df.parameters
        ]
        hover_text = list(color)
        if all(str(value).isdigit() for value in color):
            color = list(map(int, color))
        else:
            # Non-numeric values are coloured by their category code.
            color = pd.DataFrame(color)[0].astype('category').cat.codes
        marker = dict(
            opacity=0.8,
            symbol='diamond',
            color=color,  # set color equal to a variable
            colorscale='Jet',
            colorbar=dict(title='Colorbar'))

    data = [
        go.Scatter(x=df["value"],
                   y=df["data_name"],
                   mode='text+markers',
                   text=run_link,
                   hovertext=hover_text,
                   hoverlabel=dict(bgcolor="white", bordercolor="black"),
                   marker=marker)
    ]
    layout = go.Layout(hovermode='closest',
                       title='Every point is a run, click for details <br>'
                       'Every y label is a dataset, click for details',
                       autosize=False,
                       width=1000,
                       # Grow the figure with the number of datasets shown.
                       height=500 + 15 * df['data_name'].nunique(),
                       xaxis=go.layout.XAxis(showgrid=False),
                       yaxis=go.layout.YAxis(
                           showgrid=True,
                           # Link prefix joined with the dataset name,
                           # element by element.
                           ticktext=tick_text + df["data_name"],
                           tickvals=df["data_name"],
                           showticklabels=True))
    fig = go.Figure(data, layout)
    return fig
def update_flow_plots(pathname, metric, tasktype, parameter):
    """Plot the evaluations of a flow as a scatter of score versus dataset.

    :param pathname: url path; a flow is identified by ``/dashboard/flow``
        followed by ``flow/<digits>``
    :param metric: dropdown to choose function/metric
    :param tasktype: drop down to choose task type; must be one of the task
        type names listed in the body
    :param parameter: dropdown to choose the parameter used to colour the
        markers, or the string ``"None"`` for a single colour
    :return: ``[]`` when the path does not identify a flow, an empty
        ``go.Figure`` when there is nothing to plot, otherwise the scatter
        figure
    :raises ValueError: if ``tasktype`` is not a known task type name
    """
    if pathname is None or "/dashboard/flow" not in pathname:
        return []
    flow_match = re.search(r"flow/(\d+)", pathname)
    if flow_match is None:
        # Flow page without a numeric id in the url: nothing to plot.
        return []
    flow_id = int(flow_match.group(1))

    # Get all tasks of selected task type. A single name -> enum mapping
    # keeps each dropdown label tied to exactly one task type.
    task_type_by_name = {
        "Supervised classification": TaskType.SUPERVISED_CLASSIFICATION,
        "Supervised regression": TaskType.SUPERVISED_REGRESSION,
        "Learning curve": TaskType.LEARNING_CURVE,
        "Supervised data stream classification":
            TaskType.SUPERVISED_DATASTREAM_CLASSIFICATION,
        "Clustering": TaskType.CLUSTERING,
        "Machine Learning Challenge": TaskType.MACHINE_LEARNING_CHALLENGE,
        "Survival Analysis": TaskType.SURVIVAL_ANALYSIS,
        "Subgroup Discovery": TaskType.SUBGROUP_DISCOVERY,
    }
    if tasktype not in task_type_by_name:
        raise ValueError("{!r} is not a known task type".format(tasktype))
    t_list = tasks.list_tasks(task_type=task_type_by_name[tasktype])
    task_id = [value["tid"] for value in t_list.values()]

    # Get all evaluations of selected metric and flow
    import time

    start = time.time()
    df = evaluations.list_evaluations_setups(
        function=metric,
        flows=[flow_id],
        size=1000,
        output_format="dataframe",
        sort_order="desc",
    )
    end = time.time()
    # Timing of the evaluation listing is reported on stdout.
    print("list flow evals took ", end - start, " sec")
    if df.empty:
        return go.Figure()

    # Filter type of task; the filter itself can leave nothing to plot.
    df = df[df["task_id"].isin(task_id)]
    if df.empty:
        return go.Figure()

    # Set clickable labels
    run_link = [
        '<a href="https://www.openml.org/r/' + str(run_id) + '/"> '
        for run_id in df["run_id"].values
    ]
    tick_text = [
        '<a href="https://www.openml.org/d/' + str(data_id) + '/">'
        for data_id in df["data_id"].values
    ]

    if parameter == "None":
        color = [1] * len(df["data_name"])
        hover_text = df["value"]
        marker = dict(
            opacity=0.8,
            symbol="diamond",
            color=color,  # set color equal to a variable
            colorscale="Jet",
        )
    else:
        # One entry per run so colours and hover text stay aligned with the
        # plotted points; runs without the parameter fall back to "0".
        color = [
            param_dict.get(parameter, "0") for param_dict in df.parameters
        ]
        hover_text = list(color)
        if all(str(value).isdigit() for value in color):
            color = list(map(int, color))
        else:
            # Non-numeric values are coloured by their category code.
            color = pd.DataFrame(color)[0].astype("category").cat.codes
        marker = dict(
            opacity=0.8,
            symbol="diamond",
            color=color,  # set color equal to a variable
            colorscale="Jet",
            colorbar=dict(title="Colorbar"),
        )

    data = [
        go.Scatter(
            x=df["value"],
            y=df["data_name"],
            mode="text+markers",
            text=run_link,
            hovertext=hover_text,
            hoverlabel=dict(bgcolor="white", bordercolor="black"),
            marker=marker,
        )
    ]
    layout = go.Layout(
        hovermode="closest",
        title="Every point is a run, click for details <br>"
        "Every y label is a dataset, click for details <br>"
        "Top 1000 runs shown",
        font=dict(size=11),
        autosize=True,
        # Grow the figure with the number of datasets shown.
        height=500 + 15 * df["data_name"].nunique(),
        xaxis=go.layout.XAxis(showgrid=False),
        yaxis=go.layout.YAxis(
            showgrid=True,
            # Link prefix joined with the dataset name, element by element.
            ticktext=tick_text + df["data_name"],
            tickvals=df["data_name"],
            showticklabels=True,
        ),
    )
    fig = go.Figure(data, layout)
    return fig