def get_flow_overview(): """ :return: overview page for flows """ df = flows.list_flows(output_format='dataframe') count = pd.DataFrame(df["name"].value_counts()).reset_index() count.columns = ["name", "count"] count = count[0:1000] short = [] for name in count["name"]: try: short.append(SklearnExtension.trim_flow_name(name)) except: pass count["name"] = short fig = go.Figure(data=[ go.Bar(y=count["name"].values, x=count["count"].values, marker=dict(color='blue', opacity=0.8), orientation="h") ]) fig.update_layout(yaxis=dict(autorange="reversed"), margin=dict(l=500), title="", height=700), return html.Div(dcc.Graph(figure=fig))
def test__is_onnx_flow(self): self.sklearn_dummy_model = pipeline.Pipeline( steps=[ ('imputer', Imputer()), ('estimator', tree.DecisionTreeClassifier()) ] ) self.onnx_flow_external_version = self.extension._is_onnx_flow(self.flow) self.sklearn_flow = SklearnExtension().model_to_flow(self.sklearn_dummy_model) self.sklearn_flow_external_version = self.extension._is_onnx_flow(self.sklearn_flow) self.assertTrue(self.onnx_flow_external_version) self.assertFalse(self.sklearn_flow_external_version)
def test__is_mxnet_flow(self): self.sklearn_dummy_model = pipeline.Pipeline( steps=[('imputer', Imputer()), ('estimator', tree.DecisionTreeClassifier())]) # Convert MXNet dummy model to flow self.mxnet_flow = self.extension.model_to_flow(self.mxnet_dummy_model) self.mxnet_flow_bool = self.extension._is_mxnet_flow(self.mxnet_flow) # Convert sklearn dummy model to flow self.sklearn_flow = SklearnExtension().model_to_flow( self.sklearn_dummy_model) self.sklearn_flow_bool = self.extension._is_mxnet_flow( self.sklearn_flow) # Check whether the MXNet flow is correctly recognized self.assertTrue(self.mxnet_flow_bool) # Test that the Sklearn flow is not an MXNet flow self.assertFalse(self.sklearn_flow_bool)
def update_task_plots(pathname, metric, n_clicks): """ :param pathname: str The url pathname which contains task id :param metric: str Metric for plotting evaluations, ex: accuracy :param n_clicks: int No of clicks of "Fetch next 1000 runs" button :return: Interactive graph (Evaluations tab) and leaderboard(People tab) """ n_runs = 100 # extract task id if pathname is not None and '/dashboard/task' in pathname: task_id = int(re.search(r'task/(\d+)', pathname).group(1)) else: return html.Div(), html.Div() if n_clicks is None: n_clicks = 0 # pickle file which caches previous evaluations # df_old may contain 0-1000 evaluations (we cache this) # current request may be to include 1000-2000 evaluations (We fetch this) try: df_old = pd.read_pickle('cache/task' + str(task_id) + '.pkl') except OSError: df_old = pd.DataFrame() df_new = evaluations.list_evaluations(function=metric, tasks=[int(task_id)], sort_order="desc", offset=n_clicks * n_runs, size=n_runs, output_format='dataframe') if df_new.empty and df_old.empty: return html.Div(), html.Div() else: df = df_old.append(df_new) df.to_pickle('cache/task' + str(task_id) + '.pkl') run_link = [] tick_text = [] truncated = [] # Plotly hack to add href to each data point for run_id in df["run_id"].values: link = "<a href=\"https://www.openml.org/r/" + str( run_id) + "/\"> " run_link.append(link) # Plotly hack to link flow names for flow_id in df["flow_id"].values: link = "<a href=\"https://www.openml.org/f/" + str( flow_id) + "/\">" tick_text.append(link) # Truncate flow names (50 chars) for flow in df['flow_name'].values: truncated.append(SklearnExtension.trim_flow_name(flow)) # truncated.append(short[:50] + '..' if len(short) > 50 else short) df['flow_name'] = truncated # Figure 1 - Evaluations data = [ go.Scatter( y=df["flow_name"], x=df["value"], mode='text+markers', text=run_link, # hovertext=df["value"].astype(str)+['<br>'] * # df.shape[0] + ['click for more info'] * df.shape[0], # hoverinfo='text', # hoveron = 'points+fills', hoverlabel=dict(bgcolor="white", bordercolor="black", namelength=-1), marker=dict( opacity=0.5, symbol='diamond', color=df["run_id"], # set color equal to a variable colorscale='RdBu', )) ] layout = go.Layout( autosize=False, margin={'l': 400}, height=500 + 15 * (df['flow_name'].nunique()), title='Every point is a run, click for details <br>' 'Every y label is a flow, click for details <br>' 'Top ' + str(n_runs) + ' runs shown<br>', font=dict(size=11), width=1000, # hovermode='x', xaxis=go.layout.XAxis(side='top'), yaxis=go.layout.YAxis(autorange="reversed", ticktext=tick_text + df["flow_name"], tickvals=df["flow_name"])) fig = go.Figure(data, layout) # Figure 2 People tick_text = [] run_link = [] for run_id in df["run_id"].values: link = "<a href=\"https://www.openml.org/r/" + str( run_id) + "/\"> " run_link.append(link) for flow_id in df["flow_id"].values: link = "<a href=\"https://www.openml.org/f/" + str( flow_id) + "/\">" tick_text.append(link) df['upload_time'] = pd.to_datetime(df['upload_time']) df['upload_time'] = df['upload_time'].dt.date data = [ go.Scatter( y=df["value"], x=df["upload_time"], mode='text+markers', text=run_link, hovertext=df["uploader_name"], hoverlabel=dict(bgcolor="white", bordercolor="black"), marker=dict( opacity=0.5, symbol='diamond', color=df["uploader"], # set color equal to a variable colorscale='Rainbow', )) ] layout = go.Layout( title='Contributions over time,<br>every point is a run, ' 'click for details', autosize=True, margin={'l': 100}, hovermode='y', font=dict(size=11), xaxis=go.layout.XAxis(showgrid=False), yaxis=go.layout.YAxis( showgrid=True, title=go.layout.yaxis.Title(text=str(metric)), ticktext=tick_text + df["flow_name"], showticklabels=True)) fig1 = go.Figure(data, layout) # Leaderboard table top_uploader = df.sort_values('value', ascending=False).groupby( ['uploader_name'], sort=False) name = top_uploader['uploader_name'].unique() rank = list(range(1, len(name) + 1)) entries = top_uploader['uploader_name'].value_counts().values leaderboard = pd.DataFrame({ 'Rank': rank, 'Name': name, 'Entries': entries }).reset_index() leaderboard.drop('Name', axis=1, inplace=True) ranks = [] df = top_uploader.head(df.shape[1]) for uploader in df['uploader_name']: ranks.append(leaderboard[leaderboard['uploader_name'] == uploader].Rank.values[0]) df['Rank'] = ranks # Sort by time df.sort_values(by=['upload_time'], inplace=True) # Get highest score leaderboard = get_highest_rank(df, leaderboard) # Create table table = html.Div( dt.DataTable(data=leaderboard.to_dict('records'), columns=[{ "name": i, "id": i } for i in leaderboard.columns], sort_action="native", row_deletable=False, style_cell={ 'textAlign': 'left', 'backgroundColor': 'white', 'minWidth': '100px', 'width': '150px', 'maxWidth': '300px', "fontFamily": font, 'textOverflow': 'ellipsis', "fontSize": 14 }, style_header={ 'backgroundColor': 'white', 'fontWeight': 'bold' }, selected_rows=[0], id='tasktable'), ) dummy_fig = html.Div(dcc.Graph(figure=fig), style={'display': 'none'}) eval_div = html.Div(dcc.Graph(figure=fig)) return dummy_fig, eval_div, html.Div( [dcc.Graph(figure=fig1), html.Div('Leaderboard'), table])