def plot_graph_kpi(xvars, yvars, secondary_yvars, user_id, dataset_choice):
    """
    Plot the graph according to user choices.

    Args:
        xvars (str): `x-axis` of the graph.
        yvars (str or list(str)): `y-axis`, can be multiple.
        secondary_yvars: `bar-chart` variable.
        user_id (str): Session/user id.
        dataset_choice (str): Name of dataset.

    Returns:
        dict: A dictionary holding a plotly figure including layout,
              or an empty dict when any required choice is missing.
    """

    # Validate the user's selections BEFORE hitting the data store, so
    # a missing selection does not cost a pointless dataset fetch.
    if any(x is None for x in [xvars, yvars, secondary_yvars,
                               dataset_choice]):
        return {}

    df = get_data(dataset_choice, user_id)
    if df is None:
        return {}

    # baseline graph
    traces = kpis.baseline_graph(df, xvars, yvars, secondary_yvars)

    return {
        'data': traces,
        # yvars[0]: first y variable when yvars is a list; NOTE(review):
        # when yvars is a plain str this is its first character — confirm
        # callers always pass a list here.
        'layout': layouts.default_2d(xvars, yvars[0])
    }
def plot_graph_kpi(xvars, yvars, secondary_yvars, dataset_choice):
    """
    Plot the graph according to user choices.

    Args:
        xvars (str): `x-axis` of the graph.
        yvars (str or list(str)): `y-axis`, can be multiple.
        secondary_yvars: `bar-chart` variable.
        dataset_choice (str): Name of dataset.

    Returns:
        dict: A dictionary holding a plotly figure including layout,
              or an empty dict when any required choice is missing.
    """

    # Every selection must be present before we can plot anything.
    required_choices = (xvars, yvars, secondary_yvars, dataset_choice)
    for choice in required_choices:
        if choice is None:
            return {}

    # The dataset is kept serialized in Redis; deserialize it here.
    dataframe = dill.loads(redis_conn.get(dataset_choice))

    # baseline graph
    figure_traces = kpis.baseline_graph(dataframe, xvars, yvars,
                                        secondary_yvars)

    return {'data': figure_traces,
            'layout': layouts.default_2d(xvars, yvars[0])}
def fit_clustering_model(xvars, yvars, n_clusters, algo_choice_clustering,
                         user_id, dataset_choice):
    """
    Take user choices and, if all are present, fit the appropriate model.

    Args:
        xvars (list(str)): predictor variables.
        yvars (str): target variable; not needed.
        n_clusters (int): Number of clusters; only used for "kmc".
        algo_choice_clustering (str): The choice of algorithm type.
        user_id: Session/user id.
        dataset_choice: Name of dataset.

    Returns:
        list, dict: Dash element(s) with the results of model fitting,
                    and parameters for plotting a graph.
    """

    df = get_data(dataset_choice, user_id)

    # Make sure all variables have a value before fitting
    if any(x is None for x in [xvars, df, dataset_choice,
                               algo_choice_clustering]):
        raise PreventUpdate()

    # TODO: Make this interface cleaner
    # We have the dictionary that maps keys to models so use that.
    # Only the KMeans-style choice accepts an n_clusters argument.
    if algo_choice_clustering == "kmc":
        model = mapping[algo_choice_clustering](n_clusters=n_clusters)
    else:
        model = mapping[algo_choice_clustering]()

    model.fit(df[xvars])

    # TODO: Find a meaningful way (metric) to notify the user of model score.
    try:
        layout = [[
            html.H4(f"Clustering model scored: {model.score(df[xvars])}")
        ]]
    except AttributeError:
        # model without a score function
        layout = [[html.H4("No score for this method.")]]

    labels = model.labels_
    # TODO: If Y is given, visualize the (in)correctly grouped points.

    # Visualize the clusters: 3D when we have >=3 predictors, 2D for
    # exactly 2, otherwise skip the plot.
    if len(xvars) >= 3:
        trace1 = go.Scatter3d(x=df[xvars[0]],
                              y=df[xvars[1]],
                              z=df[xvars[2]],
                              showlegend=False,
                              mode='markers',
                              # np.float was removed in NumPy 1.24; the
                              # builtin float is the equivalent dtype.
                              marker=dict(color=labels.astype(float),
                                          line={
                                              'color': 'black',
                                              'width': 1
                                          }))
        layout += [{
            'data': [trace1],
            'layout': layouts.default_2d(xvars[0], xvars[1])
        }]

    elif len(xvars) == 2:
        trace = scatterplot(df[xvars[0]], df[xvars[1]],
                            marker={'color': labels.astype(float)})
        layout += [{
            'data': [trace],
            'layout': layouts.default_2d(xvars[0], xvars[1])
        }]

    else:
        layout += [{}]

    return layout
def plot_graph_2d(xvars, yvars, graph_choice_exploration, user_id,
                  dataset_choice):
    """
    Plot the graph according to user choices.

    Args:
        xvars (str): `x-axis` of the graph.
        yvars (str or list(str)): `y-axis`, can be multiple depending \
                                  on graph type.
        graph_choice_exploration (str): The choice of graph type.
        user_id (str): Session/user id.
        dataset_choice (str): Name of dataset.

    Returns:
        [dict, bool]: A dictionary holding a plotly figure including \
                      layout and a boolean to indicate whether a Y \
                      variable is needed.
    """

    df = get_data(dataset_choice, user_id)

    # Make sure all variables have a value before moving further
    test_conditions = [xvars, df, dataset_choice, graph_choice_exploration]
    if any(x is None for x in test_conditions):
        return {}

    needs_yvar, allows_multi = graphs2d.graph_configs[graph_choice_exploration]

    # Also, if we need a y variable and it is empty, return.
    if needs_yvar and yvars is None:
        return {}

    # Fix bugs occurring due to Dash not ordering callbacks
    if not allows_multi and isinstance(yvars, list):
        yvars = yvars[0]
    elif allows_multi and isinstance(yvars, str):
        yvars = [yvars]

    # These graph types all draw one trace per y variable with the same
    # call shape; dispatch on the choice instead of repeating the loop.
    per_yvar_graphs = {
        'line_chart': graphs2d.line_chart,
        'scatterplot': graphs2d.scatterplot,
        'filledarea': graphs2d.filledarea,
        'errorbar': graphs2d.errorbar,
    }

    # Graph choices
    if graph_choice_exploration in per_yvar_graphs:
        plot_func = per_yvar_graphs[graph_choice_exploration]
        traces = [plot_func(df[xvars], df[yvar], name=yvar)
                  for yvar in yvars]

    elif graph_choice_exploration == 'bubble_chart':
        # Hard-coded bubble sizes, cycled over the first 10 points.
        size = [20, 40, 60, 80, 100, 80, 60, 40, 20, 40]
        traces = [graphs2d.bubble_chart(df[xvars], df[yvar], size, name=yvar)
                  for yvar in yvars]

    elif graph_choice_exploration == 'histogram':
        traces = [graphs2d.histogram(df[xvars])]

    elif graph_choice_exploration == 'heatmap':
        traces = [graphs2d.heatmap(df[xvars], df[yvars])]

    elif graph_choice_exploration == 'pie':
        # One slice per unique x value, sized by its row count.
        vals = df.groupby(xvars).count().iloc[:, 0]
        labels = df[xvars].unique()
        traces = [go.Pie(labels=labels, values=vals)]

    elif graph_choice_exploration == 'density2d':
        # NOTE(review): density2d is used unwrapped, presumably it
        # returns its own traces collection — confirm in graphs2d.
        traces = graphs2d.density2d(df[xvars], df[yvars], name=yvars)

    elif graph_choice_exploration == 'pairplot':
        # A pairplot needs at least one y variable besides x
        if len(yvars) >= 1:
            # This returns a whole figure, not a trace
            return graphs2d.pairplot(df[[xvars] + yvars])
        traces = []

    else:
        traces = []

    return {
        'data': traces,
        'layout': layouts.default_2d(xvars, ""),
    }
def fit_model(xvars, yvars, algo_choice, dataset_choice, problem_type):
    """
    Take user choices and, if all are present, fit the appropriate model. \
    The results of fitting are given to hidden divs. When the user uses \
    the tab menu then the appropriate menu is rendered.

    Args:
        xvars (list(str)): predictor variables.
        yvars (str): target variable.
        algo_choice (str): The choice of algorithm type.
        dataset_choice (str): Name of the dataset.
        problem_type (str): The type of learning problem.

    Returns:
        list, dict: Dash element(s) with the results of model fitting,
                    and parameters for plotting a graph.
    """

    # Validate the user's choices BEFORE touching Redis, so a missing
    # dataset_choice cannot trigger a failed lookup.
    if any(x is None for x in [xvars, yvars, dataset_choice, algo_choice]):
        raise PreventUpdate()

    df = dill.loads(redis_conn.get(dataset_choice))
    if df is None:
        raise PreventUpdate()

    # The inverse mapping of ml_options, use it to get the sklearn model
    model = node_options[algo_choice]["model_class"]()

    # TODO: This probably needs a better/cleaner implementation and/or
    #       might need to be used in other parts as well.
    # Encode the (possibly categorical) target as integer codes.
    y = pd.factorize(df[yvars])

    model.fit(df[xvars], y[0])
    predictions = model.predict(df[xvars])
    score = model.score(df[xvars], y[0])

    metrics = []
    if problem_type == "regression":
        metrics.append(html.H4(f"Mean Squared Error: {score:.3f}"))

    elif problem_type == "classification":
        metrics.append(html.H4(f"Accuracy: {100*score:.3f} %"))
        metrics.append(html.H4("Confusion matrix:"))

        classes = df[yvars].unique()
        confusion = confusion_matrix(y[0], predictions)
        metrics.append(html.Table([
            html.Thead([html.Th(cls) for cls in classes]),
            html.Tbody([
                html.Tr([html.Td(item) for item in row])
                for row in confusion
            ])
        ]))

    else:
        metrics.append("Not implemented")

    # Reuse the predictions computed above instead of predicting twice.
    labels = predictions
    # TODO: Visualize the (in)correctly grouped points.

    # With >=3 predictors visualize in 3D, with exactly 2 in 2D.
    if len(xvars) >= 3:
        trace1 = go.Scatter3d(x=df[xvars[0]],
                              y=df[xvars[1]],
                              z=df[xvars[2]],
                              showlegend=False,
                              mode='markers',
                              # np.float was removed in NumPy 1.24; the
                              # builtin float is the equivalent dtype.
                              marker={
                                  'color': labels.astype(float),
                                  'line': dict(color='black', width=1)
                              })
        figure = {
            'data': [trace1],
            # yvars is the target column name (a str); indexing it with
            # [0] would label the axis with just its first character.
            'layout': layouts.default_2d(xvars[0], yvars)
        }

    elif len(xvars) == 2:
        traces = scatterplot(df[xvars[0]], df[xvars[1]],
                             marker={'color': labels.astype(float)})
        figure = {
            'data': [traces],
            'layout': go.Layout(
                xaxis={'title': xvars[0]},
                yaxis={'title': yvars},
                legend={'x': 0, 'y': 1},
                hovermode='closest'
            )
        }

    else:
        figure = {}

    return metrics, figure
def fit_classification_model(xvars, yvars, algo_choice_classification,
                             user_id, dataset_choice):
    """
    Take user choices and, if all are present, fit the appropriate model.

    Args:
        xvars (list(str)): predictor variables.
        yvars (str): target variable.
        algo_choice_classification (str): The choice of algorithm type.
        user_id: Session/user id.
        dataset_choice: Name of dataset.

    Returns:
        list, dict: Dash element(s) with the results of model fitting,
                    and parameters for plotting a graph.
    """

    df = get_data(dataset_choice, user_id)

    # Make sure all variables have a value before fitting
    if any(x is None for x in [xvars, yvars, df, dataset_choice,
                               algo_choice_classification]):
        raise PreventUpdate()

    # We have the dictionary that maps keys to models so use that
    model = mapping[algo_choice_classification]()

    # TODO: This probably needs a better/cleaner implementation and/or
    #       might need to be used in other parts as well.
    # Encode the (possibly categorical) target as integer codes.
    y = pd.factorize(df[yvars])

    model.fit(df[xvars], y[0])

    layout = [
        html.H4(f"Classification model scored: {model.score(df[xvars], y[0])}")
    ]

    labels = model.predict(df[xvars])
    # TODO: Visualize the (in)correctly grouped points.

    # With >=3 predictors visualize in 3D, with exactly 2 in 2D.
    if len(xvars) >= 3:
        trace1 = go.Scatter3d(x=df[xvars[0]],
                              y=df[xvars[1]],
                              z=df[xvars[2]],
                              showlegend=False,
                              mode='markers',
                              # np.float was removed in NumPy 1.24; the
                              # builtin float is the equivalent dtype.
                              marker={
                                  'color': labels.astype(float),
                                  'line': dict(color='black', width=1)
                              })
        layout += [{
            'data': [trace1],
            # yvars is the target column name (a str); indexing it with
            # [0] would label the axis with just its first character.
            # This also matches the 2-variable branch below.
            'layout': layouts.default_2d(xvars[0], yvars)
        }]

    elif len(xvars) == 2:
        traces = scatterplot(df[xvars[0]], df[xvars[1]],
                             marker={'color': labels.astype(float)})
        layout += [{
            'data': [traces],
            'layout': layouts.default_2d(xvars[0], yvars)
        }]

    else:
        layout += [{}]

    return layout