Beispiel #1
0
def plot_graph_kpi(xvars, yvars, secondary_yvars, user_id, dataset_choice):
    """
    Plot the KPI graph according to user choices.

    Args:
        xvars (str): `x-axis` of the graph.
        yvars (str or list(str)): `y-axis`, can be multiple.
        secondary_yvars: `bar-chart` variable.
        user_id (str): Session/user id.
        dataset_choice (str): Name of dataset.

    Returns:
        dict: A dictionary holding a plotly figure including layout, or
              an empty dict when a choice is missing, no y variable is
              selected, or no data could be loaded.
    """

    # Validate the user choices first, so that no dataset is loaded for
    # a request that cannot be plotted anyway.
    choices = [xvars, yvars, secondary_yvars, dataset_choice]
    if any(choice is None for choice in choices):
        return {}

    # An empty selection has no first element to use as the axis title.
    if len(yvars) == 0:
        return {}

    # `yvars` may be a single name or a list of names; indexing a plain
    # string would yield only its first character.
    y_title = yvars if isinstance(yvars, str) else yvars[0]

    df = get_data(dataset_choice, user_id)
    if df is None:
        return {}

    # baseline graph
    traces = kpis.baseline_graph(df, xvars, yvars, secondary_yvars)

    return {'data': traces, 'layout': layouts.default_2d(xvars, y_title)}
Beispiel #2
0
def plot_graph_kpi(xvars, yvars, secondary_yvars, dataset_choice):
    """
    Plot the KPI graph according to user choices.

    Args:
        xvars (str): `x-axis` of the graph.
        yvars (str or list(str)): `y-axis`, can be multiple.
        secondary_yvars: `bar-chart` variable.
        dataset_choice (str): Name of dataset, used as the redis key.

    Returns:
        dict: A dictionary holding a plotly figure including layout, or
              an empty dict when a choice is missing, no y variable is
              selected, or the dataset is not stored.
    """

    # Conditions necessary to do any plotting
    conditions = [xvars, yvars, secondary_yvars, dataset_choice]
    if any(var is None for var in conditions):
        return {}

    # An empty selection has no first element to use as the axis title.
    if len(yvars) == 0:
        return {}

    # `yvars` may be a single name or a list of names; indexing a plain
    # string would yield only its first character.
    y_title = yvars if isinstance(yvars, str) else yvars[0]

    # A missing key must not be handed to `dill.loads`.
    # NOTE(review): presumably `redis_conn` is a redis-py client, whose
    # `get` returns None for unknown keys -- confirm.
    serialized = redis_conn.get(dataset_choice)
    if serialized is None:
        return {}

    # NOTE(review): unpickling can execute arbitrary code; this is only
    # safe as long as the stored values are written by this application.
    df = dill.loads(serialized)

    # baseline graph
    traces = kpis.baseline_graph(df, xvars, yvars, secondary_yvars)

    return {
        'data': traces,
        'layout': layouts.default_2d(xvars, y_title)
    }
def fit_clustering_model(xvars, yvars, n_clusters, algo_choice_clustering,
                         user_id, dataset_choice):
    """
    Take user choices and, if all are present, fit the appropriate model.

    Args:
        xvars (list(str)): predictor variables.
        yvars (str): target variable; not needed.
        n_clusters: Number of clusters; only passed to the model when
                    the algorithm choice is "kmc".
        algo_choice_clustering (str): The choice of algorithm type.
        user_id: Session/user id.
        dataset_choice: Name of dataset.

    Returns:
        list: Two elements: a list with the Dash element reporting the
              model score, and a dict with the figure parameters (empty
              when fewer than two predictors were chosen).

    Raises:
        PreventUpdate: If a required choice is missing or no data could
                       be loaded.
    """

    # Make sure all variables have a value before loading any data
    if any(x is None
           for x in [xvars, dataset_choice, algo_choice_clustering]):
        raise PreventUpdate()

    df = get_data(dataset_choice, user_id)
    if df is None:
        raise PreventUpdate()

    # TODO: Make this interface cleaner
    # We have the dictionary that maps keys to models so use that
    if algo_choice_clustering == "kmc":
        model = mapping[algo_choice_clustering](n_clusters=n_clusters)
    else:
        model = mapping[algo_choice_clustering]()

    features = df[xvars]
    model.fit(features)

    # TODO: Find a meaningful way (metric) to notify the user of model score.
    try:
        score_message = f"Clustering model scored: {model.score(features)}"
    except AttributeError:
        # model without a score function
        score_message = "No score for this method."
    layout = [[html.H4(score_message)]]

    # `np.float` was removed from NumPy; the builtin `float` is the
    # same dtype.
    colors = model.labels_.astype(float)

    # TODO: If Y is given, visualize the (in)correctly grouped points.
    # If we have >=2 variables, visualize the clusters
    if len(xvars) >= 3:

        trace = go.Scatter3d(x=df[xvars[0]],
                             y=df[xvars[1]],
                             z=df[xvars[2]],
                             showlegend=False,
                             mode='markers',
                             marker=dict(color=colors,
                                         line={
                                             'color': 'black',
                                             'width': 1
                                         }))

        layout += [{
            'data': [trace],
            'layout': layouts.default_2d(xvars[0], xvars[1])
        }]

    elif len(xvars) == 2:
        trace = scatterplot(df[xvars[0]],
                            df[xvars[1]],
                            marker={'color': colors})

        layout += [{
            'data': [trace],
            'layout': layouts.default_2d(xvars[0], xvars[1])
        }]

    else:
        layout += [{}]

    return layout
Beispiel #4
0
def plot_graph_2d(xvars, yvars, graph_choice_exploration, user_id,
                  dataset_choice):
    """
    Plot the graph according to user choices.

    Args:
        xvars (str): `x-axis` of the graph.
        yvars (str or list(str)): `y-axis`, can be multiple depending
                                  on graph type.
        graph_choice_exploration (str): The choice of graph type.
        user_id (str): Session/user id.
        dataset_choice (str): Name of dataset.

    Returns:
        dict: A dictionary holding a plotly figure including layout.
              Empty when a required choice is missing or no data could
              be loaded. For a pairplot the figure produced by
              `graphs2d.pairplot` is returned as is.
    """

    # Make sure all variables have a value before loading any data
    required = [xvars, dataset_choice, graph_choice_exploration]
    if any(x is None for x in required):
        return {}

    df = get_data(dataset_choice, user_id)
    if df is None:
        return {}

    needs_yvar, allows_multi = graphs2d.graph_configs[graph_choice_exploration]

    # A cleared multi-selection arrives as an empty list; treat it the
    # same as no selection so that `yvars[0]` below cannot fail.
    if isinstance(yvars, list) and not yvars:
        yvars = None

    # Also, if we needs_yvar and they are empty, return.
    if needs_yvar and yvars is None:
        return {}

    # Fix bugs occurring due to Dash not ordering callbacks
    if not allows_multi and isinstance(yvars, list):
        yvars = yvars[0]
    elif allows_multi and isinstance(yvars, str):
        yvars = [yvars]

    # List view of the y selection for the branches that draw one trace
    # per variable; iterating a plain string would walk its characters.
    if yvars is None:
        yvar_list = []
    elif isinstance(yvars, str):
        yvar_list = [yvars]
    else:
        yvar_list = list(yvars)

    # Graph types whose `graphs2d` builder carries the same name and
    # takes (x, y, name=...) once per y variable.
    per_series_graphs = ('line_chart', 'scatterplot', 'filledarea',
                         'errorbar')

    # Graph choices
    if graph_choice_exploration in per_series_graphs:
        make_trace = getattr(graphs2d, graph_choice_exploration)
        traces = [
            make_trace(df[xvars], df[yvar], name=yvar)
            for yvar in yvar_list
        ]

    elif graph_choice_exploration == 'histogram':
        traces = [graphs2d.histogram(df[xvars])]

    elif graph_choice_exploration == 'heatmap':
        traces = [graphs2d.heatmap(df[xvars], df[yvars])]

    elif graph_choice_exploration == 'bubble_chart':
        size = [20, 40, 60, 80, 100, 80, 60, 40, 20, 40]
        traces = [
            graphs2d.bubble_chart(df[xvars], df[yvar], size, name=yvar)
            for yvar in yvar_list
        ]

    elif graph_choice_exploration == 'pie':
        # Take labels and values from the same grouped result so they
        # stay aligned; `unique()` is ordered by first appearance while
        # a groupby result is ordered by sorted key. `size()` counts
        # rows per group regardless of missing values in other columns.
        counts = df.groupby(xvars).size()

        traces = [go.Pie(labels=counts.index.tolist(),
                         values=counts.tolist())]

    elif graph_choice_exploration == 'density2d':
        traces = graphs2d.density2d(df[xvars], df[yvars], name=yvars)

    elif graph_choice_exploration == 'pairplot':
        # We need at least one variable besides x for a pairplot
        if yvar_list:
            # This returns a whole figure, not a trace
            return graphs2d.pairplot(df[[xvars] + yvar_list])
        traces = []

    else:
        traces = []

    return {
        'data': traces,
        'layout': layouts.default_2d(xvars, ""),
    }
Beispiel #5
0
def fit_model(xvars, yvars, algo_choice, dataset_choice, problem_type):
    """
    Take user choices and, if all are present, fit the appropriate model.
    The results of fitting are given to hidden divs. When the user uses
    the tab menu then the appropriate menu is rendered.

    Args:
        xvars (list(str)): predictor variables.
        yvars (str): target variable.
        algo_choice (str): The choice of algorithm type.
        dataset_choice (str): Name of the dataset, used as the redis key.
        problem_type (str): The type of learning problem.

    Returns:
        list, dict: Dash element(s) with the results of model fitting,
                    and parameters for plotting a graph (empty when
                    fewer than two predictors were chosen).

    Raises:
        PreventUpdate: If a required choice is missing or the dataset
                       is not stored.
    """

    # Make sure all variables have a value before loading any data
    if any(x is None for x in [xvars, yvars, dataset_choice, algo_choice]):
        raise PreventUpdate()

    # A missing key must not be handed to `dill.loads`.
    # NOTE(review): presumably `redis_conn` is a redis-py client, whose
    # `get` returns None for unknown keys -- confirm.
    serialized = redis_conn.get(dataset_choice)
    if serialized is None:
        raise PreventUpdate()

    # NOTE(review): unpickling can execute arbitrary code; this is only
    # safe as long as the stored values are written by this application.
    df = dill.loads(serialized)

    # The inverse mapping of ml_options, use it to get the sklearn model
    model = node_options[algo_choice]["model_class"]()

    features = df[xvars]

    # TODO: This probably needs a better/cleaner implementation and/or
    #       might need to be used in other parts as well.
    if problem_type == "regression":
        # A regression target must keep its numeric values; turning it
        # into category codes would destroy them.
        target = df[yvars]
        classes = None
    else:
        # Encode the target as integer codes; `classes` holds the
        # original value belonging to each code, in code order.
        target, classes = pd.factorize(df[yvars])

    model.fit(features, target)
    predictions = model.predict(features)

    metrics = []
    if problem_type == "regression":
        # Compute the error that the label announces.
        # NOTE(review): the previous value came from `model.score`,
        # which for sklearn regressors is R^2, not the MSE.
        residuals = (np.ravel(np.asarray(target, dtype=float))
                     - np.ravel(np.asarray(predictions, dtype=float)))
        mse = float(np.mean(residuals ** 2))
        metrics.append(html.H4(f"Mean Squared Error: {mse:.3f}"))

    elif problem_type == "classification":
        score = model.score(features, target)
        metrics.append(html.H4(f"Accuracy: {100*score:.3f} %"))
        metrics.append(html.H4("Confusion matrix:"))

        confusion = confusion_matrix(target, predictions)
        metrics.append(html.Table([
            html.Thead([html.Th(cls) for cls in classes]),

            html.Tbody([
               html.Tr([html.Td(item) for item in row])
               for row in confusion
            ])
        ]))

    else:
        metrics.append("Not implemented")

    # Colour the points by prediction. `np.float` was removed from
    # NumPy; the builtin `float` is the same dtype.
    colors = predictions.astype(float)

    # TODO: Visualize the (in)correctly grouped points.
    # If we have >=2 variables, visualize the classification
    if len(xvars) >= 3:

        trace = go.Scatter3d(x=df[xvars[0]],
                             y=df[xvars[1]],
                             z=df[xvars[2]],
                             showlegend=False,
                             mode='markers',
                             marker={
                                 'color': colors,
                                 'line': dict(color='black', width=1)
                             })

        # The y axis shows the second predictor, so title it as such
        # (`yvars[0]` was the first character of the target name).
        figure = {
            'data': [trace],
            'layout': layouts.default_2d(xvars[0], xvars[1])
        }

    elif len(xvars) == 2:
        trace = scatterplot(df[xvars[0]], df[xvars[1]],
                            marker={'color': colors})

        figure = {
            'data': [trace],
            'layout': go.Layout(
                xaxis={'title': xvars[0]},
                yaxis={'title': xvars[1]},
                legend={'x': 0, 'y': 1},
                hovermode='closest'
            )
        }

    else:
        figure = {}

    return metrics, figure
Beispiel #6
0
def fit_classification_model(xvars, yvars, algo_choice_classification,
                             user_id, dataset_choice):
    """
    Take user choices and, if all are present, fit the appropriate model.

    Args:
        xvars (list(str)): predictor variables.
        yvars (str): target variable.
        algo_choice_classification (str): The choice of algorithm type.
        user_id: Session/user id.
        dataset_choice: Name of dataset.

    Returns:
        list: Two elements: the Dash element reporting the model score,
              and a dict with the figure parameters (empty when fewer
              than two predictors were chosen).

    Raises:
        PreventUpdate: If a required choice is missing or no data could
                       be loaded.
    """

    # Make sure all variables have a value before loading any data
    if any(x is None for x in [xvars, yvars, dataset_choice,
                               algo_choice_classification]):
        raise PreventUpdate()

    df = get_data(dataset_choice, user_id)
    if df is None:
        raise PreventUpdate()

    # We have the dictionary that maps keys to models so use that
    model = mapping[algo_choice_classification]()

    features = df[xvars]

    # TODO: This probably needs a better/cleaner implementation and/or
    #       might need to be used in other parts as well.
    # Encode the target as integer codes for fitting.
    target_codes = pd.factorize(df[yvars])[0]
    model.fit(features, target_codes)

    layout = [
        html.H4(
            f"Classification model scored: "
            f"{model.score(features, target_codes)}"
        )
    ]

    # Colour the points by predicted class. `np.float` was removed from
    # NumPy; the builtin `float` is the same dtype.
    colors = model.predict(features).astype(float)

    # The y axis of both plots shows the second predictor, so title it
    # as such (it used to carry the target name, or its first character).
    # TODO: Visualize the (in)correctly grouped points.
    # If we have >=2 variables, visualize the classification
    if len(xvars) >= 3:

        trace = go.Scatter3d(x=df[xvars[0]],
                             y=df[xvars[1]],
                             z=df[xvars[2]],
                             showlegend=False,
                             mode='markers',
                             marker={
                                 'color': colors,
                                 'line': dict(color='black', width=1)
                             })

        layout += [{
            'data': [trace],
            'layout': layouts.default_2d(xvars[0], xvars[1])
        }]

    elif len(xvars) == 2:
        trace = scatterplot(df[xvars[0]], df[xvars[1]],
                            marker={'color': colors})

        layout += [{
            'data': [trace],
            'layout': layouts.default_2d(xvars[0], xvars[1])
        }]

    else:
        layout += [{}]

    return layout