Beispiel #1
0
def test_valid_colorscale():
    """Check that a custom node colorscale is applied in 2d and 3d alike.

    The ``"blues"`` colorscale is requested for a 2d and a 3d layout; both
    figures must report the same marker colorscale on their node trace, and
    that colorscale must differ from the default one, which is expected to
    equal ``viridis_colorscale``.
    """
    pipe = make_mapper_pipeline()

    # A fresh ``plotly_params`` dict is built for each call, so the two
    # figures never share a parameters object.
    custom_figs = {
        dim: plot_static_mapper_graph(
            pipe,
            X,
            layout_dim=dim,
            plotly_params={"node_trace": {"marker_colorscale": "blues"}}
            )
        for dim in (2, 3)
        }

    # The custom colorscale must come out identical in 2d and in 3d
    custom_2d = custom_figs[2].data[1]["marker"]["colorscale"]
    custom_3d = custom_figs[3].data[1]["marker"]["colorscale"]
    assert custom_2d == custom_3d

    # The default colorscale is "viridis" and the custom one differs from it
    default_fig = plot_static_mapper_graph(pipe, X)
    default_colorscale = default_fig.data[1]["marker"]["colorscale"]
    assert default_colorscale == viridis_colorscale
    assert custom_2d != default_colorscale
def test_invalid_color_features_as_array_of_indices():
    """Passing a numpy array of column indices as ``color_features`` must
    make ``plot_static_mapper_graph`` raise a ``ValueError``."""
    mapper = make_mapper_pipeline()
    with pytest.raises(ValueError):
        every_column = np.arange(X_arr.shape[1])
        plot_static_mapper_graph(mapper,
                                 X_arr,
                                 color_data=X_arr,
                                 color_features=every_column)
def test_node_color_statistic_as_ndarray_wrong_length():
    """An array-valued ``node_color_statistic`` with one entry more than the
    graph has nodes must be rejected with a ``ValueError``."""
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    # Deliberately one element too long
    oversized_statistic = np.arange(n_nodes + 1)

    with pytest.raises(ValueError):
        plot_static_mapper_graph(
            pipe, X_arr, node_color_statistic=oversized_statistic
            )
def test_colors_same_2d_3d(X, color_variable, node_color_statistic):
    """Check that node colors do not depend on the layout dimension.

    The same pipeline, data, ``color_variable`` and ``node_color_statistic``
    are plotted with a 2d and a 3d layout, and the marker colors of the node
    trace (``fig.data[1]``) of both figures are compared.
    """
    pipe = make_mapper_pipeline()
    fig_2d = plot_static_mapper_graph(
        pipe, X, layout_dim=2, color_variable=color_variable,
        node_color_statistic=node_color_statistic
        )
    fig_3d = plot_static_mapper_graph(
        pipe, X, layout_dim=3, color_variable=color_variable,
        node_color_statistic=node_color_statistic
        )
    # ``np.array_equal`` handles both tuple- and array-valued colors. A plain
    # ``==`` between arrays yields an elementwise array whose truth value is
    # ambiguous, which would make the ``assert`` itself raise.
    assert np.array_equal(fig_2d.data[1].marker.color,
                          fig_3d.data[1].marker.color)
def test_colors_same_2d_3d(X, color_data, node_color_statistic):
    """Check that node colors are identical for 2d and 3d layouts when the
    same ``color_data`` and ``node_color_statistic`` are used."""
    pipe = make_mapper_pipeline()
    coloring = {
        "color_data": color_data,
        "node_color_statistic": node_color_statistic,
        }
    # Node-trace marker colors, computed for the 2d layout first and then
    # for the 3d one
    colors_2d, colors_3d = [
        plot_static_mapper_graph(
            pipe, X, layout_dim=dim, **coloring
            ).data[1].marker.color
        for dim in (2, 3)
        ]
    assert np.array_equal(colors_2d, colors_3d)
def test_column_dropdown(X, columns, layout_dim):
    """Check the colors stored in the column dropdown buttons.

    The figure's initial node colors must match those stored in the first
    button, and the colors stored in button ``i`` must match the node colors
    of a figure plotted with ``color_features`` set to the ``i``-th entry of
    ``columns``.
    """
    pipe = make_mapper_pipeline()
    fig = plot_static_mapper_graph(pipe, X, color_data=X,
                                   layout_dim=layout_dim)
    buttons = fig.layout.updatemenus[0].buttons

    def stored_colors(position):
        # Colors held in the arguments of the button at ``position``
        return list(buttons[position].args[0]["marker.color"][1])

    assert list(fig.data[1].marker.color) == stored_colors(0)

    for position, column in enumerate(columns):
        single_column_fig = plot_static_mapper_graph(
            pipe, X, layout_dim=layout_dim, color_data=X,
            color_features=column
            )
        assert list(single_column_fig.data[1].marker.color) == \
            stored_colors(position)
def test_valid_layout_dim(X, layout_dim):
    """Check that the edge trace has "x" and "y" attributes, and a "z"
    attribute exactly when ``layout_dim`` is 3."""
    pipe = make_mapper_pipeline()
    edges = plot_static_mapper_graph(pipe, X, layout_dim=layout_dim).data[0]
    for axis in ("x", "y"):
        assert hasattr(edges, axis)
    # "z" must be present if and only if the layout is 3-dimensional
    assert hasattr(edges, "z") == (layout_dim == 3)
def test_valid_hoverlabel_bgcolor(X, layout_dim):
    """Check that a hover label background color passed through
    ``plotly_params`` ends up on the node trace."""
    pipe = make_mapper_pipeline()
    node_trace_params = {"hoverlabel_bgcolor": "white"}
    fig = plot_static_mapper_graph(
        pipe,
        X,
        layout_dim=layout_dim,
        plotly_params={"node_trace": node_trace_params}
        )
    node_trace = fig.data[1]
    assert node_trace["hoverlabel"]["bgcolor"] == "white"
def test_unsuitable_colorscale_for_hoverlabel_3d(X):
    """Plotting in 3d with ``hsl_colorscale`` as the node marker colorscale
    must emit a ``RuntimeWarning``."""
    pipe = make_mapper_pipeline()
    params = {"node_trace": {"marker_colorscale": hsl_colorscale}}
    with pytest.warns(RuntimeWarning):
        plot_static_mapper_graph(pipe, X, layout_dim=3, plotly_params=params)
Beispiel #10
0
def test_color_by_column_dropdown_2d(layout_dim):
    """Check the colors stored in the "color by column" dropdown buttons.

    The figure's initial node colors must match those stored in the first
    button, and button ``i + 1`` must hold the node colors obtained when
    plotting with ``color_variable=i``, for every column index ``i`` of
    ``X``.
    """
    pipe = make_mapper_pipeline()
    fig = plot_static_mapper_graph(
        pipe, X, layout_dim=layout_dim, color_by_columns_dropdown=True
        )
    buttons = fig.layout.updatemenus[0].buttons

    def stored_colors(position):
        # Colors held in the arguments of the button at ``position``
        return list(buttons[position].args[0]["marker.color"][1])

    assert list(fig.data[1].marker.color) == stored_colors(0)

    n_columns = X.shape[1]
    for column in range(n_columns):
        column_fig = plot_static_mapper_graph(
            pipe, X, layout_dim=layout_dim, color_variable=column
            )
        # Button 0 is compared above; column buttons start at index 1
        assert list(column_fig.data[1].marker.color) == \
            stored_colors(column + 1)
def test_node_color_statistic_as_ndarray():
    """An array passed as ``node_color_statistic``, with one entry per graph
    node, must be used verbatim as the node marker colors."""
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    expected_colors = np.arange(n_nodes)
    fig = plot_static_mapper_graph(
        pipe, X_arr, node_color_statistic=expected_colors
        )

    assert np.array_equal(fig.data[1].marker.color, expected_colors)
    def test_cluster_sizes(self):
        """Check that the node sizes read from the hover texts of the plotted
        node trace add up to the total number of elements over all nodes of
        the graph computed by the pipeline."""
        pipe = make_mapper_pipeline(clusterer=FirstSimpleGap())
        hovertexts = plot_static_mapper_graph(pipe, X_arr).data[1].hovertext
        displayed_total = sum(map(_get_size_from_hovertext, hovertexts))

        node_elements = pipe.fit_transform(X_arr).vs['node_elements']
        computed_total = sum(len(elements) for elements in node_elements)

        assert displayed_total == computed_total
def test_node_color_statistic_as_ndarray(is_2d):
    """Check array-valued ``node_color_statistic`` in 1d and 2d form.

    With ``is_2d`` set, the statistic is a two-column array whose columns are
    identical; otherwise it is a 1d array. In both cases the node marker
    colors must equal the (first) column.
    """
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    first_column = np.arange(n_nodes)
    # In the 2d case the same values are duplicated into a second column
    statistic = (
        np.vstack([first_column, first_column]).T if is_2d else first_column
        )

    fig = plot_static_mapper_graph(pipe, X_arr,
                                   node_color_statistic=statistic)

    assert np.array_equal(fig.data[1].marker.color, first_column)
Beispiel #14
0
    def test_is_data_present(self):
        """Verify that what we see in the graph is consistent with the number
        of samples in the data.

        The number of plotted nodes must not exceed the number of samples in
        ``X``, and there must be exactly one marker color per plotted node.
        """
        pipe = make_mapper_pipeline()
        # Silence warnings only for the plotting call. A bare
        # ``warnings.simplefilter`` would change the process-wide filters
        # and leak into whatever runs afterwards.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fig = plot_static_mapper_graph(pipe,
                                           X,
                                           color_variable=colors,
                                           clone_pipeline=False)
        node_trace = fig.get_state()['_data'][1]
        xy = np.stack([node_trace[c] for c in ['x', 'y']]).transpose()
        # Compare row counts explicitly: comparing whole shape tuples is
        # lexicographic, so the outcome would also depend on the number of
        # columns whenever the row counts are equal.
        assert X.shape[0] >= xy.shape[0]

        real_colors = node_trace['marker']['color']
        assert len(real_colors) == xy.shape[0]
def test_color_features_as_estimator_or_callable(color_features):
    """Check node colors when ``color_features`` is an estimator or callable.

    The expected color of each node is the mean, over the node's elements, of
    the first component of a 2-component PCA fitted on ``X_arr``; the plotted
    node colors must match these values up to numerical precision.
    """
    pipe = make_mapper_pipeline()
    node_elements = pipe.fit_transform(X_arr).vs["node_elements"]

    first_component = PCA(n_components=2).fit_transform(X_arr)[:, 0]
    expected_colors = np.array(
        [np.mean(first_component[members]) for members in node_elements]
        )

    fig = plot_static_mapper_graph(pipe, X_arr, color_data=X_arr,
                                   color_features=color_features)

    assert_almost_equal(fig.data[1].marker.color, expected_colors)
    def test_is_data_present(self):
        """Check that the plotted node trace is consistent with the data: the
        x and y coordinates have the same length, there are at most as many
        nodes as samples in ``X_arr``, and there is one color per node."""
        pipe = make_mapper_pipeline()
        node_trace = plot_static_mapper_graph(
            pipe, X_arr, color_data=colors, clone_pipeline=False
            ).data[1]
        x_coords, y_coords = node_trace.x, node_trace.y

        num_nodes = x_coords.shape[0]
        assert num_nodes == y_coords.shape[0]
        assert len(X_arr) >= num_nodes
        assert len(node_trace.marker.color) == num_nodes
Beispiel #17
0
def test_interactive_plotter_attrs(X, color_data, layout_dim):
    """Check the attributes ``MapperInteractivePlotter`` stores after
    plotting against a pipeline fit and an equivalent static figure."""
    pipe = make_mapper_pipeline()
    plotter = MapperInteractivePlotter(pipe, X)
    plotter.plot(color_data=color_data, layout_dim=layout_dim)

    # 1 graph_: isomorphic to the graph computed directly by the pipeline
    expected_graph = pipe.fit_transform(X)
    assert plotter.graph_.isomorphic(expected_graph)

    # 2 pipeline_: same string representation as the input pipeline
    assert str(plotter.pipeline_) == str(pipe)

    # 3 color_features_: the color data itself, or a column of sample
    # indices when no color data is given
    expected_features = (
        np.arange(len(X)).reshape(-1, 1) if color_data is None
        else color_data
        )
    assert np.array_equal(plotter.color_features_, expected_features)

    # 4 node_summaries_: one entry per graph node
    assert len(plotter.node_summaries_) == len(expected_graph.vs)

    # 5 figure_: traces agree with those of the static plot
    static_fig = plot_static_mapper_graph(pipe, X, color_data=color_data,
                                          layout_dim=layout_dim)
    interactive_fig = plotter.figure_

    def assert_same_attrs(interactive_obj, static_obj, attr_names):
        # Compare the listed attributes of two plotly objects one by one
        for attr_name in attr_names:
            assert np.all(getattr(interactive_obj, attr_name) ==
                          getattr(static_obj, attr_name))

    assert_same_attrs(interactive_fig.data[0], static_fig.data[0],
                      ["hoverinfo", "line", "name", "x", "y"])

    # The marker is left out here and checked attribute by attribute below
    assert_same_attrs(interactive_fig.data[1], static_fig.data[1],
                      ["hoverinfo", "hovertext", "mode", "name", "x", "y"])

    assert_same_attrs(interactive_fig.data[1].marker,
                      static_fig.data[1].marker,
                      ["color", "colorbar", "colorscale", "line", "opacity",
                       "reversescale", "showscale", "size", "sizemin",
                       "sizemode", "sizeref"])
Beispiel #18
0
    def test_is_data_present(self):
        """Verify that what we see in the graph is consistent with the number
        of samples in the data.

        The x and y entries of the node trace state must report the same
        leading shape, there must be at most as many nodes as samples in
        ``X``, and the marker color entry must have as many items as there
        are nodes.
        """
        pipe = make_mapper_pipeline()
        # Silence warnings only for the plotting call. A bare
        # ``warnings.simplefilter`` would change the process-wide filters
        # and leak into whatever runs afterwards.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fig = plot_static_mapper_graph(pipe, X,
                                           color_variable=colors,
                                           clone_pipeline=False)
        node_trace_x = fig.get_state()['_data'][1]["x"]
        node_trace_y = fig.get_state()['_data'][1]["y"]

        assert node_trace_x["shape"][0] == node_trace_y["shape"][0]

        num_nodes = node_trace_x["shape"][0]
        assert len(X) >= num_nodes

        fig_colors = fig.get_state()['_data'][1]['marker']['color']
        assert len(fig_colors) == num_nodes
def test_invalid_layout_algorithm(X):
    """An unknown ``layout`` name must make ``plot_static_mapper_graph``
    raise a ``KeyError``."""
    # Build the pipeline outside the ``raises`` block so that only the
    # plotting call can satisfy the expected exception.
    pipe = make_mapper_pipeline()
    with pytest.raises(KeyError):
        plot_static_mapper_graph(pipe, X, layout="foobar")
def test_invalid_layout_dim(X, layout_dim):
    """An unsupported ``layout_dim`` must make ``plot_static_mapper_graph``
    raise a ``ValueError``."""
    # Build the pipeline outside the ``raises`` block so that only the
    # plotting call can satisfy the expected exception.
    pipe = make_mapper_pipeline()
    with pytest.raises(ValueError):
        plot_static_mapper_graph(pipe, X, layout_dim=layout_dim)
Beispiel #21
0
def get_region_plot(pipe, data, layout, node_elements, colorscale):
    """Build a 2D Mapper graph figure whose nodes are colored by region.

    Parameters
    ----------
    pipe : MapperPipeline
        The Mapper pipeline used to compute the Mapper graph.
    data : ndarray (n_samples x n_dim)
        Data the Mapper pipeline is run on.
    layout : igraph.layout.Layout
        Layout of the graph, forwarded to ``plot_static_mapper_graph``.
    node_elements : tuple
        Tuple of arrays where the array at position x contains the data
        points of node x. Only its length is used here.
    colorscale : list
        Colors to use, indexed by region number.

    Returns
    -------
    fig
        The figure returned by ``plot_static_mapper_graph``, with the marker
        colors of its second trace overwritten by the region colors.
    """
    regions = utils.get_regions()

    # Pair every node of every region with the color of that region
    node_color_pairs = []
    for region in range(len(regions)):
        members = regions[region]
        region_color = colorscale[region]
        node_color_pairs.extend((node, region_color) for node in members)

    # Sort the pairs (node first), let an ordered dict keep one color per
    # node, and read the colors back in node order
    node_color_pairs.sort()
    color_by_node = collections.OrderedDict(node_color_pairs)
    node_color = np.array(list(color_by_node.values()))

    # Plotly arguments: uniform node size, hidden color scale, and the
    # region colors for both the hover label and the markers
    plotly_kwargs = {}
    plotly_kwargs['node_trace_marker_size'] = [1] * len(node_elements)
    plotly_kwargs['node_trace_marker_showscale'] = False
    plotly_kwargs['node_trace_hoverlabel'] = node_color
    plotly_kwargs['node_trace_marker_color'] = node_color

    fig = plot_static_mapper_graph(
        pipe, data, layout,
        layout_dim=2,
        color_by_columns_dropdown=False,
        plotly_kwargs=plotly_kwargs,
    )
    # Write the colors straight into the figure data as well (workaround
    # for the newer plotting API)
    fig._data[1]['marker']['color'] = node_color
    return fig
Beispiel #22
0
def test_color_features_as_columns_fails_on_series():
    """Selecting a column by name via ``color_features`` while ``color_data``
    is a pandas series must raise a ``ValueError`` with a dedicated
    message."""
    pipe = make_mapper_pipeline()
    expected_message = "If `color_data` is a pandas series"

    with pytest.raises(ValueError, match=expected_message):
        series = X_df["a"]
        plot_static_mapper_graph(
            pipe, X_df, color_data=series, color_features="a"
            )
Beispiel #23
0
def main():
    """Run PCA and Mapper on image patches and write three figures.

    The images of all patients are loaded and flattened, and three outputs
    are written into the ``GEN_FIGURES_DIR`` directory: an elbow plot of the
    explained variance ratio of a 440-component PCA, a 3D Mapper graph built
    on a 3-component PCA projection extended with one-hot diagnosis labels,
    and a 3D scatter plot of that projection.
    """
    directory = DOTENV_KEY2VAL["DATA_DIR"]
    image_dir = directory + "/patch_92/"
    diagnosis_json = "collected_diagnoses_complete.json"

    cn_patients, mci_patients, ad_patients = \
        utils.get_earliest_available_diagnosis(directory + diagnosis_json)
    images_all = utils.get_arrays_from_dir(
        image_dir, cn_patients + mci_patients + ad_patients)

    # Diagnosis codes: 1 = CN, 2 = MCI, 3 = AD. One CN label is dropped
    # because the MRI of one CN patient could not be found.
    diagnosis_codes = (
        [1] * (len(cn_patients) - 1)
        + [2] * len(mci_patients)
        + [3] * len(ad_patients)
    )
    diags = np.array(diagnosis_codes).reshape(-1, 1)
    labels = OneHotEncoder().fit_transform(diags).toarray()

    # One flattened row per image
    images_all = np.asarray([image.flatten() for image in images_all])

    # Elbow plot of the explained variance ratio
    elbow_pca = PCA(n_components=440).fit(images_all)
    _, elbow_ax = plt.subplots(nrows=1, sharex=True, figsize=(6, 6))
    component_numbers = np.arange(1, elbow_pca.n_components_ + 1)
    elbow_ax.plot(
        component_numbers,
        elbow_pca.explained_variance_ratio_,
        "+",
        linewidth=2,
    )
    elbow_ax.set_ylabel("PCA explained variance ratio")
    elbow_ax.legend(prop=dict(size=12))
    figures_dir = DOTENV_KEY2VAL["GEN_FIGURES_DIR"]
    plt.savefig(figures_dir + "elbow_plot.png")

    # Project onto three principal components and append the label columns
    n_components = 3
    projection = PCA(n_components=n_components).fit_transform(images_all)
    images_all_projected = np.concatenate((projection, labels), axis=1)

    mapper_pipeline = make_mapper_pipeline(
        filter_func=Projection(columns=list(range(2))),
        cover=CubicalCover(n_intervals=10, overlap_frac=0.25),
        clusterer=DBSCAN(eps=0.5, min_samples=5),
        verbose=True,
        n_jobs=4,
    )
    mapper_fig = plot_static_mapper_graph(
        mapper_pipeline,
        images_all_projected,
        layout_dim=3,
        color_by_columns_dropdown=True,
        plotly_params={"node_trace": {"marker_colorscale": "Blues"}},
    )
    mapper_fig.write_html(
        figures_dir + "mapper_2_dimensional_reduction.html")

    # Scatter plot of the three PCA components, colored by column 3
    scatter_fig = px.scatter_3d(
        pd.DataFrame(images_all_projected),
        x=0,
        y=1,
        z=2,
        color=3,
        title="3D scatterplot of the PCA of the image data",
    )
    scatter_fig.write_html(figures_dir + "scatterplot_pca_3d.html")
Beispiel #24
0
def test_valid_color_data_transformed(color_data, color_features):
    """Smoke test: plotting must not raise when pandas dataframes/series are
    passed as ``color_data`` and/or returned by applying
    ``color_features``."""
    mapper = make_mapper_pipeline()
    coloring = {"color_data": color_data, "color_features": color_features}
    plot_static_mapper_graph(mapper, X_arr, **coloring)
Beispiel #25
0
def get_graph_plot_colored_by_election_results(pipeline, year, df, data,
                                               keep_layout):
    """Plot the Mapper graph with nodes colored by election results.

    Each node is colored by the percentage of its electors that was won in
    ``year`` (the hover label calls this the percentage won by Republicans,
    which presumably means ``winner`` is 1 for a Republican win; confirm
    against the data preparation code). Node sizes are the numbers of
    electors per node as computed by ``utils.get_n_electors``.

    Parameters
    ----------
    pipeline : MapperPipeline
        The Mapper pipeline to compute the Mapper graph.
    year : int
        Color by election results from year `year`.
    df : pandas data frame
        Data frame with columns 'year', 'winner' and 'n_electors', one row
        per county and election year.
    data : ndarray (n_samples x n_dim)
        Data used for Mapper.
    keep_layout : list of two dicts or None
        Dicts with keys 'x', 'y' whose values are 1d arrays, giving the
        positions of the lines (keep_layout[0]) and of the markers
        (keep_layout[1]) of the Mapper graph. If None, the layout computed
        by the plotting function is kept.

    Returns
    -------
    fig
        The figure returned by ``plot_static_mapper_graph``, with its trace
        data updated from ``keep_layout`` when that is given.
    """
    node_elements = pipeline.fit_transform(
        data)['node_metadata']['node_elements']

    # Select the rows of the requested year once instead of re-filtering
    # the data frame for every node
    df_year = df[df['year'] == year]
    n_electors = df_year['n_electors'].values
    electors_won = df_year['winner'].values * n_electors

    # Node color: percentage of the node's electors that was won
    node_color = np.array([
        100 * electors_won[x].sum() / n_electors[x].sum()
        for x in node_elements
    ])

    # Computed once and reused for the hover text, the marker sizes and
    # the size reference
    n_electors_per_node = utils.get_n_electors(
        node_elements, df_year['n_electors'].reset_index(drop=True))

    node_text = utils.get_node_text(
        dict(enumerate(node_elements)),
        n_electors_per_node,
        node_color, 'Percentage of Electors Won by Republicans')

    plotly_kwargs = {
        'node_trace_marker_colorscale': 'RdBu',
        'node_trace_marker_reversescale': True,
        'node_trace_marker_cmin': 0,
        'node_trace_marker_cmax': 100,
        'node_trace_text': node_text,
        'node_trace_marker_size': n_electors_per_node,
        'node_trace_marker_sizeref': .5 / max(n_electors_per_node),
    }

    fig = plot_static_mapper_graph(pipeline,
                                   data,
                                   'kk',
                                   layout_dim=2,
                                   node_color_statistic=node_color,
                                   color_by_columns_dropdown=True,
                                   plotly_kwargs=plotly_kwargs)
    if keep_layout is not None:
        # Reuse previously computed positions for edges and nodes
        fig._data[0].update(keep_layout[0])
        fig._data[1].update(keep_layout[1])
    return fig
def test_color_data_invalid_length():
    """``color_data`` with one row fewer than the plotted data must be
    rejected with a ``ValueError``."""
    mapper = make_mapper_pipeline()

    with pytest.raises(ValueError):
        # Drop the last row so the lengths no longer match
        truncated = X_arr[:-1]
        plot_static_mapper_graph(mapper, X_arr, color_data=truncated)
def test_invalid_type_node_color_statistic_static():
    """A string passed as ``node_color_statistic`` must be rejected with a
    ``ValueError``."""
    mapper = make_mapper_pipeline()
    invalid_statistic = "foo"

    with pytest.raises(ValueError):
        plot_static_mapper_graph(
            mapper, X_arr, node_color_statistic=invalid_statistic
            )
def test_invalid_color_features_types(color_features):
    """The given ``color_features`` value must make
    ``plot_static_mapper_graph`` raise a ``ValueError``."""
    mapper = make_mapper_pipeline()
    invalid_kwargs = {"color_features": color_features}

    with pytest.raises(ValueError):
        plot_static_mapper_graph(mapper, X_arr, **invalid_kwargs)