Beispiel #1
0
def create_my_strategy(daily_returns, daily_index_returns, previous_weights):
    """Create portfolio weights using Topological Data Analysis.

    Builds a Kepler Mapper simplicial complex over the per-stock return
    series and shifts weight towards each cluster's "champion"
    (max-Sortino-ratio) stock and its neighbours.

    Parameters
    ----------
    daily_returns : pandas.DataFrame
        Return matrix; columns are observation days (transposed below so
        rows become stocks).
    daily_index_returns :
        Unused here; kept for the common strategy signature.
    previous_weights : numpy.ndarray or None
        Weights from the previous rebalance, or ``None`` on the first call.

    Returns
    -------
    numpy.ndarray
        Non-negative weight vector normalized to sum to 1.
    """
    # Any stock whose normalized weight drops below this threshold is cut.
    epsilon = 1.0 / (2 * daily_returns.shape[1])
    threshold = epsilon
    alpha = parameters.ALPHA_MULTIPLIER * epsilon
    beta = parameters.BETA_MULTIPLIER * epsilon
    # Transpose so each row is one stock's return series.
    daily_returns = np.transpose(daily_returns.values)
    mapper = km.KeplerMapper(verbose=0)
    # 2-D lens: per-stock mean and standard deviation of returns.
    lens1 = mapper.fit_transform(daily_returns, projection='mean')
    lens2 = mapper.fit_transform(daily_returns, projection='std')

    simplicial_complex = mapper.map(
        np.c_[lens1, lens2],
        X=daily_returns,
        clusterer=sklearn.cluster.DBSCAN(eps=0.5, min_samples=3),
        # Heuristic cover resolution: fourth root of the number of stocks.
        cover=km.Cover(n_cubes=np.ceil(np.power(daily_returns.shape[0], 0.25)),
                       perc_overlap=0.1))
    # Dump an HTML snapshot of the complex at a few fixed rebalance steps.
    if create_my_strategy.counter == 0 or create_my_strategy.counter == 79 or create_my_strategy.counter == 158:
        mapper.visualize(simplicial_complex,
                         path_html=parameters.RESULT_DIRECTORY + "\\" +
                         parameters.STOCK_INDEX + "_simplex_" +
                         str(create_my_strategy.counter) + ".html")

    create_my_strategy.counter += 1

    if previous_weights is None:
        # First call: equal-weight the champion stock of every cluster.
        current_portfolio = _get_max_sortino_ratios(
            daily_returns, simplicial_complex['nodes']).values()
        weights = np.zeros(daily_returns.shape[0])
        for stock_index in current_portfolio:
            weights[stock_index] = 1.0
        weights /= np.sum(weights)
        return weights
    else:
        weights = previous_weights
        alpha_weights = np.zeros(weights.shape)
        beta_weights = np.zeros(weights.shape)
        current_champion_stocks = _get_max_sortino_ratios(
            daily_returns, simplicial_complex['nodes'])
        for cluster, stock_index in current_champion_stocks.items():
            if stock_index != -1:
                # BUG FIX: np.sum() on a generator does not reduce it on
                # modern NumPy; use the builtin sum() instead.
                previous_weight_sum = sum(
                    previous_weights[i]
                    for i in simplicial_complex['nodes'][cluster])
                alpha_weights[stock_index] = alpha * previous_weight_sum

                # Also reward the champions of neighbouring clusters.
                links_dict = simplicial_complex['links']
                for curr_neighbour in links_dict[cluster]:
                    neighbour_stock_index = current_champion_stocks[
                        curr_neighbour]
                    if neighbour_stock_index != -1:
                        beta_weights[
                            neighbour_stock_index] += beta * previous_weight_sum

        # NOTE(review): alpha/beta were already applied when filling
        # alpha_weights/beta_weights above, so they are effectively squared
        # here -- confirm the double multiplication is intended.
        weights = weights + alpha * alpha_weights + beta * beta_weights
        # Drop stocks whose normalized weight falls below the threshold.
        weights[weights / np.sum(weights) < threshold] = 0
        weights /= np.sum(weights)
        return weights
Beispiel #2
0
def do_analysis(data, lens, name_prefix, nc, po, metric='euclidean'):
    """Map *data* through KeplerMapper, then export HTML and Cytoscape JSON.

    ``nc``/``po`` are the cover's cube count and percent overlap; ``metric``
    is forwarded to the LinkageGap clusterer.
    """
    run_name = f"{name_prefix}_n{nc}_o{po}"

    graph = mapper.map(
        lens,
        data.values,
        clusterer=lk.LinkageGap(verbose=0, metric=metric),
        cover=km.Cover(n_cubes=nc, perc_overlap=po),
    )

    # Interactive HTML view, colored by the lens values.
    mapper.visualize(
        graph,
        color_values=lens,
        color_function_name=name_prefix,
        path_html=run_name + "_nm_diabetes_RM.html",
        title=run_name + "nm_diabetes_RM",
    )

    # Bare graph export (no per-node attributes).
    expo.cytoscapejson_dump(expo.kmapper_to_nxmapper(graph),
                            run_name + "_nm_diabetes_RM_simple.cyjs")

    # Rich export: attach every column as node data, averaged per node
    # (the classification column is excluded from averaging).
    column_values = {col: list(data.loc[:, col]) for col in data.columns}
    column_aggregators = {
        col: np.mean
        for col in column_values if col != "Clinical_classification"
    }
    annotated_graph = expo.kmapper_to_nxmapper(
        graph,
        node_extra_data=column_values,
        node_transforms=column_aggregators)
    expo.cytoscapejson_dump(annotated_graph, run_name + "_nm_diabetes_RM.cyjs")
Beispiel #3
0
def get_topological_graph(data, exp, clusterer_param, projection=True, cover=(10, 0.1)):
    """Build a deduplicated Mapper graph over *data*.

    Parameters
    ----------
    data : array-like
        Point cloud passed to ``mapper.map`` as the inverse image.
    exp : array-like
        Lens source: projected via ``fit_transform`` when ``projection`` is
        an int, otherwise used directly as the level set.
    clusterer_param : float
        DBSCAN ``eps`` seed; scaled by sqrt(n_features) below.
    projection : int or bool
        When an int, project ``exp`` onto its first ``projection`` columns.
        NOTE(review): ``bool`` is a subclass of ``int``, so the default
        ``True`` takes the projection branch with ``np.arange(1) == [0]``
        (first column only) -- confirm this is intended.
    cover : sequence of (n_cubes, perc_overlap)
        Cover parameters.  (Changed from a mutable list default to an
        equivalent tuple; callers are unaffected.)

    Returns
    -------
    dict
        Mapper graph with duplicate nodes and links removed.
    """
    mapper = km.KeplerMapper(verbose=0)

    # Scale eps with the dimensionality of the data.
    clusterer_param *= np.sqrt(data.shape[1])
    clusterer = sklearn.cluster.DBSCAN(eps=clusterer_param, min_samples=1)

    if isinstance(projection, int):
        level_set = mapper.fit_transform(
            exp, projection=np.arange(projection).tolist())
    else:
        level_set = exp
    # The cover was built identically in both branches; build it once.
    cover = km.Cover(cover[0], cover[1])

    graph = mapper.map(level_set,
                       data,
                       clusterer=clusterer,
                       cover=cover)
    return remove_duplicated_links(remove_graph_duplicates(graph))
Beispiel #4
0
def do_analysis(data, lens, name_prefix, nc, po):
    """Run Mapper on *data* with an (nc, po) cover and write an HTML view
    colored by the lens."""
    run_name = f"{name_prefix}_n{nc}_o{po}"
    cover = km.Cover(n_cubes=nc, perc_overlap=po)
    clusterer = lk.LinkageGap(verbose=0)
    graph = mapper.map(lens, data, clusterer=clusterer, cover=cover)
    mapper.visualize(graph,
                     color_function=lens,
                     path_html=run_name + "_cat.html",
                     title=run_name + "_cat")
Beispiel #5
0
def do_analysis(data, lens, cf, name_prefix, nc, po):
    """Build a Mapper graph from *lens*/*data* and write an HTML view
    colored by *cf*."""
    run_name = f"{name_prefix:s}_n{nc:d}_o{po:.2f}"
    clusterer = lk.LinkageGap(verbose=0, metric='euclidean')
    cover = km.Cover(n_cubes=nc, perc_overlap=po)
    graph = mapper.map(lens, data, clusterer=clusterer, cover=cover)
    mapper.visualize(graph,
                     color_function=cf,
                     path_html=run_name + "_travel.html",
                     title=run_name + "_travel")
Beispiel #6
0
def do_analysis(data, lens, name_prefix, nc, po, metric='euclidean'):
    """Mapper run over *data.values*; writes an HTML view colored by the
    lens.  ``metric`` is forwarded to the LinkageGap clusterer."""
    run_name = f"{name_prefix}_n{nc}_o{po}"
    graph = mapper.map(
        lens,
        data.values,
        clusterer=lk.LinkageGap(verbose=0, metric=metric),
        cover=km.Cover(n_cubes=nc, perc_overlap=po),
    )
    mapper.visualize(
        graph,
        color_function=lens,
        path_html=run_name + "_nm_diabetes_RM.html",
        title=run_name + "nm_diabetes_RM",
    )
Beispiel #7
0
def perform_TDA(data, labels, NR, PO, n_clusters, filt="knn_distance_2", save_string="TDA", title="TDA", color_values=None, color_function_name=None):
    """
    Perform Topological Data Analysis

    Args:
        data: 2-dimensional array, where first dimension is kept as member label
        labels: labels for first dimension of data (used as tooltips)
        NR: number of hypercubes
        PO: percent overlap between hypercubes
        n_clusters: number of clusters in hypercube
        filt: filtering scheme, default is "knn_distance_2"
        save_string: path where to save html and json
        title: title of map graph
        color_values: optional per-member values used to color the nodes
        color_function_name: display name for the coloring; both this and
            color_values must be provided for colored output
    """

    # Step 1. initiate a Mapper
    mapper = km.KeplerMapper(verbose=2)

    # Step 2. Projection
    projected_data = mapper.fit_transform(data, projection=filt)

    # Step 3. Covering, clustering & mapping
    # NOTE(review): `affinity=` is deprecated in recent scikit-learn
    # (renamed to `metric=`); kept as-is for the pinned version.
    graph = mapper.map(projected_data, data,
                       cover=km.Cover(n_cubes=NR, perc_overlap=PO),
                       clusterer=AgglomerativeClustering(n_clusters=n_clusters,
                                                         linkage="ward",
                                                         affinity="euclidean",
                                                         memory=None,
                                                         connectivity=None,
                                                         compute_full_tree="auto",
                                                         )
                       )
    # Persist the raw graph next to the HTML output.
    with open(save_string + ".json", "w") as f:
        json.dump(graph, f)

    if color_values is None or color_function_name is None:
        # Uncolored output: tooltips only.
        mapper.visualize(graph,
                         X_names=labels,
                         path_html=save_string + ".html",
                         title=title,
                         custom_tooltips=np.array(labels),
                         )
    else:
        # Colored output with several per-node aggregations.
        mapper.visualize(graph,
                         X_names=labels,
                         path_html=save_string + ".html",
                         title=title,
                         custom_tooltips=np.array(labels),
                         color_function_name=color_function_name,
                         color_values=color_values,
                         node_color_function=['mean', 'std', 'median', 'max'],
                         )
Beispiel #8
0
def do_analysis(lens, name_prefix, nc, po):
    """Map the module-level `data` with an (nc, po) cover and write an HTML
    view colored by the module-level `target`."""
    cover = km.Cover(n_cubes=nc, perc_overlap=po)
    graph = mapper.map(lens,
                       data,
                       clusterer=lk.LinkageGap(verbose=0),
                       cover=cover)

    run_name = f"{name_prefix}_n{nc}_o{po}"
    mapper.visualize(graph,
                     color_function=target,
                     path_html=run_name + "_iris.html",
                     title=run_name + "_iris")
def do_analysis(data, dists, lens, name_prefix, nc, po):
    """Mapper run on a precomputed distance matrix *dists*; the HTML output
    is colored by the lens.  (*data* is accepted for signature
    compatibility but not used.)"""
    run_name = f"{name_prefix}_n{nc}_o{po}"
    clusterer = lk.LinkageGap(verbose=0, metric="precomputed")
    graph = mapper.map(lens,
                       dists,
                       clusterer=clusterer,
                       precomputed=True,
                       cover=km.Cover(n_cubes=nc, perc_overlap=po))
    mapper.visualize(graph,
                     color_values=lens,
                     color_function_name=name_prefix,
                     path_html=run_name + "_cat.html",
                     title=run_name + "_cat")
Beispiel #10
0
 def execute(self, matrix):
     """Run KeplerMapper over *matrix* and store the visualization HTML.

     The input is first turned into a distance matrix; projection, scaler,
     clusterer, cover and nerve all come from the instance configuration.
     """
     # Work on pairwise distances rather than the raw matrix.
     matrix = matrixmodule.distMatrix(matrix)
     mapper = kmapper.KeplerMapper()
     mycover = kmapper.Cover(n_cubes=self.cover_n_cubes, perc_overlap=self.cover_perc_overlap)
     mynerve = kmapper.GraphNerve(min_intersection=self.graph_nerve_min_intersection)
     # When configured, pass the distance matrix to map() as the inverse image X.
     original_data = matrix if self.use_original_data else None
     projected_data = mapper.fit_transform(matrix, projection=self.projection,
                                           scaler=MapperAnalysis.scalers[self.scaler], distance_matrix=False)
     graph = mapper.map(projected_data, X=original_data, clusterer=MapperAnalysis.clusterers[self.clusterer],
                        cover=mycover, nerve=mynerve, precomputed=False,
                        remove_duplicate_nodes=self.remove_duplicate_nodes)
     # save_file=True makes visualize() also write the HTML to disk.
     output_graph = mapper.visualize(graph, save_file=True)
     self.graph = output_graph
Beispiel #11
0
def run_mapper(X=None,
               y=None,
               X_inverse=True,
               lens=None,
               zscore=False,
               verbose=0,
               **params):
    """ Wrap KeplerMapper calls

    Parameters
    ----------
    X : array-like
        Input data (copied; optionally z-scored column-wise).
    y : array-like, optional
        Labels forwarded to DyNeuGraph.
    X_inverse : array-like or True
        Inverse-image data for ``mapper.map``; ``True`` means "use X".
    lens : array-like, optional
        Precomputed lens; computed via ``fit_transform`` when ``None``.
    zscore : bool
        Z-score X before mapping.
    verbose : int
        KeplerMapper verbosity.
    **params
        Optional ``projection``, ``clusterer`` and ``cover`` overrides.

    Notes
    -----
    - See PCA_metadata.ipynb

    """
    X_ = np.copy(X)
    if zscore is True:
        X_ = scipy.stats.zscore(X_, axis=0)

    # init MAPPER params
    projection = params.get('projection',
                            TSNE(perplexity=50, init='pca', random_state=0))
    clusterer = params.get('clusterer', HDBSCAN(allow_single_cluster=True))
    cover = params.get('cover', km.Cover(10, 0.67))
    X_inverse = X_ if X_inverse is True else X_inverse

    # fit
    if lens is None:
        mapper = km.KeplerMapper(verbose=verbose - 1)
        lens = mapper.fit_transform(X_, projection=projection)

    # map
    mapper = km.KeplerMapper(verbose=verbose)
    graph = mapper.map(lens, X_inverse, clusterer=clusterer, coverer=cover)

    # dG
    dG = ds.DyNeuGraph(G=graph, y=y)

    # save results
    # BUG FIX: previously stored X_ under `X_inverse`, misreporting the
    # inverse image whenever the caller passed an explicit X_inverse array.
    results = Bunch(
        X=X_,
        y=y,
        X_inverse=X_inverse,
        lens=lens.copy(),
        graph=dict(graph),
        projection=projection,
        clusterer=clusterer,
        cover=cover,
        dG=dG,
    )
    return results
    def mapper(self, data):
        """Run the mapper algorithm on the data.

        Parameters
        ----------
        data : array-like
            The data to run the algorithm on, can have almost any shape.

        Returns
        -------
        graph : The graph output from km.KeplerMapper(...).map(...)

        """
        # Initialize
        logging.info("Applying the mapping algorithm.")
        mapper = km.KeplerMapper(verbose=2)

        # We create a custom 1-D lens with Isolation Forest
        model = ensemble.IsolationForest()
        model.fit(data)
        isolation_forest = model.decision_function(data).reshape(
            (data.shape[0], 1))

        # Fit to and transform the data
        tsne_projection = mapper.fit_transform(
            data,
            projection=sklearn.manifold.TSNE(n_components=2,
                                             perplexity=20,
                                             init='pca'))

        # NOTE(review): this combined lens is never used below -- map() runs
        # on tsne_projection alone; confirm whether `lens` was meant to be
        # passed instead.
        lens = np.c_[isolation_forest, tsne_projection]

        # Create dictionary called 'graph' with nodes, edges and meta-information
        graph = mapper.map(tsne_projection,
                           coverer=km.Cover(10, 0.2),
                           clusterer=sklearn.cluster.DBSCAN(eps=1.0,
                                                            min_samples=2))

        # One color per sample, derived from its label.
        color_function = np.array(
            [self._label_to_color(self.labels[i]) for i in range(len(data))])
        # Visualize it
        mapper.visualize(graph,
                         path_html="actions.html",
                         title="chunk",
                         custom_tooltips=self.tooltips,
                         color_function=color_function)

        return graph
Beispiel #13
0
def do_analysis(lens, name_prefix):
    """Save a 3-D scatter of the lens over `data3`, then run Mapper on the
    module-level `data` and write an HTML view."""
    # Static scatter of the lens coloring, written to disk.
    viz.scatter3d(data3, lens, show=False)
    viz.plt.savefig(name_prefix + "circle.png")
    viz.plt.close("all")

    clusterer = sklearn.cluster.DBSCAN(eps=0.1, min_samples=5)
    cover = km.Cover(n_cubes=10, perc_overlap=0.2)
    graph = mapper.map(lens, data, clusterer=clusterer, cover=cover)
    mapper.visualize(graph,
                     color_function=lens,
                     path_html=name_prefix + "_circle_output.html",
                     title=name_prefix + " circle",
                     lens=lens)
def test_kmapper_sample():
    """A 3-point line mapped with an eps=1 DBSCAN yields two nodes joined
    by one edge; nodes and edges must all carry 'membership' data."""
    points = np.array([[0], [1], [2]])

    graph = km.KeplerMapper().map(points,
                                  points,
                                  clusterer=sklearn.cluster.DBSCAN(
                                      eps=1, min_samples=0),
                                  cover=km.Cover(n_cubes=2, perc_overlap=0.5))
    nxgraph = td.kmapper_to_nxmapper(graph)

    assert len(nxgraph.edges) == 1
    assert len(nxgraph.nodes) == 2

    assert all('membership' in attrs
               for _, _, attrs in nxgraph.edges.data())
    assert all('membership' in attrs for _, attrs in nxgraph.nodes.data())
Beispiel #15
0
def do_analysis(data, lens, cf, name_prefix, nc, po):
    """Mapper run over *data.values*; writes an HTML view colored by *cf*
    plus a Cytoscape JSON export with per-node column means."""
    run_name = f"{name_prefix:s}_n{nc:d}_o{po:.2f}"
    graph = mapper.map(
        lens,
        data.values,
        clusterer=lk.LinkageGap(verbose=0, metric='euclidean'),
        cover=km.Cover(n_cubes=nc, perc_overlap=po),
    )
    mapper.visualize(graph,
                     color_function=cf,
                     path_html=run_name + "_travel.html",
                     title=run_name + "_travel")

    # Attach every dataframe column to the nodes, averaged per node.
    column_values = {col: list(data.loc[:, col]) for col in data.columns}
    column_aggregators = {col: np.mean for col in column_values}
    annotated = expo.kmapper_to_nxmapper(graph,
                                         node_extra_data=column_values,
                                         node_transforms=column_aggregators)
    expo.cytoscapejson_dump(annotated, run_name + "_travel.cyjs")
Beispiel #16
0
def run_mapper(X=None, y=None, X_inverse=True, lens=None, verbose=0, **params):
    """ Wrap KeplerMapper calls

    Parameters
    ----------
    X : array-like
        Input data.
    y : array-like, optional
        Labels forwarded to DyNeuGraph.fit.
    X_inverse : array-like or True
        Inverse-image data for ``mapper.map``; ``True`` means "use X".
    lens : array-like, optional
        Precomputed lens; computed via ``fit_transform`` when ``None``.
    verbose : int
        KeplerMapper verbosity.
    **params
        Optional ``projection``, ``clusterer`` and ``cover`` overrides.

    Notes
    -----
    - See PCA_metadata.ipynb

    """
    # init MAPPER params
    projection = params.get('projection',
                            TSNE(perplexity=50, init='random', random_state=0))
    clusterer = params.get('clusterer', HDBSCAN(allow_single_cluster=True))
    cover = params.get('cover', km.Cover(10, 0.67))
    X_inverse = X if X_inverse is True else X_inverse

    # fit
    if lens is None:
        mapper = km.KeplerMapper(verbose=verbose - 1)
        lens = mapper.fit_transform(X, projection=projection)

    # map
    mapper = km.KeplerMapper(verbose=verbose)
    graph = mapper.map(lens, X_inverse, clusterer=clusterer, coverer=cover)

    # dG
    dG = ds.DyNeuGraph()
    dG.fit(graph, y=y)

    # save results
    # BUG FIX: previously stored X under `X_inverse`, misreporting the
    # inverse image whenever the caller passed an explicit X_inverse array.
    results = Bunch(X=X.copy(),
                    X_inverse=X_inverse,
                    lens=lens.copy(),
                    graph=graph,
                    params=params,
                    cover=cover,
                    dG=dG,
                    G=dG.G_,
                    TCM=dG.tcm_.copy())
    return results
def make_tda_covering_scheme(tda_lens, precfg_tda_covering_scheme, verbo_lvl):
    """Build a km.Cover for a 2-D lens from a pre-configured scheme.

    Per lens dimension, the cover limits are the rounded min/max of that
    lens column; interval counts and overlaps are taken verbatim from the
    pre-config dict.
    """
    limits = []
    for dim in (0, 1):
        rounding = precfg_tda_covering_scheme[f'lens_bound_rounding{dim}']
        column = tda_lens[:, dim]
        limits.append([round_dw(min(column), rounding),
                       round_up(max(column), rounding)])

    cfg_tda_covering_scheme = {
        'bound': np.array(limits),
        'count': [precfg_tda_covering_scheme['intvls_count0'],
                  precfg_tda_covering_scheme['intvls_count1']],
        'overlap': [precfg_tda_covering_scheme['intvls_overlap0'],
                    precfg_tda_covering_scheme['intvls_overlap1']],
    }

    return km.Cover(limits=cfg_tda_covering_scheme['bound'],
                    n_cubes=cfg_tda_covering_scheme['count'],
                    perc_overlap=cfg_tda_covering_scheme['overlap'],
                    verbose=verbo_lvl)
Beispiel #18
0
# L2-norm lens column from KeplerMapper.
lens['lens_2norm'] = mapper.fit_transform(X, projection='l2norm')

# Add the first two principal components as additional lens columns.
pca = sklearn.decomposition.PCA(n_components=2)
lens_pca = pd.DataFrame(pca.fit_transform(X))
for i, c in enumerate(lens_pca.columns):
    lens[f'pca{i}'] = lens_pca[c]

# Captured before the 'color' label column is appended below, so later
# uses operate on the lens columns only.
lens_columns = lens.columns
# lens = create_lens(df[columns])
lens['color'] = df['color']

# Scatter-matrix of all lens columns, colored by label, saved as HTML.
fig = xp.scatter_matrix(lens, dimensions=lens_columns, color='color')
html_xformed = fig.to_html()
with open(f'plots/{data_name}_lensed.html', 'w') as fout:
    fout.write(html_xformed)

# TODO: plot the projected data with colors for the labels

# Create dictionary of the graph (network) of nodes, edges and meta-information
graph = mapper.map(lens[lens_columns],
                   df[columns],
                   cover=km.Cover(n_cubes=N_CUBES, perc_overlap=PERC_OVERLAP),
                   clusterer=sklearn.cluster.KMeans(n_clusters=2))

# Visualize it
html_mapper = mapper.visualize(
    graph,
    path_html=f'plots/{data_name}_kmapper.html',
    title=f"sklearn..{data_name}({argify(data_kwargs)})",
    custom_tooltips=df['color'])
Beispiel #19
0
import kmapper as km

import mapperutils.linkage_gap as lk
import mapperutils.visualization as viz

mapper = km.KeplerMapper(verbose=2)

# Load a simplified hand mesh; its vertices form the point cloud.
hand = trimesh.load_mesh("../0_data/hand/hand_simplified3k5.stl")
data = np.array(hand.vertices)
# 1-D lens: the second coordinate (column 1) of each vertex.
lens = data[:, 1:2]

# Optional top-down preview of the point cloud colored by the lens.
plot = True
if plot:
    viz.scatter3d(data, lens, colorsMap='viridis', show=False)
    viz.plt.gca().view_init(elev=90, azim=0)
    viz.axisEqual3D(viz.plt.gca())
    viz.plt.show()

# Cover resolution and percent overlap for the Mapper run.
n = 7
p = 0.2
graph = mapper.map(lens,
                   data,
                   clusterer=lk.LinkageGap(verbose=0),
                   cover=km.Cover(n_cubes=n, perc_overlap=p))

name = "n{}_p{}".format(n, p)
mapper.visualize(graph,
                 color_function=lens,
                 path_html="hand_only_" + name + ".html",
                 title="hand, " + name)
import sklearn
from sklearn import datasets

# Two noisy concentric circles as toy data; labels mark the circle.
data, labels = datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3)

# Initialize
mapper = km.KeplerMapper(verbose=2)

# Fit to and transform the data using the built-in "dist_mean" projection.
projected_data = mapper.fit_transform(data, projection="dist_mean")

# Create dictionary called 'simplicial_complex' with nodes, edges and meta-information
simplicial_complex = mapper.map(
    projected_data,
    X=data,
    clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
    cover=km.Cover(perc_overlap=0.2),
)

# Visualize it, colored and tooltipped by the circle labels.
mapper.visualize(
    simplicial_complex,
    path_html="keplermapper-makecircles-distmean.html",
    custom_meta={
        "Data:":
        "datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3)"
    },
    custom_tooltips=labels,
    color_values=labels,
)
Beispiel #21
0
import numpy as np
import sklearn
import kmapper as km

# Point cloud of the lion reference mesh (one coordinate row per point).
data = np.genfromtxt('lion-reference.csv',delimiter=',')

mapper = km.KeplerMapper(verbose=1)

# Lens from fit_transform's default projection.
lens = mapper.fit_transform(data)

graph = mapper.map(lens,
                   data,
                   clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
                   cover=km.Cover(n_cubes=10, perc_overlap=0.2))

mapper.visualize(graph,
                 path_html="lion_keplermapper_output.html")

# You may want to visualize the original point cloud data in 3D scatter too
"""
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:,0],data[:,1],data[:,2])
plt.savefig("lion-reference.csv.png")
plt.show()
"""
import kmapper as km

# Make very noisy circles
import sklearn
from sklearn import datasets
data, labels = datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3)

# Initialize
mapper = km.KeplerMapper(verbose=2)

# Fit to and transform the data using the built-in "dist_mean" projection.
projected_data = mapper.fit_transform(data, projection="dist_mean")

# Create dictionary called 'simplicial_complex' with nodes, edges and meta-information
# (default cover parameters this time).
simplicial_complex = mapper.map(projected_data, X=data,
                                clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
                                cover=km.Cover())

# Visualize it
mapper.visualize(simplicial_complex, path_html="keplermapper-makecircles-distmean.html",
                 custom_meta={"Data:": "datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3)"},
                 custom_tooltips=labels)
Beispiel #23
0
# Keep only the numeric (float64/int64) columns of the dataframe.
numerical_cols = [
    df.columns[pos] for pos, item in enumerate(df.dtypes)
    if item in [np.float64, np.int64]
]
print(numerical_cols)
data = df[numerical_cols].values
print(data.shape)

# In[ ]:

# In[11]:

import kmapper as km
from kmapper import jupyter
from sklearn import cluster
mapper = km.KeplerMapper(verbose=1)
# Lens via the built-in "sum" projection.
projected_data = mapper.project(data, projection="sum")
graph = mapper.map(projected_data,
                   data,
                   cover=km.Cover(n_cubes=5, perc_overlap=0.75),
                   clusterer=cluster.AgglomerativeClustering(
                       n_clusters=100, affinity="cosine"))
# Visualize it
mapper.visualize(graph,
                 path_html="map-dataframe-test.html",
                 title="Map Dataframe Test")
# Embed the saved HTML in the notebook output.
IFrame("map-dataframe-test.html", 800, 600)

# In[ ]:
Beispiel #24
0
        # Save the condensed-tree plot of the unweighted HDBSCAN run.
        fig = plt.figure()
        cd = hdb_unweighted.condensed_tree_.plot()

        # fig.suptitle('Unweighted HDBSCAN condensed tree plot')
        fig.savefig(TOPO_DIR + "HDBSCAN_%s_%s.pdf" %
                    (KERAS_MODEL_NAME.split(".")[0], layer))
        # plt.show()

        # Create the graph (we cluster on the projected data and suffer projection loss)
        graph = mapper.map(
            projected_data,
            clusterer=sklearn.cluster.DBSCAN(eps=0.8, min_samples=3),
            # clusterer=sklearn.cluster.DBSCAN(eps=5),
            #clusterer=HDBSCAN(min_cluster_size=5, gen_min_span_tree=True, allow_single_cluster=True),
            # coverer=km.Cover(35, 0.9)
            coverer=km.Cover(nr_cubes=10, overlap_perc=0.2),
        )
        print(layer, "map successfully")
        simplicial_complex = graph

        print(labels)
        print(tooltip_s)
        try:
            # mapper.visualize(graph,
            #              path_html="label_layer_%s_keplermapper_weights_visualization.html"%(layer),
            #             custom_tooltips = labels
            #                  )

            mapper.visualize(graph,
                             path_html=TOPO_DIR +
                             "picture_overlap_layer_%s_%s.html" %
Beispiel #25
0
df = pd.read_csv("data.csv")
feature_names = [c for c in df.columns if c not in ["id", "diagnosis"]]
# Encode the diagnosis as 1 for "M", else 0.
df["diagnosis"] = df["diagnosis"].apply(lambda x: 1 if x == "M" else 0)
X = np.array(df[feature_names].fillna(0)) # quick and dirty imputation
y = np.array(df["diagnosis"])

# We create a custom 1-D lens with Isolation Forest
model = ensemble.IsolationForest(random_state=1729)
model.fit(X)
lens1 = model.decision_function(X).reshape((X.shape[0], 1))

# We create another 1-D lens with L2-norm
mapper = km.KeplerMapper(verbose=3)
lens2 = mapper.fit_transform(X, projection="l2norm")

# Combine both lenses to create a 2-D [Isolation Forest, L^2-Norm] lens
lens = np.c_[lens1, lens2]

# Create the simplicial complex
graph = mapper.map(lens,
                   X,
                   cover=km.Cover(n_cubes=15, perc_overlap=0.7),
                   clusterer=sklearn.cluster.KMeans(n_clusters=2,
                                               random_state=1618033))

# Visualization, tooltipped by the encoded diagnosis.
mapper.visualize(graph,
                 path_html="breast-cancer.html",
                 title="Wisconsin Breast Cancer Dataset",
                 custom_tooltips=y)
Beispiel #26
0
def full(n_layer=2, n_file=2):
    """Run Mapper over the normalized, density-filtered convolution kernels
    of one pretrained VGG16 layer and write an HTML visualization.

    Returns the Mapper graph dictionary.
    """
    # Grab the kernel tensor of the requested layer from pretrained VGG16.
    model = VGG16(weights='imagenet', include_top=False)
    weights = model.layers[n_layer].get_weights()[0]

    # Flatten to one row per (in, out) filter pair.  Note: the row width
    # uses shape[0] twice, i.e. it assumes square kernels.
    shape = weights.shape
    kernels = weights.transpose([2, 3, 0, 1]).reshape(shape[2] * shape[3],
                                                      shape[0] * shape[0])
    kernels_2d = kernels.reshape(len(kernels), shape[0], shape[0])

    # Standardize every kernel row: zero mean, unit standard deviation.
    row_means = kernels.mean(axis=1)
    row_stds = kernels.std(axis=1)
    kernels_scaled = (kernels - row_means[:, None]) / row_stds[:, None]

    # Keep only the highest-density points.
    dense_kernels = top_dens_nn(kernels_scaled, n=100, p=0.3)

    name = 'VGG_layer_' + str(n_file)

    # 2-D PCA lens over the filtered kernels.
    mapper = km.KeplerMapper(verbose=1)
    lens = mapper.fit_transform(dense_kernels,
                                projection=PCA(n_components=2, whiten=False),
                                scaler=None,
                                distance_matrix=False)

    # Quick scatter of the lens for reference.
    plt.figure()
    plt.scatter(lens[:, 0], lens[:, 1], s=5)
    plt.title(name)

    column_stds = dense_kernels.std(axis=0)

    graph = mapper.map(lens,
                       dense_kernels,
                       clusterer=Single_linkage(),
                       coverer=km.Cover(nr_cubes=30, overlap_perc=0.66))

    mapper.visualize(graph,
                     path_html=name + ".html",
                     title=name)

    return graph
Beispiel #27
0
import numpy as np
import sklearn
import kmapper as km

# Point cloud of the horse reference mesh (one coordinate row per point).
data = np.genfromtxt('horse-reference.csv', delimiter=',')

mapper = km.KeplerMapper(verbose=2)

# Lens from fit_transform's default projection.
lens = mapper.fit_transform(data)

graph = mapper.map(lens,
                   data,
                   clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
                   cover=km.Cover(30, 0.2))

# Tooltip each member with its row index.
mapper.visualize(graph,
                 path_html="horse_keplermapper_output.html",
                 custom_tooltips=np.arange(len(lens)))

# You may want to visualize the original point cloud data in 3D scatter too
"""
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:,0],data[:,1],data[:,2])
plt.savefig("horse-reference.csv.png")
plt.show()
"""
Beispiel #28
0
import numpy as np
import sklearn
import kmapper as km

# Point cloud of the horse reference mesh (absolute path on the author's
# machine; one coordinate row per point).
data = np.genfromtxt(
    'C:/Users/lili/courses/ComputationalTopology/FinalProject/Mapper/kepler-mapper/examples/horse/horse-reference.csv',
    delimiter=',')

mapper = km.KeplerMapper(verbose=2)

# Lens from fit_transform's default projection.
lens = mapper.fit_transform(data)

graph = mapper.map(lens,
                   data,
                   clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
                   cover=km.Cover(n_cubes=30, perc_overlap=0.2))

# Tooltip each member with its row index.
mapper.visualize(
    graph,
    path_html=
    "C:/Users/lili/courses/ComputationalTopology/FinalProject/Mapper/horse_keplermapper_output.html",
    custom_tooltips=np.arange(len(lens)))

# You may want to visualize the original point cloud data in 3D scatter too
"""
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:,0],data[:,1],data[:,2])
plt.savefig("horse-reference.csv.png")
Beispiel #29
0
            sq_distance_matrix[col, row] = distance_matrix[index]

# In[73]:

# Keep only the numeric (float64/int64) columns of the sub-dataframe.
numerical_cols = [
    sub_df.columns[pos] for pos, item in enumerate(sub_df.dtypes)
    if item in [np.float64, np.int64]
]
new_data = sub_df[numerical_cols].to_numpy()
# 1-D lens: the first numeric column of every row.
dimensional_data = np.array([row[0] for row in new_data])
print(dimensional_data)
mapper = km.KeplerMapper(verbose=1)
# Cluster directly on the precomputed square distance matrix.
graph = mapper.map(dimensional_data,
                   X=sq_distance_matrix,
                   precomputed=True,
                   cover=km.Cover(n_cubes=35, perc_overlap=0.2),
                   clusterer=sklearn.cluster.DBSCAN(algorithm='auto',
                                                    eps=0.40,
                                                    leaf_size=30,
                                                    metric='precomputed',
                                                    min_samples=3,
                                                    n_jobs=4))

# In[74]:

# Visualize it, colored by the lens values.
mapper.visualize(graph,
                 path_html="map-dataframe-test.html",
                 title="Map Dataframe Test",
                 color_function=dimensional_data)
IFrame("map-dataframe-test.html", 800, 600)
Beispiel #30
0
    tooltip_s.append(img_tag)
    output.close()

tooltip_s = np.array(
    tooltip_s)  # need to make sure to feed it as a NumPy array, not a list

# Initialize to use t-SNE with 2 components (reduces data to 2 dimensions). Also note high overlap_percentage.
mapper = km.KeplerMapper(verbose=2)

# Fit and transform data
projected_data = mapper.fit_transform(data, projection=sklearn.manifold.TSNE())

# Create the graph (we cluster on the projected data and suffer projection loss)
graph = mapper.map(projected_data,
                   clusterer=sklearn.cluster.DBSCAN(eps=0.3, min_samples=15),
                   cover=km.Cover(35, 0.4))

# Create the visualizations (increased the graph_gravity for a tighter graph-look.)
# Two HTML outputs: image tooltips vs. y-label tooltips.
print("Output graph examples to html")
# Tooltips with image data for every cluster member
mapper.visualize(graph,
                 title="Handwritten digits Mapper",
                 path_html="output/digits_custom_tooltips.html",
                 color_values=labels,
                 custom_tooltips=tooltip_s)
# Tooltips with the target y-labels for every cluster member
mapper.visualize(graph,
                 title="Handwritten digits Mapper",
                 path_html="output/digits_ylabel_tooltips.html",
                 custom_tooltips=labels)