Example #1
def run_mapper(X=None,
               y=None,
               X_inverse=True,
               lens=None,
               zscore=False,
               verbose=0,
               **params):
    """ Wrap KeplerMapper calls
    
    Notes
    -----
    - See PCA_metadata.ipynb

    """
    X_ = np.copy(X)
    if zscore is True:
        X_ = scipy.stats.zscore(X_, axis=0)

    # init MAPPER params
    projection = params.get('projection',
                            TSNE(perplexity=50, init='pca', random_state=0))
    clusterer = params.get('clusterer', HDBSCAN(allow_single_cluster=True))
    cover = params.get('cover', km.Cover(10, 0.67))
    X_inverse = X_ if X_inverse is True else X_inverse

    # fit
    if lens is None:
        mapper = km.KeplerMapper(verbose=verbose - 1)
        lens = mapper.fit_transform(X_, projection=projection)

    # map
    mapper = km.KeplerMapper(verbose=verbose)
    graph = mapper.map(lens, X_inverse, clusterer=clusterer, cover=cover)

    # dG
    dG = ds.DyNeuGraph(G=graph, y=y)

    # save results
    results = Bunch(
        X=X_,
        y=y,
        X_inverse=X_inverse,
        lens=lens.copy(),
        graph=dict(graph),
        projection=projection,
        clusterer=clusterer,
        cover=cover,
        dG=dG,
    )
    return results
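A minimal call sketch (not part of the original example): it assumes the same environment the function expects, i.e. numpy as np, kmapper as km, dyneusr as ds, TSNE, HDBSCAN and Bunch already imported; the toy data below is purely illustrative.

import numpy as np

X_demo = np.random.rand(120, 8)             # illustrative point cloud
y_demo = np.random.randint(0, 2, size=120)  # illustrative labels

results = run_mapper(X=X_demo, y=y_demo, zscore=True, verbose=1)
print(len(results.graph['nodes']))          # Bunch fields: X, y, lens, graph, dG, ...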
Example #2
def projectWords(gloveFile, destination, n_components, perplexity, n_vectors,
                 writeVectors):
    model, vectors, keys = loadGloveModel(gloveFile)
    mapper = km.KeplerMapper(verbose=2)
    np.savetxt(destination + "word-vectors" + "-c" + str(n_vectors) + ".csv",
               vectors[0:n_vectors],
               delimiter=',')
    subset = np.array(vectors[0:n_vectors])
    subsetKeys = keys[0:n_vectors]
    if writeVectors:
        with open(destination + 'words' + str(n_vectors) + ".csv", 'w') as thefile:
            for item in subsetKeys:
                thefile.write("%s\n" % item)
    print("projecting")
    projected_data = mapper.fit_transform(subset,
                                          projection=sklearn.manifold.TSNE(
                                              n_components=n_components,
                                              perplexity=perplexity))
    if writeVectors:
        print "saving vectors"
        np.savetxt(destination + "tsne-vectors" + "-nc" + str(n_components) +
                   "-p" + str(perplexity) + "-c" + str(n_vectors) + ".csv",
                   projected_data,
                   delimiter=',')
    return subsetKeys, projected_data
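A call sketch, assuming loadGloveModel is defined alongside this function; the file name and destination directory are placeholders.

keys, projected = projectWords("glove.6B.50d.txt", "out/",
                               n_components=2, perplexity=30,
                               n_vectors=1000, writeVectors=True)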
Example #3
def test(x):
    """Basic usage"""
    # instantiate a Mapper object
    start = time.time()
    filter = Projection(ax=2)
    cover = KeplerCover(nintervals=25,
                        overlap=0.4)
    cluster = Linkage(method='single',
                      metric='euclidean',
                      cutoff=FirstGap(0.05))
    mapper = lm.Mapper(data=x,
                       filter=filter,
                       cover=cover,
                       cluster=cluster)
    mapper.fit(skeleton_only=True).plot()
    print('Martino mapper: {0:.4f} sec'.format(time.time()-start))

    start = time.time()
    mapper = km.KeplerMapper(verbose=2)
    projected_data = mapper.fit_transform(x, projection=[2])
    graph = mapper.map(
        projected_data,
        x,
        nr_cubes=25,
        overlap_perc=0.4,
        clusterer=sklearn.cluster.AgglomerativeClustering(linkage='single'))

    print('Kepler mapper: {0:.4f} sec'.format(time.time()-start))

    return 0
Example #4
def create_my_strategy(daily_returns, daily_index_returns, previous_weights):
    """
    Creates the core strategy using Topological Data Analysis
    """
    epsilon = 1.0 / (2 * daily_returns.shape[1])
    threshold = epsilon
    alpha = parameters.ALPHA_MULTIPLIER * epsilon
    beta = parameters.BETA_MULTIPLIER * epsilon
    daily_returns = np.transpose(daily_returns.values)
    mapper = km.KeplerMapper(verbose=0)
    lens1 = mapper.fit_transform(daily_returns, projection='mean')
    lens2 = mapper.fit_transform(daily_returns, projection='std')

    simplicial_complex = mapper.map(
        np.c_[lens1, lens2],
        X=daily_returns,
        clusterer=sklearn.cluster.DBSCAN(eps=0.5, min_samples=3),
        cover=km.Cover(n_cubes=int(np.ceil(np.power(daily_returns.shape[0], 0.25))),
                       perc_overlap=0.1))
    if create_my_strategy.counter in (0, 79, 158):
        mapper.visualize(simplicial_complex,
                         path_html=parameters.RESULT_DIRECTORY + "\\" +
                         parameters.STOCK_INDEX + "_simplex_" +
                         str(create_my_strategy.counter) + ".html")

    create_my_strategy.counter += 1

    if previous_weights is None:
        current_portfolio = _get_max_sortino_ratios(
            daily_returns, simplicial_complex['nodes']).values()
        weights = np.zeros(daily_returns.shape[0])
        for stock_index in current_portfolio:
            weights[stock_index] = 1.0
        weights /= np.sum(weights)
        return weights
    else:
        weights = previous_weights
        alpha_weights = np.zeros(weights.shape)
        beta_weights = np.zeros(weights.shape)
        current_champion_stocks = _get_max_sortino_ratios(
            daily_returns, simplicial_complex['nodes'])
        for cluster, stock_index in current_champion_stocks.items():
            if stock_index != -1:
                # builtin sum: np.sum over a generator is deprecated
                previous_weight_sum = sum(
                    previous_weights[i]
                    for i in simplicial_complex['nodes'][cluster])
                alpha_weights[stock_index] = alpha * previous_weight_sum

                links_dict = simplicial_complex['links']
                for curr_neighbour in links_dict[cluster]:
                    neighbour_stock_index = current_champion_stocks[
                        curr_neighbour]
                    if neighbour_stock_index != -1:
                        beta_weights[
                            neighbour_stock_index] += beta * previous_weight_sum

        # alpha and beta were already applied when the weight arrays were filled
        weights = weights + alpha_weights + beta_weights
        weights[weights / np.sum(weights) < threshold] = 0
        weights /= np.sum(weights)
        return weights
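Because the function keeps state in a function attribute, the counter must be initialized before the first call; a minimal setup sketch (the attribute name comes from the code above, the data-frame names are placeholders):

create_my_strategy.counter = 0  # required before the first call
weights = create_my_strategy(daily_returns_df, daily_index_returns_df, None)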
Example #5
    def test_no_warn_normally(self, recwarn):
        """ Confirm that deprecation warnings behave as expected"""
        mapper = km.KeplerMapper()
        data = np.random.rand(100, 10)
        lens = mapper.fit_transform(data)

        warnings.simplefilter('always')
        graph = mapper.map(lens, data)

        assert len(recwarn) == 0
        assert DeprecationWarning not in recwarn
Example #6
    def test_from_kmapper_mapping_nodes(self):
        km = kmapper.KeplerMapper()
        np.random.seed(0)
        data = np.random.random((300, 5))
        lens = km.project(data)
        graph = km.map(lens, data)

        f = {k: np.random.random() for k in graph['nodes']}
        e = Extended.from_kmapper(graph, f)

        assert len(f) == len(e.f)
        assert set(e.f.values()) == set(f.values())
Example #7
    def test_from_kmapper_mapping_lens(self):
        km = kmapper.KeplerMapper()
        np.random.seed(0)
        data = np.random.random((300, 5))
        lens = km.project(data)
        graph = km.map(lens, data)

        e = Extended.from_kmapper(graph, lens)

        assert len(graph['nodes']) == len(e.f)
        assert set(e.f.values()) == set(
            np.mean(lens[v]) for v in graph['nodes'].values())
Example #8
def perform_TDA(data, labels, NR, PO, n_clusters, filt="knn_distance_2", save_string="TDA", title="TDA", color_values=None, color_function_name=None):
    """
    Perform Topological Data Analysis

    Args:
        data: 2-dimensional array, where the first dimension indexes members
        labels: labels for the first dimension of data
        NR: number of hypercubes
        PO: percent overlap between hypercubes
        n_clusters: number of clusters per hypercube
        filt: filtering scheme, default is "knn_distance_2"
        save_string: path prefix for the saved .html and .json files
        title: title of the Mapper graph
        color_values: optional per-sample values used to color nodes
        color_function_name: optional display name for the color function
    """

    # Step 1. initiate a Mapper
    mapper = km.KeplerMapper(verbose=2)

    # Step 2. Projection
    projected_data = mapper.fit_transform(data, projection=filt)

    # Step 3. Covering, clustering & mapping
    graph = mapper.map(projected_data, data,
                       cover=km.Cover(n_cubes=NR, perc_overlap=PO),
                       clusterer=AgglomerativeClustering(n_clusters=n_clusters,
                                                         linkage="ward",
                                                         affinity="euclidean",
                                                         memory=None,
                                                         connectivity=None,
                                                         compute_full_tree="auto",
                                                         )
                       )
    with open(save_string + ".json", "w") as f:
        json.dump(graph, f)

    if color_values is None or color_function_name is None:
        mapper.visualize(graph,
                         X_names=labels,
                         path_html=save_string + ".html",
                         title=title,
                         custom_tooltips=np.array(labels),
                         )
    else:
        mapper.visualize(graph,
                         X_names=labels,
                         path_html=save_string + ".html",
                         title=title,
                         custom_tooltips=np.array(labels),
                         color_function_name=color_function_name,
                         color_values=color_values,
                         node_color_function=['mean', 'std', 'median', 'max'],
                         )
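A possible invocation with synthetic data; the cover and clustering settings below are arbitrary illustrations, not recommendations:

import numpy as np

demo_data = np.random.rand(200, 4)
demo_labels = ["sample_{}".format(i) for i in range(200)]
perform_TDA(demo_data, demo_labels, NR=10, PO=0.3, n_clusters=2,
            save_string="demo_TDA", title="Demo TDA")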
Example #9
    def test_from_kmapper_simplices(self):
        km = kmapper.KeplerMapper()
        data = np.random.random((300, 5))
        lens = km.project(data)
        graph = km.map(lens, data)

        e = Extended.from_kmapper(graph, lens)

        vs = [s for s in e.simplices if len(s) == 1]
        ls = [s for s in e.simplices if len(s) == 2]
        assert len(vs) == len(graph['nodes'])
        assert len(ls) == sum(len(v) for v in graph['links'].values())
        assert len(e.simplices) == len(graph['simplices'])
Example #10
    def execute(self, matrix):
        matrix = matrixmodule.distMatrix(matrix)
        mapper = kmapper.KeplerMapper()
        mycover = kmapper.Cover(n_cubes=self.cover_n_cubes, perc_overlap=self.cover_perc_overlap)
        mynerve = kmapper.GraphNerve(min_intersection=self.graph_nerve_min_intersection)
        original_data = matrix if self.use_original_data else None
        projected_data = mapper.fit_transform(matrix, projection=self.projection,
                                              scaler=MapperAnalysis.scalers[self.scaler], distance_matrix=False)
        graph = mapper.map(projected_data, X=original_data, clusterer=MapperAnalysis.clusterers[self.clusterer],
                           cover=mycover, nerve=mynerve, precomputed=False,
                           remove_duplicate_nodes=self.remove_duplicate_nodes)
        output_graph = mapper.visualize(graph, save_file=True)
        self.graph = output_graph
Example #11
def main(args):
    """Entry point of the program. """

    data = np.genfromtxt(args.path, delimiter=',')
    mapper = km.KeplerMapper(verbose=0)
    lens = mapper.fit_transform(data, projection='l2norm')
    graph = mapper.map(lens,
                       data,
                       nr_cubes=20,
                       overlap_perc=0.7,
                       clusterer=sklearn.cluster.KMeans(n_clusters=2))

    print(json.dumps(graph))
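The args.path attribute suggests an argparse front end; a plausible wiring sketch (only the attribute name is taken from the code above):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run Mapper on a CSV file.')
    parser.add_argument('path', help='path to a comma-separated data file')
    main(parser.parse_args())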
Example #12
def run_mapper(X=None, y=None, X_inverse=True, lens=None, verbose=0, **params):
    """ Wrap KeplerMapper calls
    
    Notes
    -----
    - See PCA_metadata.ipynb

    """
    # init MAPPER params
    projection = params.get('projection',
                            TSNE(perplexity=50, init='random', random_state=0))
    clusterer = params.get('clusterer', HDBSCAN(allow_single_cluster=True))
    cover = params.get('cover', km.Cover(10, 0.67))
    X_inverse = X if X_inverse is True else X_inverse

    # fit
    if lens is None:
        mapper = km.KeplerMapper(verbose=verbose - 1)
        lens = mapper.fit_transform(X, projection=projection)

    # map
    mapper = km.KeplerMapper(verbose=verbose)
    graph = mapper.map(lens, X_inverse, clusterer=clusterer, cover=cover)

    # dG
    dG = ds.DyNeuGraph()
    dG.fit(graph, y=y)

    # save results
    results = Bunch(X=X.copy(),
                    X_inverse=X_inverse,
                    lens=lens.copy(),
                    graph=graph,
                    params=params,
                    cover=cover,
                    dG=dG,
                    G=dG.G_,
                    TCM=dG.tcm_.copy())
    return results
Example #13
    def test_new_api_old_defaults(self):
        mapper = km.KeplerMapper()
        data = np.random.rand(100, 10)
        lens = mapper.fit_transform(data)

        _ = mapper.map(lens, data, nr_cubes=10)
        c2 = mapper.coverer

        assert c2.overlap_perc == 0.1

        _ = mapper.map(lens, data, overlap_perc=0.1)
        c2 = mapper.coverer

        assert c2.nr_cubes == 10
Example #14
    def test_warn_old_api(self):
        """ Confirm old api works but throws warning """

        mapper = km.KeplerMapper()
        data = np.random.rand(100, 10)
        lens = mapper.fit_transform(data)

        with pytest.deprecated_call():
            graph = mapper.map(lens, data, nr_cubes=10)

        with pytest.deprecated_call():
            graph = mapper.map(lens, data, overlap_perc=10)

        with pytest.deprecated_call():
            graph = mapper.map(lens, data, nr_cubes=10, overlap_perc=0.1)
Example #15
    def mapper(self, data):
        """Run the mapper algorithm on the data.

        Parameters
        ----------
        data : array-like
            The data to run the algorithm on, can have almost any shape.

        Returns
        -------
        graph : The graph output from km.KeplerMapper(...).map(...)

        """
        # Initialize
        logging.info("Applying the mapping algorithm.")
        mapper = km.KeplerMapper(verbose=2)

        # We create a custom 1-D lens with Isolation Forest
        model = ensemble.IsolationForest()
        model.fit(data)
        isolation_forest = model.decision_function(data).reshape(
            (data.shape[0], 1))

        # Fit to and transform the data
        tsne_projection = mapper.fit_transform(
            data,
            projection=sklearn.manifold.TSNE(n_components=2,
                                             perplexity=20,
                                             init='pca'))

        lens = np.c_[isolation_forest, tsne_projection]

        # Create dictionary called 'graph' with nodes, edges and meta-information,
        # using the combined Isolation Forest + t-SNE lens built above
        graph = mapper.map(lens,
                           cover=km.Cover(10, 0.2),
                           clusterer=sklearn.cluster.DBSCAN(eps=1.0,
                                                            min_samples=2))

        color_function = np.array(
            [self._label_to_color(self.labels[i]) for i in range(len(data))])
        # Visualize it
        mapper.visualize(graph,
                         path_html="actions.html",
                         title="chunk",
                         custom_tooltips=self.tooltips,
                         color_function=color_function)

        return graph
Example #16
def visKMapper(data: np.ndarray, id: str):
    '''
    Bundles the functions used to calculate a KMapper visualization. Exports it as an .html object.
    '''

    mapper = km.KeplerMapper(verbose=1)  # init
    projected_data = mapper.fit_transform(
        data, projection=[0, 1])  # fit, transform data to X-Y axis
    graph = mapper.map(
        projected_data,
        data,
    )  # Create dictionary called 'graph' with nodes, edges and meta-information
    mapper.visualize(
        graph,
        path_html="make_circles_keplermapper_output" + id + ".html",
        title="make_circles(n_samples=5000, noise=0.03, factor=0.3)")
    return
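The hard-coded title suggests the function was written for sklearn.datasets.make_circles output; a matching usage sketch:

from sklearn import datasets

data, _ = datasets.make_circles(n_samples=5000, noise=0.03, factor=0.3)
visKMapper(data, id="_run1")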
Example #17
def def_lenses_features(df, fs):

    mapper = km.KeplerMapper()

    keys = []
    values = []

    # convert once outside the loop; as_matrix() was removed from pandas
    X = df[fs].to_numpy()
    for idx, col in enumerate(fs):
        keys.append("lens_{}".format(col))
        values.append(
            mapper.fit_transform(X,
                                 projection=[idx],
                                 scaler=MinMaxScaler()))

    lenses_features = dict(zip(keys, values))

    return (lenses_features)
Example #18
def test_kmapper_sample():
    data = np.array([[0], [1], [2]])
    lens = data

    graph = km.KeplerMapper().map(data,
                                  data,
                                  clusterer=sklearn.cluster.DBSCAN(
                                      eps=1, min_samples=0),
                                  cover=km.Cover(n_cubes=2, perc_overlap=0.5))
    nxgraph = td.kmapper_to_nxmapper(graph)
    assert len(nxgraph.edges) == 1
    assert len(nxgraph.nodes) == 2

    for _, _, data in nxgraph.edges.data():
        assert 'membership' in data

    for _, data in nxgraph.nodes.data():
        assert 'membership' in data
Example #19
def random(args):
    data, labels = datasets.make_circles(n_samples=5000,
                                         noise=0.03,
                                         factor=0.3)

    # Initialize
    mapper = km.KeplerMapper(verbose=1)

    # Fit to and transform the data
    projected_data = mapper.fit_transform(data, projection=[0, 1])  # X-Y axis

    # Create dictionary called 'graph' with nodes, edges and meta-information
    graph = mapper.map(projected_data, data, nr_cubes=10)

    # Visualize it
    mapper.visualize(
        graph,
        path_html="out/{}.html".format(args.action),
        title="make_circles(n_samples=5000, noise=0.03, factor=0.3)")
Example #20
def get_topological_graph(data, exp, clusterer_param, projection=True,
                          cover=[10, 0.1]):

    mapper = km.KeplerMapper(verbose=0)

    clusterer_param *= np.sqrt(data.shape[1])
    clusterer = sklearn.cluster.DBSCAN(eps=clusterer_param, min_samples=1)

    # note: isinstance(True, int) is True, so the default projection=True
    # also takes the integer branch and projects onto axis 0
    if isinstance(projection, int):
        level_set = mapper.fit_transform(exp,
                                         projection=np.arange(projection).tolist())
    else:
        level_set = exp
    cover = km.Cover(cover[0], cover[1])

    graph = mapper.map(level_set,
                       data,
                       clusterer=clusterer,
                       cover=cover)
    return remove_duplicated_links(remove_graph_duplicates(graph))
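A usage sketch with synthetic arrays; remove_duplicated_links and remove_graph_duplicates must be defined elsewhere in the project, so only the call shape is shown:

import numpy as np

data = np.random.rand(150, 6)  # raw point cloud (illustrative)
exp = np.random.rand(150, 3)   # embedding from which the lens is projected
graph = get_topological_graph(data, exp, clusterer_param=0.5,
                              projection=2, cover=[10, 0.1])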
Example #21
def def_lenses_dimred(df, fs, get_PCA, get_isomap, get_LLE, get_MDS,
                      get_spectral_embedding, get_SVD):

    scaler = MinMaxScaler()

    mapper = km.KeplerMapper()

    keys = []
    values = []

    minmax_scaler = MinMaxScaler()
    df_minmax = minmax_scaler.fit_transform(df[fs].to_numpy())

    # PCA
    if get_PCA:
        keys.append('lens_pca_0')
        keys.append('lens_pca_1')
        pca = mapper.fit_transform(df_minmax,
                                   projection=PCA(n_components=2),
                                   scaler=None)
        values.append(scaler.fit_transform(pca[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(pca[:, 1].reshape(-1, 1)))

    # Isomap
    if get_isomap:
        keys.append('lens_isomap_0')
        keys.append('lens_isomap_1')
        isomap = manifold.Isomap(n_components=2,
                                 n_neighbors=3).fit_transform(df_minmax)
        values.append(scaler.fit_transform(isomap[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(isomap[:, 1].reshape(-1, 1)))

    # Locally linear embedding
    if get_LLE:
        keys.append('lens_LLE_0')
        keys.append('lens_LLE_1')
        LLE = manifold.locally_linear_embedding(df_minmax,
                                                n_neighbors=3,
                                                n_components=2,
                                                random_state=0)[0]
        values.append(scaler.fit_transform(LLE[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(LLE[:, 1].reshape(-1, 1)))

    # Multi-dimensional scaling
    if get_MDS:
        keys.append('lens_MDS_0')
        keys.append('lens_MDS_1')
        MDS = manifold.MDS(n_components=2).fit_transform(df_minmax)
        values.append(scaler.fit_transform(MDS[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(MDS[:, 1].reshape(-1, 1)))

    # Spectral embedding
    if get_spectral_embedding:
        keys.append('lens_spectral_embedding_0')
        keys.append('lens_spectral_embedding_1')
        L = manifold.SpectralEmbedding(n_components=2,
                                       n_neighbors=1,
                                       random_state=0).fit_transform(df_minmax)
        values.append(scaler.fit_transform(L[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(L[:, 1].reshape(-1, 1)))

    # truncated SVD
    if get_SVD:
        keys.append('lens_SVD_0')
        keys.append('lens_SVD_1')
        svd = TruncatedSVD(n_components=2,
                           random_state=42).fit_transform(df_minmax)
        values.append(scaler.fit_transform(svd[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(svd[:, 1].reshape(-1, 1)))

    lenses_dimred = dict(zip(keys, values))

    return (lenses_dimred)
Example #22
                                       n_jobs=None)

tda_lens = np.log2(data[feature['5']].to_numpy())
# min(tda_lens[:,0]) == -3.77595972578207
# max(tda_lens[:,0]) == 18.194602975157967
# min(tda_lens[:,1]) == -4.832890014164741
# max(tda_lens[:,1]) == 15.189531985610547
precfg_tda_covering_scheme = {
    'lens_bound_rounding0': 0.5,
    'lens_bound_rounding1': 0.5,
    'intvls_count0': 8,
    'intvls_count1': 8,
    'intvls_overlap0': 0.4,
    'intvls_overlap1': 0.4,
}

tda_covering_scheme = make_tda_covering_scheme(tda_lens,
                                               precfg_tda_covering_scheme,
                                               verbo_lvl)

tda_mapper = km.KeplerMapper(verbose=verbo_lvl)

tda_model = tda_mapper.map(X=tda_data,
                           lens=tda_lens,
                           cover=tda_covering_scheme,
                           clusterer=tda_clusterer,
                           remove_duplicate_nodes=True)

tda_mapper.visualize(tda_model,
                     path_html=filename_tda_model,
                     title=title_tda_model)
Example #23
def def_lenses_geometry(df, fs, get_density, get_eccentricity,
                        eccentricity_exponent, get_inf_centrality, others,
                        metric):

    scaler = MinMaxScaler()

    X = df[fs].to_numpy()

    if metric == 'cosine':
        X_cosine_distance = cosine_similarity(X)
        X_dist = np.abs(X_cosine_distance - 1)
    elif metric == 'euclidean':
        X_dist = euclidean_distances(X)
    elif metric == 'correlation':
        X_dist = pairwise_distances(X, metric='correlation')

    keys = []
    values = []

    # density - see: https://scikit-learn.org/stable/modules/density.html
    if get_density:
        keys.append('lens_density')

        # calc bandwidth using Scott’s Rule, see https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html
        n = np.shape(X)[0]
        d = np.shape(X)[1]
        bandwidth = n**(-1. / (d + 4))

        # calc density
        kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X)
        density = kde.score_samples(X)
        values.append(scaler.fit_transform(density.reshape(-1, 1)))

    # eccentricity
    if get_eccentricity:
        keys.append('lens_eccentricity')
        a = X_dist**eccentricity_exponent
        b = np.sum(a, axis=1)
        c = b / np.shape(X_dist)[0]
        eccentricity = c**(1 / eccentricity_exponent)
        values.append(scaler.fit_transform(eccentricity.reshape(-1, 1)))

    # inf centrality
    if get_inf_centrality:
        keys.append('lens_inf_centrality')
        inf_centrality = np.amax(X_dist, axis=1)
        values.append(scaler.fit_transform(inf_centrality.reshape(-1, 1)))

    mapper = km.KeplerMapper()

    if others:
        # loop variable renamed so it does not shadow the `metric` parameter
        for proj in [
                "sum", "mean", "median", "max", "min", "std", "dist_mean",
                "l2norm"
        ]:
            keys.append("lens_{}".format(proj))
            values.append(
                mapper.fit_transform(X,
                                     projection=proj,
                                     scaler=MinMaxScaler()))

    lenses_geometry = dict(zip(keys, values))

    return (lenses_geometry)
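A call sketch; df and feature_cols are placeholders for the caller's data frame and feature columns:

lenses = def_lenses_geometry(df, fs=feature_cols,
                             get_density=True, get_eccentricity=True,
                             eccentricity_exponent=2,
                             get_inf_centrality=True, others=False,
                             metric='euclidean')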
Example #24
def full(n_layer=2, n_file=2):
    #Get weights
    model = VGG16(weights='imagenet', include_top=False)
    weights = model.layers[n_layer].get_weights()[0]
    
    #Reshape weights
    s = weights.shape
    kern = weights.transpose([2,3,0,1]).reshape(s[2]*s[3], s[0]*s[0])
    kern_disp = kern.reshape(len(kern), s[0], s[0])
    
    #Normalize
    kern_means = kern.mean(axis=1)
    kern_std = kern.std(axis=1)
    
    kern_scaled = kern
    kern_scaled = np.asarray([kern_scaled[i]-kern_means[i] for i in range(len(kern_scaled))])
    kern_scaled = np.asarray([kern_scaled[i]/kern_std[i] for i in range(len(kern_scaled))])
    
    #Select top density points
    kern20nn  = top_dens_nn(kern_scaled, n=100, p=0.3)

    name = 'VGG_layer_'+str(n_file)
    
    proj_pca = PCA(n_components=2, whiten=False)
    
    mapper = km.KeplerMapper(verbose=1)    
    lens = mapper.fit_transform(kern20nn, projection=proj_pca, scaler=None, distance_matrix=False)
    
    plt.figure()
    plt.scatter(lens[:,0], lens[:,1], s=5)
    plt.title(name)
    
    V = kern20nn.std(axis=0)
    
#    d = distance.cdist(kern20nn, kern20nn)
#    Z = linkage(d, 'single')
#    plt.figure()
#    dendrogram(Z)    
#    plt.show()
#    lens = np.zeros(Z.shape[0] + 1)
#    lens[:-1] = Z[:, 2]
#    lens[-1] = d.max()
#    hst, bins = np.histogram(lens, bins=64)
#    plt.figure()
#    plt.hist(lens, bins=64)
#    z = np.nonzero(hst == 0)[0]
#    print(hst[z[0]:len(hst)].sum())
#    print(z.shape)
#    print(z[:10])
    
    graph = mapper.map(lens,
                       kern20nn,
                       #clusterer = AgglomerativeClustering(n_clusters=2, linkage='single', affinity='euclidean'),
                       clusterer=Single_linkage(),
                       #clusterer = DBSCAN(metric=SNE(V)),
                       cover=km.Cover(n_cubes=30, perc_overlap=0.66),
                       )
    
    ht=mapper.visualize(graph,
                     path_html=name+".html",
                     title=name
                     )
    
    
    return graph
Example #25
def mapper_parameter_gridsearch(df, fs, labels, metric, lenses_dict,
                                parameter_values, num_connected_components,
                                filepath):

    mapper = km.KeplerMapper()

    X = np.array(df[fs])

    # for dataframe
    df_temp = []

    #     idx = 0
    for lens1, lens2, int1, int2, pc1, pc2, eps in parameter_values:

        # Combine lenses
        lens = np.c_[lenses_dict[lens1], lenses_dict[lens2]]

        if metric == 'cosine':
            X_cosine_distance = cosine_similarity(X)
            X_dist = np.abs(X_cosine_distance - 1)
            scomplex = mapper.map(lens,
                                  X_dist,
                                  cover=km.cover.Cover(n_cubes=[int1, int2],
                                                       perc_overlap=[pc1,
                                                                     pc2]),
                                  clusterer=DBSCAN(metric='precomputed',
                                                   eps=eps,
                                                   min_samples=1),
                                  precomputed=True)
        if metric == 'euclidean':
            scomplex = mapper.map(lens,
                                  X,
                                  cover=km.cover.Cover(n_cubes=[int1, int2],
                                                       perc_overlap=[pc1,
                                                                     pc2]),
                                  clusterer=DBSCAN(metric='euclidean',
                                                   eps=eps,
                                                   min_samples=1),
                                  precomputed=False)
        if metric == 'correlation':
            scomplex = mapper.map(lens,
                                  X,
                                  cover=km.cover.Cover(n_cubes=[int1, int2],
                                                       perc_overlap=[pc1,
                                                                     pc2]),
                                  clusterer=DBSCAN(metric='correlation',
                                                   eps=eps,
                                                   min_samples=1),
                                  precomputed=False)

        # Calculate number of connected components
        n_v, n_cc = count_connected_components(scomplex)

        # Append data to list for dataframe only if the simplex has num_connected_components
        # or less connected components
        if n_cc <= num_connected_components:
            df_temp.append(
                [lens1, lens2, int1, int2, pc1, pc2, eps, n_v, n_cc])

    # Create dataframe
    print('Saving to data frame...')
    columns = [
        'lens1', 'lens2', 'lens1_n_cubes', 'lens2_n_cubes',
        'lens1_perc_overlap', 'lens2_perc_overlap', 'eps', 'n_vertices',
        'n_connected_components'
    ]
    df_sc = pd.DataFrame(data=df_temp, columns=columns)

    # save df to file
    print('Saving to file...')
    df_sc.to_csv(filepath)

    print('Done...')

    return (df_sc)
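The parameter_values iterable is unpacked as 7-tuples (lens1, lens2, n_cubes, n_cubes, overlap, overlap, eps); one way to build it, assuming the lens names are keys of lenses_dict (all values illustrative):

import itertools

parameter_values = itertools.product(
    ['lens_pca_0'], ['lens_pca_1'],  # lens1, lens2
    [5, 10], [5, 10],                # n_cubes per lens axis
    [0.2, 0.4], [0.2, 0.4],          # perc_overlap per lens axis
    [0.3, 0.5])                      # DBSCAN eps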
Example #26
def getMapper():
    return kmapper.KeplerMapper(verbose=0)
Example #27
    filters, biases = layer.get_weights()
    print(layer.name, filters.shape)
    #print(filters.shape)

#print(filters)
print(model_djia.layers[5].get_weights()[0].shape)
#print(model_djia.layers[5].get_weights()[0][4][4][63])
raw_data = []
for i in range(5):
    for j in range(5):
        for k in range(64):
            raw_data.append(model_djia.layers[14].get_weights()[0][i][j][k])
raw_data_arr = numpy.array(raw_data)
print(raw_data)
#print(len(model_djia.layers[5].get_weights()[0].flatten()))
mapper = kmapper.KeplerMapper(verbose=1)  #select value for verbose
neigh.fit(raw_data_arr)
#from sklearn.decomposition import PCA
pca = PCA(n_components=2)  # select value for n_components

#data_trans = mapper.fit_transform(filters, projection=[0,1])
projected_data = mapper.project(
    raw_data_arr, projection=pca)  #choose which projection to use

#projected_data = mapper.project(raw_data_arr, "knn_distance_5") #change which kind of projection to use
#lens should be equal to the data_trans or projected_data
#choose which clusterer to use
#choose which cover to use
simplicial_complex = mapper.map(projected_data)
mapper.visualize(simplicial_complex,
                 color_function=None,
Example #28
            for x in imgs])
        col += 1

col = 5
for t in range(10, 60, 10):
    data[:,col] = [len(np.unique(label(threshold(x, t)))) for x in imgs]
    col += 1
'''
from scipy.misc import imresize

data = arr([imresize(img, (256, 256)) for img in imgs])
data = data.reshape(data.shape[0], -1).astype(np.float64)

import sklearn

mapper = km.KeplerMapper()
data_projected = mapper.fit_transform(
    data,
    projection=[0, 1],
    #projection='knn_distance_5',
    scaler=sklearn.preprocessing.MinMaxScaler())

graph = mapper.map(
    data_projected,
    #inverse_X=data,
    nr_cubes=10,
    #perc_overlap=0.1,
    clusterer=sklearn.cluster.DBSCAN())

_ = mapper.visualize(graph, path_html="tda_white.html", inverse_X=data)
#inverse_X_names=[
Example #29
        if row == col:
            sq_distance_matrix[row, col] = 0.0
        else:
            sq_distance_matrix[row, col] = distance_matrix[index]
            sq_distance_matrix[col, row] = distance_matrix[index]

# In[73]:

numerical_cols = [
    sub_df.columns[pos] for pos, item in enumerate(sub_df.dtypes)
    if item in [np.float64, np.int64]
]
new_data = sub_df[numerical_cols].to_numpy()
dimensional_data = np.array([row[0] for row in new_data])
print(dimensional_data)
mapper = km.KeplerMapper(verbose=1)
graph = mapper.map(dimensional_data,
                   X=sq_distance_matrix,
                   precomputed=True,
                   cover=km.Cover(n_cubes=35, perc_overlap=0.2),
                   clusterer=sklearn.cluster.DBSCAN(algorithm='auto',
                                                    eps=0.40,
                                                    leaf_size=30,
                                                    metric='precomputed',
                                                    min_samples=3,
                                                    n_jobs=4))

# In[74]:

# Visualize it
mapper.visualize(graph,
Example #30
def mapper():
    mapper = km.KeplerMapper(verbose=0)
    data = np.random.rand(100, 2)
    graph = mapper.map(data)
    return graph