Exemple #1
0
def eccentricity(data, exponent=1.,  metricpar={}, callback=None):
    '''
    Compute the eccentricity of every data point.

    For a point i with distances d(i,j) to all N points, the result is

        ( 1/N * sum_j d(i,j)^exponent )^(1/exponent),

    and the maximum of d(i,j) over j if the exponent is infinite.

    Parameters:
        data:      Either a 1-dimensional array, which is expanded by
                   scipy's squareform (i.e. treated as a compressed
                   dissimilarity matrix), or an array with one row per
                   point, whose pairwise distances are computed by cdist.
        exponent:  The exponent in the formula above. np.inf and the
                   strings 'Inf' and 'inf' select the maximum.
        metricpar: Keyword arguments passed on to cdist. Must be empty if
                   data is 1-dimensional. It is only read, never modified,
                   so the shared default dictionary is harmless.
        callback:  Passed to progressreporter; the reporter it returns is
                   called with an integer percentage after every row.
                   Only used when data is not 1-dimensional.

    Returns:
        An array with one eccentricity value per data point.
    '''
    is_inf = exponent in (np.inf, 'Inf', 'inf')
    if data.ndim==1:
        assert metricpar=={}, 'No optional parameter is allowed for a dissimilarity matrix.'
        ds = squareform(data, force='tomatrix')
        if is_inf:
            return ds.max(axis=0)
        # len() replaces np.alen(), which was deprecated in NumPy 1.18 and
        # removed in NumPy 1.23.
        N = len(ds)
        if exponent==1.:
            # Raising to the power 1 is the identity, so it is skipped.
            return ds.sum(axis=0)/float(N)
        ds = np.power(ds, exponent)
        return np.power(ds.sum(axis=0)/float(N), 1./exponent)

    progress = progressreporter(callback)
    N = len(data)
    ecc = np.empty(N)

    # Choose the per-row reduction once so that a single loop serves all
    # three exponent cases.
    if is_inf:
        def row_eccentricity(dist):
            return dist.max()
    elif exponent==1.:
        def row_eccentricity(dist):
            return dist.sum()/float(N)
    else:
        def row_eccentricity(dist):
            return np.power(np.power(dist, exponent).sum()/float(N),
                            1./exponent)

    # Only one row of distances is computed at a time; the full N x N
    # distance matrix is never held in memory.
    for i in range(N):
        ecc[i] = row_eccentricity(cdist(data[(i,),:], data, **metricpar))
        progress((i+1)*100//N)
    return ecc
Exemple #2
0
def eccentricity(data, exponent=1.,  metricpar={}, callback=None):
    '''
    Compute the eccentricity of every data point.

    For a point i with distances d(i,j) to all N points, the result is

        ( 1/N * sum_j d(i,j)^exponent )^(1/exponent),

    and the maximum of d(i,j) over j if the exponent is infinite.

    Parameters:
        data:      Either a 1-dimensional array, which is expanded by
                   scipy's squareform (i.e. treated as a compressed
                   dissimilarity matrix), or an array with one row per
                   point, whose pairwise distances are computed by cdist.
        exponent:  The exponent in the formula above. np.inf and the
                   strings 'Inf' and 'inf' select the maximum.
        metricpar: Keyword arguments passed on to cdist. Must be empty if
                   data is 1-dimensional. It is only read, never modified,
                   so the shared default dictionary is harmless.
        callback:  Passed to progressreporter; the reporter it returns is
                   called with an integer percentage after every row.
                   Only used when data is not 1-dimensional.

    Returns:
        An array with one eccentricity value per data point.
    '''
    is_inf = exponent in (np.inf, 'Inf', 'inf')
    if data.ndim==1:
        assert metricpar=={}, 'No optional parameter is allowed for a dissimilarity matrix.'
        ds = squareform(data, force='tomatrix')
        if is_inf:
            return ds.max(axis=0)
        # len() replaces np.alen(), which was deprecated in NumPy 1.18 and
        # removed in NumPy 1.23.
        N = len(ds)
        if exponent==1.:
            # Raising to the power 1 is the identity, so it is skipped.
            return ds.sum(axis=0)/float(N)
        ds = np.power(ds, exponent)
        return np.power(ds.sum(axis=0)/float(N), 1./exponent)

    progress = progressreporter(callback)
    N = len(data)
    ecc = np.empty(N)

    # Choose the per-row reduction once so that a single loop serves all
    # three exponent cases.
    if is_inf:
        def row_eccentricity(dist):
            return dist.max()
    elif exponent==1.:
        def row_eccentricity(dist):
            return dist.sum()/float(N)
    else:
        def row_eccentricity(dist):
            return np.power(np.power(dist, exponent).sum()/float(N),
                            1./exponent)

    # Only one row of distances is computed at a time; the full N x N
    # distance matrix is never held in memory.
    for i in range(N):
        ecc[i] = row_eccentricity(cdist(data[(i,),:], data, **metricpar))
        progress((i+1)*100//N)
    return ecc
Exemple #3
0
def nearest_neighbors_from_dm(X, k, callback=None):
    '''
    Find the k smallest entries in every row of a dissimilarity matrix.

    Parameters:
        X:        Dissimilarity data; it is expanded to a square matrix by
                  scipy's squareform.
        k:        Number of entries to keep per row.
        callback: Passed to progressreporter; the reporter it returns is
                  called with an integer percentage after every row.

    Returns:
        A pair (d, j) of arrays of shape (N, k), where N is the number of
        rows of the square matrix. j[i] holds the column indices of the k
        smallest entries of row i in ascending order of distance, and d[i]
        holds the corresponding distances.

    This is inefficient. To be done:

        (1) Do not fully sort every row of the distance matrix but find
        only the first k elements.

        (2) Use the compressed distance matrix, not the square form.

        Both improvements are realized in cmappertools 1.0.5.
    '''
    progress = progressreporter(callback)
    D = squareform(X, force='tomatrix')
    # len() replaces np.alen(), which was deprecated in NumPy 1.18 and
    # removed in NumPy 1.23.
    N = len(D)
    j = np.empty((N,k), dtype=np.intp)
    d = np.empty((N,k))
    for i, row in enumerate(D):
        j[i] = np.argsort(row)[:k]
        d[i] = row[j[i]]
        progress((i+1)*100//N)
    return d, j
Exemple #4
0
def nearest_neighbors_from_dm(X, k, callback=None):
    '''
    Find the k smallest entries in every row of a dissimilarity matrix.

    Parameters:
        X:        Dissimilarity data; it is expanded to a square matrix by
                  scipy's squareform.
        k:        Number of entries to keep per row.
        callback: Passed to progressreporter; the reporter it returns is
                  called with an integer percentage after every row.

    Returns:
        A pair (d, j) of arrays of shape (N, k), where N is the number of
        rows of the square matrix. j[i] holds the column indices of the k
        smallest entries of row i in ascending order of distance, and d[i]
        holds the corresponding distances.

    This is inefficient. To be done:

        (1) Do not fully sort every row of the distance matrix but find
        only the first k elements.

        (2) Use the compressed distance matrix, not the square form.

        Both improvements are realized in cmappertools 1.0.5.
    '''
    progress = progressreporter(callback)
    D = squareform(X, force='tomatrix')
    # len() replaces np.alen(), which was deprecated in NumPy 1.18 and
    # removed in NumPy 1.23.
    N = len(D)
    j = np.empty((N,k), dtype=np.intp)
    d = np.empty((N,k))
    for i, row in enumerate(D):
        j[i] = np.argsort(row)[:k]
        d[i] = row[j[i]]
        progress((i+1)*100//N)
    return d, j
Exemple #5
0
def Gauss_density(data, sigma, metricpar={}, callback=None):
    '''
    Estimate a density at every data point with a Gaussian kernel.

    For every point i, the values exp(-d(i,j)^2 / (2*sigma^2)) are summed
    over all points j.

    Parameters:
        data:      Either a 1-dimensional array, which is expanded by
                   scipy's squareform (i.e. treated as a compressed
                   dissimilarity matrix), or an array with one row per
                   point, whose pairwise distances are computed by cdist.
        sigma:     Width of the Gaussian kernel.
        metricpar: Keyword arguments passed on to cdist. Must be empty if
                   data is 1-dimensional. It is only read, never modified.
        callback:  Passed to progressreporter; the reporter it returns is
                   called with an integer percentage after every row.
                   Only used when data is not 1-dimensional.

    Returns:
        An array with one density value per data point. For 1-dimensional
        input the plain kernel sums are returned. Otherwise the sums are
        divided by N*(sqrt(2*pi)*sigma)^n, where N is the number of rows
        and n = data.shape[1].
    '''
    denom = -2.*sigma*sigma
    if data.ndim==1:
        assert metricpar=={}, ('No optional parameter is allowed for a '
                               'dissimilarity matrix.')
        ds = squareform(data, force='tomatrix')

        # No normalization since the dimensionality is not known. The
        # normalized density would be
        #   1/(N*(sqrt(2*pi)*sigma)^n) * exp(-ds*ds/(2*sigma*sigma)),
        # where N=#samples, n=dimensionality.
        dens = np.exp(ds*ds/denom).sum(axis=0)
    else:
        progress = progressreporter(callback)
        # len() replaces np.alen(), which was deprecated in NumPy 1.18 and
        # removed in NumPy 1.23.
        N = len(data)
        dens = np.empty(N)
        # Only one row of distances is computed at a time; the full N x N
        # distance matrix is never held in memory.
        for i in range(N):
            d = cdist(data[(i,),:], data, **metricpar)
            dens[i] = np.exp(d*d/denom).sum()
            progress((i+1)*100//N)
        dens /= N*np.power(np.sqrt(2*np.pi)*sigma,data.shape[1])
    return dens
Exemple #6
0
def Gauss_density(data, sigma, metricpar={}, callback=None):
    '''
    Estimate a density at every data point with a Gaussian kernel.

    For every point i, the values exp(-d(i,j)^2 / (2*sigma^2)) are summed
    over all points j.

    Parameters:
        data:      Either a 1-dimensional array, which is expanded by
                   scipy's squareform (i.e. treated as a compressed
                   dissimilarity matrix), or an array with one row per
                   point, whose pairwise distances are computed by cdist.
        sigma:     Width of the Gaussian kernel.
        metricpar: Keyword arguments passed on to cdist. Must be empty if
                   data is 1-dimensional. It is only read, never modified.
        callback:  Passed to progressreporter; the reporter it returns is
                   called with an integer percentage after every row.
                   Only used when data is not 1-dimensional.

    Returns:
        An array with one density value per data point. For 1-dimensional
        input the plain kernel sums are returned. Otherwise the sums are
        divided by N*(sqrt(2*pi)*sigma)^n, where N is the number of rows
        and n = data.shape[1].
    '''
    denom = -2.*sigma*sigma
    if data.ndim==1:
        assert metricpar=={}, ('No optional parameter is allowed for a '
                               'dissimilarity matrix.')
        ds = squareform(data, force='tomatrix')

        # No normalization since the dimensionality is not known. The
        # normalized density would be
        #   1/(N*(sqrt(2*pi)*sigma)^n) * exp(-ds*ds/(2*sigma*sigma)),
        # where N=#samples, n=dimensionality.
        dens = np.exp(ds*ds/denom).sum(axis=0)
    else:
        progress = progressreporter(callback)
        # len() replaces np.alen(), which was deprecated in NumPy 1.18 and
        # removed in NumPy 1.23.
        N = len(data)
        dens = np.empty(N)
        # Only one row of distances is computed at a time; the full N x N
        # distance matrix is never held in memory.
        for i in range(N):
            d = cdist(data[(i,),:], data, **metricpar)
            dens[i] = np.exp(d*d/denom).sum()
            progress((i+1)*100//N)
        dens /= N*np.power(np.sqrt(2*np.pi)*sigma,data.shape[1])
    return dens
Exemple #7
0
def do_scale_graph(M, weighting='inverse', exponent=0., maxcluster=None,
                   expand_intervals=False, verbose=True, callback=None):
    '''
    Compute the scale graph from a Mapper output.

    Consecutive layers obtained from M.scale_graph_data.layerdata(i) are
    connected by edges in a Layered_Dijkstra instance. The result of its
    shortest_path() method and its edges attribute are stored on
    M.scale_graph_data as the attributes path, infmin and edges.

    Parameters:
        M:                Object providing add_info() and scale_graph_data.
        weighting:        Passed to the Layered_Dijkstra constructor.
        exponent:         Power applied to the larger of the two lower
                          bounds; the result is the fourth argument of
                          add_edge.
        maxcluster:       If not None, caps the range of indices considered
                          in each layer. Also stored on scale_graph_data.
        expand_intervals: Stored on scale_graph_data.
        verbose:          If true, progress text is written to stdout.
        callback:         Passed to progressreporter; the reporter it
                          returns is called with an integer percentage
                          after every layer.

    The function returns nothing; all results are stored on
    M.scale_graph_data.
    '''
    info_text = "Scale graph algorithm ({0}, '{1}', {2})".format(
        exponent, expand_intervals, maxcluster)
    M.add_info(cutoff=info_text)

    scale_data = M.scale_graph_data
    scale_data.maxcluster = maxcluster
    scale_data.expand_intervals = expand_intervals

    tree = scale_data.dendrogram
    # The attribute is read but its value is not used in this function.
    _unused_diameter = scale_data.diameter
    num_layers = len(tree)

    # Add edges
    if verbose:
        sys.stdout.write('Add edges:')
        sys.stdout.flush()

    cur_n, cur_lb, cur_ub, cur_diam = scale_data.layerdata(0)

    graph = Layered_Dijkstra(weighting=weighting)
    graph.start(cur_n)

    report = progressreporter(callback)

    for layer in range(1, num_layers):
        # The layer processed last becomes the "previous" layer.
        prev_n, prev_lb, prev_ub, prev_diam = cur_n, cur_lb, cur_ub, cur_diam
        cur_n, cur_lb, cur_ub, cur_diam = scale_data.layerdata(layer)

        graph.next_layer(cur_n)
        graph.add_edge(0, 0)

        # Connect the leading previous-layer indices to index 0 of the
        # current layer; stop at the first index whose bound drops below
        # the current layer's diameter value.
        for s in range(prev_n):
            if not prev_lb[s] >= cur_diam:
                break
            graph.add_edge(s + 1, 0)

        # The mirrored situation: index 0 of the previous layer to the
        # leading indices of the current layer.
        for t in range(cur_n):
            if not cur_lb[t] >= prev_diam:
                break
            graph.add_edge(0, t + 1)

        if prev_n and cur_n:
            if maxcluster is None:
                s_stop = prev_n + 1
                t_stop = cur_n + 1
            else:
                s_stop = min(prev_n, maxcluster) + 1
                t_stop = min(cur_n, maxcluster) + 1

            # t_first only ever grows: current-layer indices skipped for
            # one s are not revisited for any later s.
            # NOTE(review): this presumably relies on the bounds being
            # sorted within each layer -- confirm against layerdata().
            t_first = 1
            for s in range(1, s_stop):
                lo1, hi1 = prev_lb[s], prev_ub[s - 1]
                for t in range(t_first, t_stop):
                    lo2, hi2 = cur_lb[t], cur_ub[t - 1]
                    if lo2 > hi1:
                        t_first += 1
                        continue
                    if hi2 < lo1:
                        break
                    lower = max(lo1, lo2)
                    overlap = min(hi1, hi2) - lower
                    assert overlap >= 0.
                    if lower > 0:
                        graph.add_edge(s, t, overlap,
                                       np.power(lower, exponent))

        report(layer * 100 // (num_layers - 1))

    if verbose:
        print(' {0} edges in total.'.format(graph.num_edges()))

    scale_data.path, scale_data.infmin = graph.shortest_path()
    if verbose:
        print('Scale graph path:\n{0}'.format(scale_data.path))
    scale_data.edges = graph.edges