Example #1
def calcH2Continuous_twotails(XXT, phe, keepArr, prev, h2coeff):

	print('computing h2 for a two-tails ascertained study...')
	
	XXT = XXT[np.ix_(keepArr, keepArr)]
	phe = phe[keepArr]	

	t1 = stats.norm(0,1).ppf(prev)
	t2 = stats.norm(0,1).isf(prev)
	phit1 = stats.norm(0,1).pdf(t1)
	phit2 = stats.norm(0,1).pdf(t2)
	
	K1 = prev
	K2 = prev
	
	xCoeff = ((phit2*t2 - phit1*t1 + K1 + K2)**2 * (K1+K2)**2 - (phit2-phit1)**4) / (K1 + K2)**4
	intersect = ((phit2-phit1) / (K1+K2))**2
		
	pheMean = 0
	pheVar = 1
	
	x = (xCoeff * h2coeff) * XXT 
	y = np.outer((phe-pheMean)/np.sqrt(pheVar), (phe-pheMean)/np.sqrt(pheVar))
	y -= intersect
	
	y = y[np.triu_indices(y.shape[0], 1)]
	x = x[np.triu_indices(x.shape[0], 1)]
	
	slope, intercept, rValue, pValue, stdErr = stats.linregress(x,y)	
	return slope
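The pattern above boils down to regressing the strict upper triangle of one symmetric matrix on that of another. A minimal self-contained sketch of just that step (all names and data here are hypothetical stand-ins, not the original study's inputs):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
n = 50
phe = rng.normal(size=n)
xxt = rng.normal(size=(n, n))
xxt = (xxt + xxt.T) / 2.0                 # symmetric stand-in for a kinship matrix

y = np.outer(phe, phe)                    # pairwise phenotype products
iu = np.triu_indices(n, 1)                # strict upper triangle, diagonal excluded
slope, intercept, r_value, p_value, std_err = stats.linregress(xxt[iu], y[iu])
print(slope)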
Example #2
def test_syrk(k, n, dtype, rng):
    tols = tolerances[dtype]

    A = np.zeros((n, k), dtype=dtype)
    C = np.zeros((n, n), dtype=dtype)
    D = np.zeros((k, k), dtype=dtype)
    A[...] = rng.uniform(-1, 1, size=A.shape)
    C[...] = rng.uniform(-1, 1, size=C.shape)
    D[...] = rng.uniform(-1, 1, size=D.shape)

    clA, clC, clD = map(to_ocl, [A, C, D])
    a = 0.9
    b = 0.5

    try:
        blas.setup()

        # normal syrk
        up = np.triu_indices(n)
        event = blas.syrk(queue, clA, clC, alpha=a, beta=b)
        assert np.allclose(clC.get()[up], (a*np.dot(A, A.T) + b*C)[up], **tols)
        assert isinstance(event, cl.Event)

        # transposed syrk
        up = np.triu_indices(k)
        blas.syrk(queue, clA, clD, transA=True, alpha=a, beta=b)
        assert np.allclose(clD.get()[up], (a*np.dot(A.T, A) + b*D)[up], **tols)
    finally:
        blas.teardown()
Example #3
def calcH2Binary(XXT, phe, probs, thresholds, keepArr, prev, h2coeff):
	K = prev
	P = np.sum(phe>0) / float(phe.shape[0])
	
	XXT = XXT[np.ix_(keepArr, keepArr)]
	phe = phe[keepArr]
	
	if (thresholds is None):
		t = stats.norm(0,1).isf(K)
		phit = stats.norm(0,1).pdf(t)
		xCoeff = P*(1-P) / (K**2 * (1-K)**2) * phit**2 * h2coeff
		y = np.outer((phe-P) / np.sqrt(P*(1-P)), (phe-P) / np.sqrt(P*(1-P)))
		x = xCoeff * XXT
		
	else:
		probs = probs[keepArr]
		thresholds = thresholds[keepArr]
		Ki = K*(1-P) / (P*(1-K)) * probs / (1 + K*(1-P) / (P*(1-K))*probs - probs)
		phit = stats.norm(0,1).pdf(thresholds)	
		probsInvOuter = np.outer(probs*(1-probs), probs*(1-probs))
		y = np.outer(phe-probs, phe-probs) / np.sqrt(probsInvOuter)	
		sumProbs = np.tile(np.column_stack(probs).T, (1,probs.shape[0])) + np.tile(probs, (probs.shape[0], 1))
		Atag0 = np.outer(phit, phit) * (1 - (sumProbs)*(P-K)/(P*(1-K)) + np.outer(probs, probs)*(((P-K)/(P*(1-K)))**2)) / np.sqrt(probsInvOuter)
		B0 = np.outer(Ki + (1-Ki)*(K*(1-P))/(P*(1-K)), Ki + (1-Ki)*(K*(1-P))/(P*(1-K)))
		x = (Atag0 / B0 * h2coeff) * XXT	
	
	y = y[np.triu_indices(y.shape[0], 1)]
	x = x[np.triu_indices(x.shape[0], 1)]
	
	slope, intercept, rValue, pValue, stdErr = stats.linregress(x,y)
	return slope
    def test_triu_indices(self):
        iu1 = triu_indices(4)
        iu2 = triu_indices(4, 2)

        a = np.array([[1, 2, 3, 4],
                      [5, 6, 7, 8],
                      [9, 10, 11, 12],
                      [13, 14, 15, 16]])

        # Both for indexing:
        yield (assert_array_equal, a[iu1],
               array([1, 2,  3,  4,  6, 7, 8, 11, 12, 16]))

        # And for assigning values:
        a[iu1] = -1
        yield (assert_array_equal, a,
               array([[-1, -1, -1, -1],
                      [ 5, -1, -1, -1],
                      [ 9, 10, -1, -1],
                      [13, 14, 15, -1]])  )

        # These cover almost the whole array (two diagonals right of the main one):
        a[iu2] = -10
        yield ( assert_array_equal, a,
                array([[ -1,  -1, -10, -10],
                       [  5,  -1,  -1, -10],
                       [  9,  10,  -1,  -1],
                       [ 13,  14,  15,  -1]]) )
Example #5
    def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwargs):
        if (p is not None) and (n is not None) and (eta is None):
            warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta '
                          'dimension parameter p -> n. Please update your code. '
                          'Automatically re-assigning parameters for backwards compatibility.',
                          DeprecationWarning)
            self.n = p
            self.eta = n
            eta = self.eta
            n = self.n
        elif (n is not None) and (eta is not None) and (p is None):
            self.n = n
            self.eta = eta
        else:
            raise ValueError('Invalid parameter: please use eta as the shape parameter and '
                             'n as the dimension parameter.')

        n_elem = int(n * (n - 1) / 2)
        self.mean = np.zeros(n_elem, dtype=theano.config.floatX)

        if transform == 'interval':
            transform = transforms.interval(-1, 1)

        super(LKJCorr, self).__init__(shape=n_elem, transform=transform,
                                      *args, **kwargs)
        warnings.warn('Parameters in LKJCorr have been renamed: shape parameter n -> eta '
                      'dimension parameter p -> n. Please double check your initialization.',
                      DeprecationWarning)
        self.tri_index = np.zeros([n, n], dtype='int32')
        self.tri_index[np.triu_indices(n, k=1)] = np.arange(n_elem)
        self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(n_elem)
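The tri_index construction above maps a flat vector of n*(n-1)/2 correlations onto a full symmetric matrix. The same trick in isolation (values here are hypothetical):

import numpy as np

n = 4
flat = np.arange(n * (n - 1) // 2, dtype=float)    # stand-in for the packed correlations

tri_index = np.zeros((n, n), dtype='int32')
tri_index[np.triu_indices(n, k=1)] = np.arange(n * (n - 1) // 2)
tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(n * (n - 1) // 2)

corr = flat[tri_index]                             # symmetric matrix of off-diagonal entries
corr[np.diag_indices(n)] = 1.0
print(corr)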
Example #6
    def mvn_msr(self, corrDS=None, abstol=1e-12, reltol=1e-12, intLb=-10, intUb=10):
        systype = self.systype
        beta = self.beta
        nls = len(self.comps)
        if corrDS is None:
            correl = self.syscorrDS[np.triu_indices(nls, 1)]
        else:
            correl = corrDS[np.triu_indices(nls, 1)]
        if corrDS is None:
            corrDS = self.syscorrDS
        i = 1
        n = 10000
        syspf0 = 0.0
        dpf = 1.0
        # while i!=0:
        # n +=10000
        # v,res,i = stats.mvn.mvndst(intLb*np.ones(nls), beta, np.zeros(nls, dtype=int), correl, [nls*n,1e-12, 1e-12])
        while i != 0:
            n += 10000
            res, i = stats.mvn.mvnun(-10 * np.ones(nls), beta, np.zeros(nls), corrDS, [nls * n, abstol, reltol])
        # if abs(res-res1)/(0.5*(res+res1))>1e-3:
        # print 'warning: abnormal difference between mvnun and mvndst results'
        if systype.lower() == "series":
            syspf = 1.0 - res
            sysbeta = -stats.norm.ppf(syspf)
            results = ReliabilityResults(sysbeta, syspf)
        elif systype.lower() == "parallel":
            syspf = res
            sysbeta = -stats.norm.ppf(syspf)
            results = ReliabilityResults(sysbeta, syspf)
        else:
            print ("mvn_msr only supports series or parallel system")
            sys.exit(0)

        return results
def scoring2B_behavior():
	t_clusters = np.zeros((600,3))
	t_clusters[0:200,0] = 1
	t_clusters[200:400,1] = 1
	t_clusters[400:,2] = 1
	t_ccm = np.dot(t_clusters,t_clusters.T)

	n_uniq = len(np.triu_indices(t_ccm.shape[0],k=1)[0])
	res = []
	concentrations = [1000,100,50,25,10,5,3,1]
	for c in concentrations:
		for i in range(50):
			ccm = np.copy(t_ccm)
			ccm[np.triu_indices(t_ccm.shape[0],k=1)] -= np.random.beta(1,c,n_uniq)
			#ccm[np.tril_indices(t_ccm.shape[0],k=-1)] = ccm[np.triu_indices(t_ccm.shape[0],k=1)]
			ccm[np.tril_indices(t_ccm.shape[0],k=-1)] = 0
			ccm = ccm + ccm.T
			np.fill_diagonal(ccm,1)
			ccm = np.abs(ccm)
			res.append([c,calculate2(ccm,t_ccm)])
	res = [map(str,x) for x in res]
	res = ['\t'.join(x) for x in res]
	f = open('scoring2B_beta.tsv', 'w')
	f.write('\n'.join(res))
	f.close()	
def insertCartesainData(gt_row, xyzuvw_mean, xyzuvw_cov):
    dim = 6
    # CART_COL_NAMES = ['X', 'Y', 'Z', 'U', 'V', 'W',
    #                   'dX', 'dY', 'dZ', 'dU', 'dV', 'dW',
    #                   'c_XY', 'c_XZ', 'c_XU', 'c_XV', 'c_XW',
    #                           'c_YZ', 'c_YU', 'c_YV', 'c_YW',
    #                                   'c_ZU', 'c_ZV', 'c_ZW',
    #                                           'c_UV', 'c_UW',
    #                                                   'c_VW']

    # fill in cartesian mean
    try:
        for col_ix, col_name in enumerate(CART_COL_NAMES[:6]):
            gt_row[col_name] = xyzuvw_mean[col_ix]
    except IndexError:
        import pdb; pdb.set_trace()

    # fill in standard deviations
    xyzuvw_stds = np.sqrt(xyzuvw_cov[np.diag_indices(dim)])
    for col_ix, col_name in enumerate(CART_COL_NAMES[6:12]):
        gt_row[col_name] = xyzuvw_stds[col_ix]

    correl_matrix = xyzuvw_cov / xyzuvw_stds / xyzuvw_stds.reshape(6, 1)
    # fill in correlations
    for col_ix, col_name in enumerate(CART_COL_NAMES[12:]):
        gt_row[col_name] = correl_matrix[
            np.triu_indices(dim, k=1)[0][col_ix],
            np.triu_indices(dim, k=1)[1][col_ix]
        ]
    def loss_function(mapping12):
        """Computes the loss function of a given mapping.

        Using the graph kernel of two sets of distance.
        """
        
        global tractography1, tractography2
        global dm1_all, dm1_all_small, dm2_all, dm2_all_small
        global kdt1, kdt2
        k = 10
        
        radius = 150  
        loss = 0.0
        for sid in np.arange(len(tractography1)):               
            #idx1 = kdt1.query_radius(dm1_all_small[sid], radius)[0]
            idx1 = kdt1.query(dm1_all_small[sid], k)[1][0]            
            dm_small1 = dm1_all[idx1][:,idx1]
            e1 = dm_small1[np.triu_indices(dm_small1.shape[0],1)]
                    
            #idx2 = kdt2.query_radius(dm2_all_small[mapping12[sid]], radius)[0]
            idx2 = kdt2.query(dm2_all_small[mapping12[sid]], k)[1][0]
            dm_small2 = dm2_all[idx2][:,idx2]
            e2 = dm_small2[np.triu_indices(dm_small2.shape[0],1)]
            
            #loss = loss + Graph_KN(e1, e2, weight=1., num_bins = 128)
            #similarity = similarity + Pyramid_KN(e1, e2, weight=1., num_bins = 128)
            loss = loss +  Pyramid_KN(e1, e2, weight=1., num_bins = 128)
            
        return loss
Example #10
    def find_stationary_var(amat=None, bmat=None, cmat=None):
        """Find fixed point of H = CC' + AHA' + BHB' given A, B, C.

        Parameters
        ----------
        amat, bmat, cmat : (nstocks, nstocks) arrays
            Parameter matrices

        Returns
        -------
        (nstocks, nstocks) array
            Unconditional variance matrix

        """
        nstocks = amat.shape[0]
        kwargs = {'amat': amat, 'bmat': bmat, 'ccmat': cmat.dot(cmat.T)}
        fun = partial(ParamGeneric.fixed_point, **kwargs)
        try:
            with np.errstate(divide='ignore', invalid='ignore'):
                hvar = np.eye(nstocks)
                sol = sco.fixed_point(fun, hvar[np.tril_indices(nstocks)])
                hvar[np.tril_indices(nstocks)] = sol
                hvar[np.triu_indices(nstocks, 1)] \
                    = hvar.T[np.triu_indices(nstocks, 1)]
                return hvar
        except RuntimeError:
            # warnings.warn('Could not find stationary variance!')
            return None
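The last step above mirrors the solved lower triangle into the strict upper triangle to make the matrix symmetric. That step in isolation, with hypothetical values:

import numpy as np

n = 3
h = np.zeros((n, n))
h[np.tril_indices(n)] = np.arange(1, n * (n + 1) // 2 + 1)   # fill lower triangle incl. diagonal
h[np.triu_indices(n, 1)] = h.T[np.triu_indices(n, 1)]        # mirror into strict upper triangle
assert np.allclose(h, h.T)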
Example #11
def get_query_clusters(points, k):
    '''
    points [n,m] - array of n points with dimension m - encoded query
    '''
    # normalize input
    points = normalize(points.astype(float))
    # get similarity matrix (cosine distance)
    dist = points.dot(points.T)
    # initialize variables
    n_pt = len(points)
    cluster_old, cluster_new = np.ones(n_pt), np.zeros(n_pt)
    # special case, no clustering
    if k==1 or n_pt==1:
        return np.zeros(n_pt), 1 if n_pt==1 else np.mean(dist[np.triu_indices(n_pt,k=1)])
    # randomly choose k starting centroids
    centroids = points[np.random.permutation(n_pt)[:k]]
    while not np.array_equal(cluster_old, cluster_new):
        cluster_old = cluster_new
        # get cluster index for each point
        cluster_new = np.argmax(points.dot(centroids.T), axis=1)
        # get new centroids, and within class mean distance/similarity
        centroids, in_dist = [], []
        for c in np.unique(cluster_new):
            pid = cluster_new==c
            # set new centroid as the one who has minimum total distance to rest of the points in the cluster
            cid = np.argmax(np.sum(dist[np.ix_(pid, pid)], axis=1))
            centroids.append(points[pid][cid])
            in_dist.append(1 if sum(pid)==1 else np.mean(dist[np.ix_(pid,pid)][np.triu_indices(sum(pid),k=1)]))
        centroids = np.array(centroids)
        # traditional way to get new centroid, not working well for cosine distance
#         centroids = normalize([np.mean(points[cluster_new==c], axis=0) for c in np.unique(cluster_new)])

    return cluster_new, np.mean(in_dist), centroids
def dynamically_bin(hic1, hic2, chrom, binbounds):
    unbinned1, map1 = hic1.cis_heatmap(chrom, start=binbounds[0, 0], stop=binbounds[-1, 1], datatype='fend',
                                 arraytype='full', returnmapping=True)
    unbinned2, map2 = hic2.cis_heatmap(chrom, start=binbounds[0, 0], stop=binbounds[-1, 1], datatype='fend',
                                 arraytype='full', returnmapping=True)
    map1[:, 2] = (map1[:, 0] + map1[:, 1])
    map2[:, 2] = (map2[:, 0] + map2[:, 1])
    allmap = numpy.vstack((map1, map2))
    allmap = allmap[numpy.argsort(allmap[:, 2]), :]
    indices1 = numpy.searchsorted(allmap[:, 2], map1[:, 2])
    indices1_1 = (indices1.reshape(-1, 1) * allmap.shape[0] + indices1.reshape(1, -1)).ravel()
    indices2 = numpy.searchsorted(allmap[:, 2], map2[:, 2])
    indices2_1 = (indices2.reshape(-1, 1) * allmap.shape[0] + indices2.reshape(1, -1)).ravel()
    unbinned = numpy.zeros((allmap.shape[0], allmap.shape[0], 2), dtype=numpy.float32)
    unbinned[:, :, 0] += numpy.bincount(indices1_1, minlength=allmap.shape[0] ** 2,
                                        weights=unbinned1[:, :, 0].ravel()).reshape(allmap.shape[0], -1)
    unbinned[:, :, 1] += numpy.bincount(indices1_1, minlength=allmap.shape[0] ** 2,
                                        weights=unbinned1[:, :, 1].ravel()).reshape(allmap.shape[0], -1)
    unbinned[:, :, 0] += numpy.bincount(indices2_1, minlength=allmap.shape[0] ** 2,
                                        weights=unbinned2[:, :, 0].ravel()).reshape(allmap.shape[0], -1)
    unbinned[:, :, 1] += numpy.bincount(indices2_1, minlength=allmap.shape[0] ** 2,
                                        weights=unbinned2[:, :, 1].ravel()).reshape(allmap.shape[0], -1)
    indices = numpy.triu_indices(allmap.shape[0], 1)
    unbinned = unbinned[indices[0], indices[1], :]
    binned, binmap = hic1.cis_heatmap(chrom, binbounds=binbounds, datatype='fend', arraytype='full',
                                      returnmapping=True)
    binned += hic2.cis_heatmap(chrom, binbounds=binbounds, datatype='fend', arraytype='full')
    indices = numpy.triu_indices(binbounds.shape[0], 1)
    upper = binned[indices[0], indices[1], :]
    hifive.hic_binning.dynamically_bin_cis_array(unbinned, allmap, upper, binmap,
                                                 expansion_binsize=0, minobservations=25)
    binned[indices[0], indices[1], :] = upper
    binned[indices[1], indices[0], :] = upper
    return binned
    def loss_function2(mapping12):
        """Computes the loss function of a given mapping.

        Using the graph kernel of two sets of distance.
        """
        global dis_1, dis_2
        global kdt_1, kdt_2
        global dm1, dm2, dm1_all, dm2_all
        
        k = 15
        
        radius = 100  
        similarity = 0.0
        for sid in np.arange(len(pro_1)):               
            idx1 = kdt_1.query_radius(dm1[sid], radius)[0]
            #idx1 = kdt_1.query(dm1[sid], k)[1][0]            
            dm_small1 = dm1_all[idx1][:,idx1]
            e1 = dm_small1[np.triu_indices(dm_small1.shape[0],1)]
                    
            idx2 = kdt_2.query_radius(dis_2[mapping12[sid]], radius)[0]
            #idx2 = kdt_2.query(dis_2[mapping12[sid]], k)[1][0]
            dm_small2 = dm2_all[idx2][:,idx2]
            e2 = dm_small2[np.triu_indices(dm_small2.shape[0],1)]
            
            #loss = loss + Graph_KN(e1, e2, weight=1., num_bins = 128)
            similarity = similarity + Pyramid_KN(e1, e2, weight=1., num_bins = 128)
            
        return 1./similarity
Example #14
def sim_matrix_within_group_means(matrix, n1):
    """
    Computes the mean of the upper triangle (k=1) for the blocks
    (0,n-1)*(0,n-1) and (n,2n-1)*(n,2n-1), and their difference
    (for convenience).

    Parameters
    ----------
    matrix : 2D symmetric numpy array
        2-dimensional symmetric numpy array; the first n1 indices along
        the zeroth axis of the array should correspond to the
        values of the first group.
        The value of ``matrix[i][j]`` should correspond to the
        similarity between elements i and j.
    n1 : int
        the number of elements in the first group

    Returns
    -------
    mean1 : float
        the average similarity between members in the first group
    mean2: float
        the average similarity between members in the second group
    mean1-mean2: float
        just mean1-mean2 (as a convenience for stat. testing)
    """
    n2 = matrix.shape[0] - n1
    indices1 = np.triu_indices(n1, k=1)
    indices2base = np.triu_indices(n2, k=1)
    indices2I = indices2base[0].copy() + n1
    indices2J = indices2base[1].copy() + n1
    indices2 = (indices2I, indices2J)
    mean1 = np.average(matrix[indices1])
    mean2 = np.average(matrix[indices2])
    return mean1, mean2, mean1 - mean2
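A hypothetical usage of sim_matrix_within_group_means, with a 4x4 similarity matrix whose first two indices form group 1 and the last two form group 2:

import numpy as np

sim = np.array([[1.0, 0.9, 0.1, 0.2],
                [0.9, 1.0, 0.3, 0.1],
                [0.1, 0.3, 1.0, 0.8],
                [0.2, 0.1, 0.8, 1.0]])
m1, m2, diff = sim_matrix_within_group_means(sim, 2)
# m1 == 0.9 (within group 1), m2 == 0.8 (within group 2), diff == 0.1 (approximately)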
Example #15
def example_one():
    """
    Generates a set of sample data for the
    examples page of the hetaira web tool.
    """
    
    np.random.seed(5)
    ids = ['Pr'] + list(ascii_lowercase) + ['Sp']
    
    # make some data where all activities are the same
    data = np.ones((26,26))
    
    # make some random activities to pull from
    y = np.random.uniform(1000, 2500, (26,26))
    
    # this will replace the ones with numbers from the uniform
    # distribution, increasing by one at each column
    # using the upper triangular matrix
    data[np.triu_indices(26)] = y[np.triu_indices(26)]

    # stack a perfectly promiscuous and a perfectly (almost)
    # specific column on either side of the data
    data = np.hstack((np.full((26,1), 1e-10), data, np.ones((26,1))))
    data[0,0] = 100
    descriptors = None
    example = Promiscuity(ids, np.fliplr(data), descriptors)
    return example.hetaira_results()
 def normalization(self):
     """
     Normalize the equilibrium steady state correlations according to
     Eq 76 in Lorenzo's writeup
     """
     N = self.latsize
     #First disconnect
     self.disconnect(self.steady_state)
     norm_1 = N+np.sum(self.steady_state[2*N:3*N])
     sxxpsyy = self.steady_state[3*N:].reshape(3,3,N,N)[0,0,:,:] +\
         self.steady_state[3*N:].reshape(3,3,N,N)[1,1,:,:]
     sxymsyx = self.steady_state[3*N:].reshape(3,3,N,N)[0,1,:,:] -\
         self.steady_state[3*N:].reshape(3,3,N,N)[1,0,:,:]
     norms = []
     for kvec in self.kvecs:
         argmat = np.zeros((N,N))
         for (m,n) in combinations(np.arange(N),2):
             argmat[m,n] = kvec.dot(self.atoms[m].coords-self.atoms[n].coords)
         norm_2 = np.sum(\
                 np.cos(argmat[np.triu_indices(N, k=1)]) *\
                     sxxpsyy[np.triu_indices(N, k=1)] +\
                 np.sin(argmat[np.triu_indices(N, k=1)]) *\
                     sxymsyx[np.triu_indices(N, k=1)])
         norms.append(0.5*(norm_1+norm_2))
     #Reconnect before exit
     self.reconnect(self.steady_state)
     return np.array(norms).flatten()
Example #17
def calcH2Continuous(XXT, phe, keepArr, prev, h2coeff):
	t = stats.norm(0,1).isf(prev)
	phit = stats.norm(0,1).pdf(t)
	
	K1 = 1 - prev
	K2 = 1 - K1
	P = np.sum(phe<t) / float(phe.shape[0])	
	P2 = 1.0
	P1 = K2*P2*P / (K1*(1-P))
	R = P2 / P1
	
	XXT = XXT[np.ix_(keepArr, keepArr)]
	phe = phe[keepArr]
	
	xCoeff = (((R-1)*phit*t + K1 + R*K2)**2 * (K1+R*K2)**2 - ((R-1)*phit)**4) / (K1 + R*K2)**4
	x = (xCoeff * h2coeff) * XXT 
	pheMean = 0
	pheVar = 1	
	y = np.outer((phe-pheMean) / np.sqrt(pheVar), (phe-pheMean)/np.sqrt(pheVar))
	y -= ((R-1)*phit / (K1+R*K2))**2
	
	y = y[np.triu_indices(y.shape[0], 1)]
	x = x[np.triu_indices(x.shape[0], 1)]
	
	slope, intercept, rValue, pValue, stdErr = stats.linregress(x,y)
	return slope
Example #18
    def __init__(self, endmembers, alphas, energy_interaction, volume_interaction=None, entropy_interaction=None):

        self.n_endmembers = len(endmembers)

        # Create array of van Laar parameters
        self.alphas = np.array(alphas)

        # Create 2D arrays of interaction parameters
        self.We = np.triu(2. / (self.alphas[:, np.newaxis] + self.alphas), 1)
        self.We[np.triu_indices(self.n_endmembers, 1)] *= np.array([i for row in energy_interaction
                                                                for i in row])

        if entropy_interaction is not None:
            self.Ws = np.triu(2. / (self.alphas[:, np.newaxis] + self.alphas), 1)
            self.Ws[np.triu_indices(self.n_endmembers, 1)] *= np.array([i for row in entropy_interaction
                                                                        for i in row])
        else:
            self.Ws = np.zeros((self.n_endmembers, self.n_endmembers))

        if volume_interaction is not None:
            self.Wv = np.triu(2. / (self.alphas[:, np.newaxis] + self.alphas), 1)
            self.Wv[np.triu_indices(self.n_endmembers, 1)] *= np.array([i for row in volume_interaction
                                                                        for i in row])
        else:
            self.Wv = np.zeros((self.n_endmembers, self.n_endmembers))


        # initialize ideal solution model
        IdealSolution.__init__(self, endmembers)
Example #19
def angles_and_contour_lengths(spline,deriv,
                               min_change_px=0,max_change_px=np.inf):
    """
    gets Cos(Theta(i)) and L(i), where i runs along the spline order given,
    and L is the contour length between segments chosen at index i

    Args:
        spline: tuple of x_spline,y_spline -- x and y values of the line, size N
        deriv: the continuous derivative of spline, size N
        <min/max>_change_px: the minimum and maximum pixel changes
    Returns:
        tuple of angle_info object, L0_px
    """
    # get the x and y coordinates of the spline
    x_spline,y_spline = spline
    x_deriv,y_deriv = deriv
    deriv_unit_vector = np.array((x_deriv,y_deriv))
    deriv_unit_vector /= np.sqrt(np.sum(np.abs(deriv_unit_vector**2),axis=0))
    assert (np.abs(np.sum(deriv_unit_vector**2, axis=0) - 1) < 1e-6).all(), \
        "Unit vectors not correct"
    # POST: unit vector are normalized, |v| = 1
    dx_spline = np.array([0] + list(np.diff(x_spline)))
    dy_spline = np.array([0] + list(np.diff(y_spline)))
    # d_spline(i) is the change from i-1 to i (zero if i=0)
    d_spline = np.sqrt(dx_spline**2 + dy_spline**2)
    assert (dx_spline <= d_spline).all()
    contour_lengths = np.cumsum(d_spline)
    L0 = contour_lengths[-1]
    n = x_spline.shape[0]
    contour_length_matrix = _difference_matrix(contour_lengths,contour_lengths)
    dx_deriv = deriv_unit_vector[0, :]
    dy_deriv = deriv_unit_vector[1, :]
    angle2 = np.arctan2(dy_deriv, dx_deriv)
    angle_diff_matrix = _difference_matrix(angle2.T, angle2.T)
    # normalize to 0 to 2*pi
    where_le_0 = np.where(angle_diff_matrix < 0)
    angle_diff_matrix[where_le_0] += 2 * np.pi
    assert ((angle_diff_matrix >= 0) & (angle_diff_matrix <= 2*np.pi)).all()
    # POST: angles calculated correctly...
    # only look at the upper triangular part
    idx_upper_tri = np.triu_indices(n)
    idx_upper_tri_no_diag = np.triu_indices(n, k=1)
    # upper diagonal should have >0 contour length
    assert (contour_length_matrix[idx_upper_tri_no_diag] > 0).all() , \
        "Contour lengths should be positive"
    # POST: contour lengths and angles make sense; we only want upper triangular
    # (*including* the trivial 0,0 point along the diagonal)
    contour_length_matrix_check_valid = contour_length_matrix[idx_upper_tri]
    # POST: matrix is filled in, determine where the value are valid
    ok_idx = np.where( (contour_length_matrix_check_valid > min_change_px) &
                       (contour_length_matrix_check_valid < max_change_px))
    sanit = lambda x: x[idx_upper_tri][ok_idx].flatten()
    sort_idx = np.argsort(sanit(contour_length_matrix))
    sanit_and_sort = lambda x: sanit(x)[sort_idx]
    # return everything sorted as per sort_idx
    flat_L = sanit_and_sort(contour_length_matrix)
    flat_angle = np.arccos(np.cos(sanit_and_sort(angle_diff_matrix)))
    to_ret = angle_info(theta=flat_angle, L_px=flat_L)
    return to_ret,L0
Example #20
File: cluster.py Project: ctw/ptsa
def simple_neighbors_1d(n):
    """
    Return connectivity for simple 1D neighbors.
    """
    c = np.zeros((n,n))
    c[np.triu_indices(n,1)] = 1
    c[np.triu_indices(n,2)] = 0
    return c
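For illustration (not part of the original module), simple_neighbors_1d(4) keeps only the first super-diagonal, connecting each point to its immediate right-hand neighbour:

print(simple_neighbors_1d(4))
# [[0. 1. 0. 0.]
#  [0. 0. 1. 0.]
#  [0. 0. 0. 1.]
#  [0. 0. 0. 0.]]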
def coefs2mats(coefs, n=8):
    const = coefs[0]
    jac = coefs[1:n+1]
    hes = np.zeros((n,n))
    hes[np.triu_indices(n)] = hes.T[np.triu_indices(n)] = coefs[n+1:]
    
    hes[np.diag_indices(n)] *= 2
    return const, jac, hes
 def from_integral(self, integral):
     Z = integral[0]
     m = integral[1: (self.dim + 1)] / Z
     V = np.zeros((self.dim, self.dim))
     idx = np.triu_indices(self.dim)
     V[idx] = integral[(self.dim + 1):] / Z
     V.T[np.triu_indices(self.dim)] = V[idx]
     V -= np.dot(m.reshape(m.size, 1), m.reshape(1, m.size))
     return Gaussian(m, V, Z=Z)
Example #23
def untri(vec, k=0, fill=0):
    # solve n (n + 1) / 2 = len(vec) for n; cast to int so it can be used as a shape
    n = int(round((np.sqrt(1 + 8 * len(vec)) - 1) / 2))
    n += k
    m = np.empty((n, n))
    m.fill(fill)
    m[np.triu_indices(n, k=k)] = vec
    m.T[np.triu_indices(n, k=k)] = vec
    return m
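A hypothetical round trip for untri: pack the upper triangle of a small symmetric matrix and rebuild it (this relies on n being an integer, hence the int() conversion above):

import numpy as np

a = np.array([[1., 2., 3.],
              [2., 4., 5.],
              [3., 5., 6.]])
vec = a[np.triu_indices(3)]        # array([1., 2., 3., 4., 5., 6.])
assert np.allclose(untri(vec), a)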
Example #24
 def corrComp(dmatA, dmatB, method):
     n = dmatB.shape[0]
     if method == 'pearson':
         rho, p = stats.pearsonr(dmatA[np.triu_indices(n, k=1)], dmatB[np.triu_indices(n, k=1)])
     elif method == 'spearman':
         rho, p = stats.spearmanr(dmatA[np.triu_indices(n, k=1)], dmatB[np.triu_indices(n, k=1)])
     else:
         raise ValueError('Must specify method as "pearson" or "spearman"')
     return rho
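Treating corrComp above as a standalone function, a hypothetical Mantel-style comparison of two distance matrices might look like this (data and names here are made up):

import numpy as np
from scipy.spatial.distance import pdist, squareform

rng = np.random.default_rng(1)
pts = rng.normal(size=(6, 2))
dmatA = squareform(pdist(pts))                                   # square distance matrix
dmatB = squareform(pdist(pts + rng.normal(scale=0.1, size=pts.shape)))
print(corrComp(dmatA, dmatB, 'pearson'))
print(corrComp(dmatA, dmatB, 'spearman'))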
Example #25
    def __init__(self, n, p, *args, **kwargs):
        self.n = n
        self.p = p
        n_elem = int(p * (p - 1) / 2)
        self.mean = np.zeros(n_elem)
        super(LKJCorr, self).__init__(shape=n_elem, *args, **kwargs)

        self.tri_index = np.zeros([p, p], dtype=int)
        self.tri_index[np.triu_indices(p, k=1)] = np.arange(n_elem)
        self.tri_index[np.triu_indices(p, k=1)[::-1]] = np.arange(n_elem)
def prior(value, n_pix = n_pix, max_value = K, hyper_params = Lambda[np.triu_indices(n_pix)]):
    """2nd order prior for object maps"""
    #num_pairs = hyper_params.size
    if (np.min(value) < 1) or (np.max(value) > max_value):
        return -np.Inf
    else:
        on_offs = outer_map(value, n_pix, max_value)
        on_offs = on_offs[np.triu_indices(n_pix)].astype('int')
    
    return pm.bernoulli_like(on_offs, hyper_params.ravel())
Example #27
    def scrape_args(self, records, scale=1, guide_tree=None, niters=10, keep_topology=False):
        # local lists
        distances = []
        variances = []
        headers = []
        for rec in records:
            distances.append(rec.parameters.partitions.distances)
            variances.append(rec.parameters.partitions.variances)
            headers.append(rec.get_names())

        num_matrices = len(records)
        label_set = reduce(lambda x, y: x.union(y), (set(l) for l in headers))
        labels_len = len(label_set)

        # labels string can be built straight away
        labels_string = '{0}\n{1}\n'.format(labels_len, ' '.join(label_set))

        # distvar and genome_map need to be built up
        distvar_list = [str(num_matrices)]
        genome_map_list = ['{0} {1}'.format(num_matrices, labels_len)]

        # build up lists to turn into strings
        for i in range(num_matrices):
            labels = headers[i]
            dim = len(labels)
            dmatrix = np.array(distances[i])
            vmatrix = np.array(variances[i])
            matrix = np.zeros(dmatrix.shape)
            matrix[np.triu_indices(len(dmatrix), 1)] = dmatrix[np.triu_indices(len(dmatrix), 1)]
            matrix[np.tril_indices(len(vmatrix), -1)] = vmatrix[np.tril_indices(len(vmatrix), -1)]
            if scale:
                matrix[np.triu_indices(dim, 1)] *= scale
                matrix[np.tril_indices(dim, -1)] *= scale * scale

            if isinstance(matrix, np.ndarray):
                matrix_string = '\n'.join([' '.join(str(x) for x in row)
                                           for row in matrix]) + '\n'
            else:
                matrix_string = matrix
            distvar_list.append('{0} {0} {1}\n{2}'.format(dim, i + 1,
                                                          matrix_string))
            genome_map_entry = ' '.join((str(labels.index(lab) + 1)
                                         if lab in labels else '-1')
                                        for lab in label_set)
            genome_map_list.append(genome_map_entry)

        distvar_string = '\n'.join(distvar_list)
        genome_map_string = '\n'.join(genome_map_list)

        if guide_tree is None:
            guide_tree = Tree.new_iterative_rtree(labels_len, names=label_set, rooted=True)

        tree_string = guide_tree.scale(scale).newick.replace('\'', '')

        return distvar_string, genome_map_string, labels_string, tree_string, niters, keep_topology
Example #28
def corrcoef(matrix):
    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betai(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    p[np.tril_indices(p.shape[0], -1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
	def __init__(self, n, p,s=2,structure = (), *args, **kwargs):
		self.n = n
		self.p = p
		self.s = s
		n_elem = structure[0]#int(p * (p - 1) / 2)
		self.mean = np.zeros((s,n_elem))
		super(LKJCorr_mult_2_structure, self).__init__(shape=(s,n_elem), *args, **kwargs)

		self.tri_index = np.zeros([p, p], dtype=int)
		self.tri_index[np.triu_indices(p, k=1)] = structure[1]
		self.tri_index[np.triu_indices(p, k=1)[::-1]] = structure[1]
Example #30
def _get_lvec(label_vals, pivots, scales, derivs):
    """
    Constructs a label vector for an arbitrary number of labels
    Assumes that our model is quadratic in the labels
    
    Comment: this is really slow, but we will only have to compute it once!

    Parameters
    ----------
    label_vals: numpy ndarray, shape (nstars, nlabels)
        labels 
    pivots: numpy ndarray, shape (nlabels, )
        offset we subtract from the label_vals
    scales: numpy ndarray, shape (nlabels, )
        scale we divide out of the label_vals
    derivs: return also the derivatives of the vector wrt the labels

    Returns
    -------
    lvec: numpy ndarray
        label vector
    dlvec_dl: numpy ndarray (if derivs)
        label vector derivatives
        
    Notes
    --------
    lvec_derivs and lvec is now in units of the scaled labels! 
    """
    if len(label_vals.shape) == 1:
        label_vals = np.array([label_vals])
    nlabels = label_vals.shape[1]
    nstars = label_vals.shape[0]
    # specialized to second-order model
    linear_offsets = (label_vals - pivots[None, :]) / scales[None, :]
    quadratic_offsets = np.array([np.outer(m, m)[np.triu_indices(nlabels)]
                                  for m in (linear_offsets)])
    ones = np.ones((nstars, 1))
    lvec = np.hstack((ones, linear_offsets, quadratic_offsets))
    if not derivs:
        return lvec
    ones_derivs = np.zeros((nstars, 1, nlabels))
    linear_derivs = np.zeros((nstars, nlabels, nlabels))
    for i in range(nstars):
        linear_derivs[i] = np.eye(nlabels) 
    quadratic_derivs = np.zeros((nstars, len(quadratic_offsets[1]), nlabels))
    for n in range(nstars):
        for k in range(nlabels): 
            foo = np.zeros((nlabels, nlabels))
            foo[k, :] = linear_offsets[n]
            foo[:, k] = linear_offsets[n]
            quadratic_derivs[n, :, k] = np.array(foo[np.triu_indices(nlabels)]) 
    lvec_derivs = np.hstack((ones_derivs, linear_derivs, quadratic_derivs))
    
    return lvec, lvec_derivs
def linkage_tree(
    X,
    connectivity=None,
    n_clusters=None,
    linkage="complete",
    affinity="euclidean",
    return_distance=False,
):
    """Linkage agglomerative clustering based on a Feature matrix.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account some topological
    structure between samples.

    Read more in the :ref:`User Guide <hierarchical_clustering>`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix representing `n_samples` samples to be clustered.

    connectivity : sparse matrix, default=None
        Connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is `None`, i.e, the Ward algorithm is unstructured.

    n_clusters : int, default=None
        Stop early the construction of the tree at `n_clusters`. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of samples. In this case, the
        complete tree is not computed, thus the 'children' output is of
        limited use, and the 'parents' output should rather be used.
        This option is valid only when specifying a connectivity matrix.

    linkage : {"average", "complete", "single"}, default="complete"
        Which linkage criteria to use. The linkage criterion determines which
        distance to use between sets of observation.
            - "average" uses the average of the distances of each observation of
              the two sets.
            - "complete" or maximum linkage uses the maximum distances between
              all observations of the two sets.
            - "single" uses the minimum of the distances between all
              observations of the two sets.

    affinity : str or callable, default='euclidean'
        Which metric to use. Can be 'euclidean', 'manhattan', or any
        distance known to paired distance (see metric.pairwise).

    return_distance : bool, default=False
        Whether or not to return the distances between the clusters.

    Returns
    -------
    children : ndarray of shape (n_nodes-1, 2)
        The children of each non-leaf node. Values less than `n_samples`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_samples` is a non-leaf
        node and has children `children_[i - n_samples]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_samples + i`.

    n_connected_components : int
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree.

    parents : ndarray of shape (n_nodes, ) or None
        The parent of each node. Only returned when a connectivity matrix
        is specified, elsewhere 'None' is returned.

    distances : ndarray of shape (n_nodes-1,)
        Returned when `return_distance` is set to `True`.

        distances[i] refers to the distance between children[i][0] and
        children[i][1] when they are merged.

    See Also
    --------
    ward_tree : Hierarchical clustering with ward linkage.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = np.reshape(X, (-1, 1))
    n_samples, n_features = X.shape

    linkage_choices = {
        "complete": _hierarchical.max_merge,
        "average": _hierarchical.average_merge,
        "single": None,
    }  # Single linkage is handled differently
    try:
        join_func = linkage_choices[linkage]
    except KeyError as e:
        raise ValueError(
            "Unknown linkage option, linkage should be one of %s, but %s was given"
            % (linkage_choices.keys(), linkage)
        ) from e

    if affinity == "cosine" and np.any(~np.any(X, axis=1)):
        raise ValueError("Cosine affinity cannot be used when X contains zero vectors")

    if connectivity is None:
        from scipy.cluster import hierarchy  # imports PIL

        if n_clusters is not None:
            warnings.warn(
                "Partial build of the tree is implemented "
                "only for structured clustering (i.e. with "
                "explicit connectivity). The algorithm "
                "will build the full tree and only "
                "retain the lower branches required "
                "for the specified number of clusters",
                stacklevel=2,
            )

        if affinity == "precomputed":
            # for the linkage function of hierarchy to work on precomputed
            # data, provide as first argument an ndarray of the shape returned
            # by sklearn.metrics.pairwise_distances.
            if X.shape[0] != X.shape[1]:
                raise ValueError(
                    f"Distance matrix should be square, got matrix of shape {X.shape}"
                )
            i, j = np.triu_indices(X.shape[0], k=1)
            X = X[i, j]
        elif affinity == "l2":
            # Translate to something understood by scipy
            affinity = "euclidean"
        elif affinity in ("l1", "manhattan"):
            affinity = "cityblock"
        elif callable(affinity):
            X = affinity(X)
            i, j = np.triu_indices(X.shape[0], k=1)
            X = X[i, j]
        if (
            linkage == "single"
            and affinity != "precomputed"
            and not callable(affinity)
            and affinity in METRIC_MAPPING
        ):

            # We need the fast cythonized metric from neighbors
            dist_metric = DistanceMetric.get_metric(affinity)

            # The Cython routines used require contiguous arrays
            X = np.ascontiguousarray(X, dtype=np.double)

            mst = _hierarchical.mst_linkage_core(X, dist_metric)
            # Sort edges of the min_spanning_tree by weight
            mst = mst[np.argsort(mst.T[2], kind="mergesort"), :]

            # Convert edge list into standard hierarchical clustering format
            out = _hierarchical.single_linkage_label(mst)
        else:
            out = hierarchy.linkage(X, method=linkage, metric=affinity)
        children_ = out[:, :2].astype(int, copy=False)

        if return_distance:
            distances = out[:, 2]
            return children_, 1, n_samples, None, distances
        return children_, 1, n_samples, None

    connectivity, n_connected_components = _fix_connectivity(
        X, connectivity, affinity=affinity
    )
    connectivity = connectivity.tocoo()
    # Put the diagonal to zero
    diag_mask = connectivity.row != connectivity.col
    connectivity.row = connectivity.row[diag_mask]
    connectivity.col = connectivity.col[diag_mask]
    connectivity.data = connectivity.data[diag_mask]
    del diag_mask

    if affinity == "precomputed":
        distances = X[connectivity.row, connectivity.col].astype(np.float64, copy=False)
    else:
        # FIXME We compute all the distances, while we could have only computed
        # the "interesting" distances
        distances = paired_distances(
            X[connectivity.row], X[connectivity.col], metric=affinity
        )
    connectivity.data = distances

    if n_clusters is None:
        n_nodes = 2 * n_samples - 1
    else:
        assert n_clusters <= n_samples
        n_nodes = 2 * n_samples - n_clusters

    if linkage == "single":
        return _single_linkage_tree(
            connectivity,
            n_samples,
            n_nodes,
            n_clusters,
            n_connected_components,
            return_distance,
        )

    if return_distance:
        distances = np.empty(n_nodes - n_samples)
    # create inertia heap and connection matrix
    A = np.empty(n_nodes, dtype=object)
    inertia = list()

    # LIL seems to the best format to access the rows quickly,
    # without the numpy overhead of slicing CSR indices and data.
    connectivity = connectivity.tolil()
    # We are storing the graph in a list of IntFloatDict
    for ind, (data, row) in enumerate(zip(connectivity.data, connectivity.rows)):
        A[ind] = IntFloatDict(
            np.asarray(row, dtype=np.intp), np.asarray(data, dtype=np.float64)
        )
        # We keep only the upper triangular for the heap
        # Generator expressions are faster than arrays on the following
        inertia.extend(
            _hierarchical.WeightedEdge(d, ind, r) for r, d in zip(row, data) if r < ind
        )
    del connectivity

    heapify(inertia)

    # prepare the main fields
    parent = np.arange(n_nodes, dtype=np.intp)
    used_node = np.ones(n_nodes, dtype=np.intp)
    children = []

    # recursive merge loop
    for k in range(n_samples, n_nodes):
        # identify the merge
        while True:
            edge = heappop(inertia)
            if used_node[edge.a] and used_node[edge.b]:
                break
        i = edge.a
        j = edge.b

        if return_distance:
            # store distances
            distances[k - n_samples] = edge.weight

        parent[i] = parent[j] = k
        children.append((i, j))
        # Keep track of the number of elements per cluster
        n_i = used_node[i]
        n_j = used_node[j]
        used_node[k] = n_i + n_j
        used_node[i] = used_node[j] = False

        # update the structure matrix A and the inertia matrix
        # a clever 'min', or 'max' operation between A[i] and A[j]
        coord_col = join_func(A[i], A[j], used_node, n_i, n_j)
        for col, d in coord_col:
            A[col].append(k, d)
            # Here we use the information from coord_col (containing the
            # distances) to update the heap
            heappush(inertia, _hierarchical.WeightedEdge(d, k, col))
        A[k] = coord_col
        # Clear A[i] and A[j] to save memory
        A[i] = A[j] = 0

    # Separate leaves in children (empty lists up to now)
    n_leaves = n_samples

    # # return numpy array for efficient caching
    children = np.array(children)[:, ::-1]

    if return_distance:
        return children, n_connected_components, n_leaves, parent, distances
    return children, n_connected_components, n_leaves, parent
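The precomputed/callable branches above condense the square distance matrix into the upper-triangle vector that scipy.cluster.hierarchy.linkage expects. The same step in isolation (a small sketch, with arbitrary sample data):

import numpy as np
from scipy.cluster import hierarchy
from scipy.spatial.distance import pdist, squareform

pts = np.random.default_rng(3).normal(size=(5, 2))
D = squareform(pdist(pts))                 # square, symmetric distance matrix
i, j = np.triu_indices(D.shape[0], k=1)
condensed = D[i, j]                        # row-major strict upper triangle == condensed form
Z = hierarchy.linkage(condensed, method='complete')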
    def build_eom_matrices(self,
                           excitations_list,
                           q_commutators,
                           w_commutators,
                           m_commutators,
                           v_commutators,
                           available_entry,
                           wave_fn,
                           quantum_instance=None):
        """Compute M, V, Q and W matrices.

        Args:
            excitations_list (list): single excitations list + double excitation list
            q_commutators (dict): key: a string of matrix indices; value: the commutators for Q matrix
            w_commutators (dict): key: a string of matrix indices; value: the commutators for W matrix
            m_commutators (dict): key: a string of matrix indices; value: the commutators for M matrix
            v_commutators (dict): key: a string of matrix indices; value: the commutators for V matrix
            available_entry (int): number of entries in the matrix
            wave_fn (QuantumCircuit or numpy.ndarray): the circuit generated wave function for the ground state energy
            quantum_instance (QuantumInstance): a quantum instance with configured settings

        Returns:
            numpy.ndarray: M matrix
            numpy.ndarray: V matrix
            numpy.ndarray: Q matrix
            numpy.ndarray: W matrix

        Raises:
            AquaError: wrong setting for wave_fn and quantum_instance
        """
        if isinstance(wave_fn, QuantumCircuit) and quantum_instance is None:
            raise AquaError(
                "quantum_instance is required when wavn_fn is a QuantumCircuit."
            )

        size = len(excitations_list)
        logger.info('EoM matrix size is {}x{}.'.format(size, size))

        # get all to-be-processed index
        if self._is_eom_matrix_symmetric:
            mus, nus = np.triu_indices(size)
        else:
            mus, nus = np.indices((size, size))
            mus = np.asarray(mus.flat)
            nus = np.asarray(nus.flat)

        m_mat = np.zeros((size, size), dtype=complex)
        v_mat = np.zeros((size, size), dtype=complex)
        q_mat = np.zeros((size, size), dtype=complex)
        w_mat = np.zeros((size, size), dtype=complex)
        m_mat_std, v_mat_std, q_mat_std, w_mat_std = 0, 0, 0, 0

        if quantum_instance is not None:

            circuit_names = []
            circuits = []
            for idx in range(len(mus)):
                mu = mus[idx]
                nu = nus[idx]

                for op in [
                        q_commutators[mu][nu], w_commutators[mu][nu],
                        m_commutators[mu][nu], v_commutators[mu][nu]
                ]:
                    if op is not None and not op.is_empty():
                        curr_circuits = op.construct_evaluation_circuit(
                            wave_function=wave_fn,
                            statevector_mode=quantum_instance.is_statevector)
                        for c in curr_circuits:
                            if c.name not in circuit_names:
                                circuits.append(c)
                                circuit_names.append(c.name)

            result = quantum_instance.execute(circuits)

            # evaluate results
            for idx in range(len(mus)):
                mu = mus[idx]
                nu = nus[idx]

                def _get_result(op):
                    mean, std = 0.0, 0.0
                    if op is not None and not op.is_empty():
                        mean, std = op.evaluate_with_result(
                            result=result,
                            statevector_mode=quantum_instance.is_statevector)
                    return mean, std

                q_mean, q_std = _get_result(q_commutators[mu][nu])
                w_mean, w_std = _get_result(w_commutators[mu][nu])
                m_mean, m_std = _get_result(m_commutators[mu][nu])
                v_mean, v_std = _get_result(v_commutators[mu][nu])

                q_mat[mu][nu] = q_mean if q_mean != 0.0 else q_mat[mu][nu]
                w_mat[mu][nu] = w_mean if w_mean != 0.0 else w_mat[mu][nu]
                m_mat[mu][nu] = m_mean if m_mean != 0.0 else m_mat[mu][nu]
                v_mat[mu][nu] = v_mean if v_mean != 0.0 else v_mat[mu][nu]
                q_mat_std += q_std
                w_mat_std += w_std
                m_mat_std += m_std
                v_mat_std += v_std
        else:
            for idx in range(len(mus)):
                mu = mus[idx]
                nu = nus[idx]
                q_mean, q_std = q_commutators[mu][nu].evaluate_with_statevector(wave_fn) \
                    if q_commutators[mu][nu] is not None else (0.0, 0.0)
                w_mean, w_std = w_commutators[mu][nu].evaluate_with_statevector(wave_fn) \
                    if w_commutators[mu][nu] is not None else (0.0, 0.0)
                m_mean, m_std = m_commutators[mu][nu].evaluate_with_statevector(wave_fn) \
                    if m_commutators[mu][nu] is not None else (0.0, 0.0)
                v_mean, v_std = v_commutators[mu][nu].evaluate_with_statevector(wave_fn) \
                    if v_commutators[mu][nu] is not None else (0.0, 0.0)
                q_mat[mu][nu] = q_mean if q_mean != 0.0 else q_mat[mu][nu]
                w_mat[mu][nu] = w_mean if w_mean != 0.0 else w_mat[mu][nu]
                m_mat[mu][nu] = m_mean if m_mean != 0.0 else m_mat[mu][nu]
                v_mat[mu][nu] = v_mean if v_mean != 0.0 else v_mat[mu][nu]

        if self._is_eom_matrix_symmetric:
            q_mat = q_mat + q_mat.T - np.identity(q_mat.shape[0]) * q_mat
            w_mat = w_mat + w_mat.T - np.identity(w_mat.shape[0]) * w_mat
            m_mat = m_mat + m_mat.T - np.identity(m_mat.shape[0]) * m_mat
            v_mat = v_mat + v_mat.T - np.identity(v_mat.shape[0]) * v_mat

        q_mat = np.real(q_mat)
        w_mat = np.real(w_mat)
        m_mat = np.real(m_mat)
        v_mat = np.real(v_mat)

        q_mat_std = q_mat_std / float(available_entry)
        w_mat_std = w_mat_std / float(available_entry)
        m_mat_std = m_mat_std / float(available_entry)
        v_mat_std = v_mat_std / float(available_entry)

        logger.debug("\nQ:=========================\n{}".format(q_mat))
        logger.debug("\nW:=========================\n{}".format(w_mat))
        logger.debug("\nM:=========================\n{}".format(m_mat))
        logger.debug("\nV:=========================\n{}".format(v_mat))

        return m_mat, v_mat, q_mat, w_mat, m_mat_std, v_mat_std, q_mat_std, w_mat_std
Example #33
init_vel_disp = 3
perc_error = 0.001

xycorr = 0.0
xzcorr = 0.0
yzcorr = 0.0

corrs = [xycorr, xzcorr, yzcorr]
stdevs = [
    init_pos_disp, init_pos_disp, init_pos_disp, init_vel_disp, init_vel_disp,
    init_vel_disp
]

cov = np.eye(6)
cov[np.tril_indices(3, -1)] = corrs
cov[np.triu_indices(3, 1)] = corrs

for i in range(3):
    cov[:3, i] *= stdevs[:3]
    cov[i, :3] *= stdevs[:3]

for i in range(3, 6):
    cov[3:6, i] *= stdevs[3:]
    cov[i, 3:6] *= stdevs[3:]

print(cov)

np.random.seed(0)
nstars = 30

# generate initial stars from an arbitrary covariance matrix
aparc = mne.read_labels_from_annot(subject, subjects_dir=subjects_dir, parc='aparc')

# nodes in one hemisphere can be plotted as well
aparc_lh = [lab for lab in aparc if lab.hemi == 'lh']

coords = []

# plot 10 nodes from left hemisphere only for better viz
for lab in aparc_lh[:10]:
    if lab.name == 'unknown-lh':
        continue
    # get the center of mass
    com = lab.center_of_mass('fsaverage')
    # obtain mni coordinated to the vertex from left hemi
    coords_ = mne.vertex_to_mni(com, hemis=0, subject=subject, subjects_dir=subjects_dir)[0]
    coords.append(coords_)

n_nodes = np.array(coords).shape[0]

# make a random connectivity matrix
con = np.random.random((n_nodes, n_nodes))
con[np.diag_indices(5)] = 0.
con[np.triu_indices(5, k=1)] = 0.
con += con.T
con[con < 0.6] = 0.

# plot the connectome on a glass brain background
plotting.plot_connectome(con, coords)
plt.show()
Example #35
switch = nib.load('./Neurosynth/switching_pAgF_z.nii')  # anti rt
switch = np.array(switch.dataobj)  #

switch2 = nib.load('./Neurosynth/switch_pAgF_z.nii')  # anti rt
affine = switch2.affine
switch2 = np.array(switch2.dataobj)  #

nogo = nib.load('./Neurosynth/nogo_pAgF_z.nii')  # anti(?)
nogo = np.array(nogo.dataobj)  #

#############################################
# LOADING CORRELATION MATRIX FROM OUR TASKS #
#############################################

model = np.loadtxt('task6_corr.csv', delimiter=',')
model = model[np.triu_indices(6, k=1)]  # extracting the upper triangle
#print(model)
model = np.reshape(model, (model.shape[0], 1))
model = pd.DataFrame(model)  #transform it into DataFrame type to correlate

# Initialize an array to add up the correlations
brain_corr = np.zeros((91, 109, 91))
#Initialize an array to count how many times region wass scoped for correlations
brain_count = np.zeros((91, 109, 91))

######################
#      SETTINGS      #
######################

# Steps taken per iteration
stride = 1
def _triu_indices(n):
    rows, cols = np.triu_indices(n)
    return rows * n + cols
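A quick check (not in the original source) that the flattened positions returned by _triu_indices index the upper triangle of a row-major-ravelled matrix:

import numpy as np

n = 3
m = np.arange(n * n).reshape(n, n)
assert np.array_equal(m.ravel()[_triu_indices(n)], m[np.triu_indices(n)])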
Example #37
    def plot_scatter(self,
                     plot_filename,
                     plot_title='',
                     image_format=None,
                     log1p=False,
                     xRange=None,
                     yRange=None):
        """
        Plot the scatter plots of a matrix
        in which each row is a sample
        """

        num_samples = self.matrix.shape[1]
        corr_matrix = self.compute_correlation()
        grids = gridspec.GridSpec(num_samples, num_samples)
        grids.update(wspace=0, hspace=0)
        fig = plt.figure(figsize=(2 * num_samples, 2 * num_samples))
        plt.rcParams['font.size'] = 8.0
        plt.suptitle(plot_title)
        if log1p is True:
            self.matrix = np.log1p(self.matrix)
        min_xvalue = self.matrix.min()
        max_xvalue = self.matrix.max()
        min_yvalue = min_xvalue
        max_yvalue = max_xvalue
        if xRange is not None:
            min_xvalue = xRange[0]
            max_xvalue = xRange[1]
        if yRange is not None:
            min_yvalue = yRange[0]
            max_yvalue = yRange[1]
        if (min_xvalue % 2 == 0 and max_xvalue % 2 == 0) or \
                (min_xvalue % 1 == 0 and max_xvalue % 2 == 1):
            # make one value odd and the other even
            max_xvalue += 1
        if (min_yvalue % 2 == 0 and max_yvalue % 2 == 0) or \
                (min_yvalue % 1 == 0 and max_yvalue % 2 == 1):
            # make one value odd and the other even
            max_yvalue += 1

        # plotly output
        if image_format == 'plotly':
            self.plotly_scatter(plot_filename,
                                corr_matrix,
                                plot_title=plot_title,
                                minXVal=min_xvalue,
                                maxXVal=max_xvalue,
                                minYVal=min_yvalue,
                                maxYVal=max_yvalue)
            return

        rows, cols = np.triu_indices(num_samples)

        for index in range(len(rows)):
            row = rows[index]
            col = cols[index]
            if row == col:
                # add titles as
                # empty plot in the diagonal
                ax = fig.add_subplot(grids[row, col])
                ax.text(0.5,
                        0.5,
                        self.labels[row],
                        verticalalignment='center',
                        horizontalalignment='center',
                        fontsize=10,
                        fontweight='bold',
                        transform=ax.transAxes)
                ax.set_axis_off()
                continue

            ax = fig.add_subplot(grids[row, col])

            vector1 = self.matrix[:, row]
            vector2 = self.matrix[:, col]

            ax.text(0.2,
                    0.8,
                    "{}={:.2f}".format(self.corr_method, corr_matrix[row,
                                                                     col]),
                    horizontalalignment='left',
                    transform=ax.transAxes)
            ax.get_yaxis().set_tick_params(which='both',
                                           left='off',
                                           right='off',
                                           direction='out')

            ax.get_xaxis().set_tick_params(which='both',
                                           top='off',
                                           bottom='off',
                                           direction='out')
            for tick in ax.xaxis.get_major_ticks():
                tick.label.set_rotation('45')

            if col != num_samples - 1:
                ax.set_yticklabels([])
            else:
                ax.yaxis.tick_right()
                ax.get_yaxis().set_tick_params(which='both',
                                               left='off',
                                               right='on',
                                               direction='out')
            if col - row == 1:
                ax.xaxis.tick_bottom()
                ax.get_xaxis().set_tick_params(which='both',
                                               top='off',
                                               bottom='on',
                                               direction='out')
                for tick in ax.xaxis.get_major_ticks():
                    tick.label.set_rotation('45')

            else:
                ax.set_xticklabels([])

            ax.hist2d(vector1, vector2, bins=200, cmin=0.1)

            if xRange is not None:
                ax.set_xlim(xRange)
            else:
                ax.set_xlim(min_xvalue, ax.get_xlim()[1])
            if yRange is not None:
                ax.set_ylim(yRange[0], min(yRange[1], ax.get_ylim()[1]))
            else:
                ax.set_ylim(min_yvalue, ax.get_ylim()[1])

        plt.savefig(plot_filename, format=image_format)
        plt.close()
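Below is a minimal, self-contained sketch of the same upper-triangle panel layout, assuming synthetic data; names such as data and labels are illustrative stand-ins for self.matrix and self.labels used in the method above.

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
data = rng.normal(size=(500, 3))      # columns play the role of samples
labels = ['sample1', 'sample2', 'sample3']
n = data.shape[1]

fig, axes = plt.subplots(n, n, figsize=(6, 6))
rows, cols = np.triu_indices(n)       # upper triangle, diagonal included
for r, c in zip(rows, cols):
    ax = axes[r, c]
    if r == c:
        # diagonal panel: show the sample label instead of data
        ax.text(0.5, 0.5, labels[r], ha='center', va='center', fontweight='bold')
        ax.set_axis_off()
    else:
        ax.hist2d(data[:, r], data[:, c], bins=50, cmin=0.1)
for r in range(n):                    # hide the unused lower-triangle panels
    for c in range(r):
        axes[r, c].set_axis_off()
plt.savefig('pairwise_scatter_sketch.png', format='png')
plt.close(fig)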
Example #38
0
ax1.scatter(pos[:,0], pos[:,1], c=noisy_signal[0].tolist(), cmap=plt.cm.jet)
ax2.scatter(pos[:,0], pos[:,1], c=gl_denoised_signal[0].tolist(), cmap=plt.cm.jet)
ax1.axis('off')
ax2.axis('off')
ax1.set_title('Noisy Signal')
ax2.set_title('GL Signal')
plt.tight_layout()
plt.show()

test_item=np.random.normal(size=dimension)
true_payoff=np.dot(true_user_features, test_item)
gl_payoff=np.dot(gl_user_f, test_item)

pos=true_user_features
graph=create_networkx_graph(user_num, true_adj)
edge_color=true_adj[np.triu_indices(user_num,1)]
plt.figure(figsize=(5,5))
nodes=nx.draw_networkx_nodes(graph, pos, node_color=true_payoff, node_size=100, cmap=plt.cm.jet)
edges=nx.draw_networkx_edges(graph, pos, width=1.0, alpha=0.1, edge_color='grey')
plt.axis('off')
plt.title('True Graph', fontsize=12)
plt.show()


pos=true_user_features
graph=create_networkx_graph(user_num, gl_adj)
edge_color=gl_adj[np.triu_indices(user_num,1)]
plt.figure(figsize=(5,5))
nodes=nx.draw_networkx_nodes(graph, pos, node_color=gl_payoff, node_size=100, cmap=plt.cm.jet)
edges=nx.draw_networkx_edges(graph, pos, width=1.0, alpha=0.1, edge_color='grey')
plt.axis('off')
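A minimal sketch of how the per-edge values relate to the adjacency matrix above, assuming a dense symmetric adjacency array and networkx >= 2.4; create_networkx_graph is a user-defined helper and is not reproduced here.

import numpy as np
import networkx as nx

rng = np.random.default_rng(0)
n = 5
adj = rng.random((n, n))
adj = (adj + adj.T) / 2                 # symmetric weights
np.fill_diagonal(adj, 0)                # no self-loops

graph = nx.from_numpy_array(adj)        # weighted undirected graph
edge_vals = adj[np.triu_indices(n, 1)]  # one value per unordered node pair
assert len(edge_vals) == graph.number_of_edges()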
Example #39
0
def complexity_fd_higushi(signal, k_max):
    """
    Computes Higuchi Fractal Dimension of a signal. Based on the `pyrem <https://github.com/gilestrolab/pyrem>`_ repo by Quentin Geissmann.

    Parameters
    ----------
    signal : list or array
        List or array of values.
    k_max : int
        The maximal value of k. The point at which the FD plateaus is considered a saturation point and that kmax value should be selected (Gómez, 2009). Some studies use a value of 8 or 16 for ECG signals and others use 48 for MEG.

    Returns
    ----------
    fd_higushi : float
        The Higuchi Fractal Dimension as a float value.


    Example
    ----------
    >>> import neurokit as nk
    >>>
    >>> signal = np.sin(np.log(np.random.sample(666)))
    >>> fd_higushi = nk.complexity_fd_higushi(signal, 8)

    Notes
    ----------
    *Details*

    - **Higuchi Fractal Dimension**: Higuchi proposed in 1988 an efficient algorithm for measuring the FD of discrete time sequences. As the reconstruction of the attractor phase space is not necessary, this algorithm is simpler and faster than D2 and other classical measures derived from chaos theory. FD can be used to quantify the complexity and self-similarity of a signal. HFD has already been used to analyse the complexity of brain recordings and other biological signals.


    *Authors*

    - Quentin Geissmann (https://github.com/qgeissmann)

    *Dependencies*

    - numpy

    *See Also*

    - pyrem package: https://github.com/gilestrolab/pyrem

    References
    -----------
    - Accardo, A., Affinito, M., Carrozzi, M., & Bouquet, F. (1997). Use of the fractal dimension for the analysis of electroencephalographic time series. Biological cybernetics, 77(5), 339-350.
    - Gómez, C., Mediavilla, Á., Hornero, R., Abásolo, D., & Fernández, A. (2009). Use of the Higuchi's fractal dimension for the analysis of MEG recordings from Alzheimer's disease patients. Medical engineering & physics, 31(3), 306-313.
    """
    signal = np.array(signal)
    L = []
    x = []
    N = signal.size

    km_idxs = np.triu_indices(k_max - 1)
    km_idxs = k_max - np.flipud(np.column_stack(km_idxs)) -1
    km_idxs[:,1] -= 1


    for k in range(1, k_max):
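        # For this scale k: average the normalized curve length over the k
        # possible starting offsets m; after the loop, log(L(k)) is regressed
        # on log(1/k) and the slope is the Higuchi fractal dimension.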
        Lk = 0
        for m in range(0, k):
            #we pregenerate all idxs
            idxs = np.arange(1,int(np.floor((N-m)/k)))

            Lmk = np.sum(np.abs(signal[m+idxs*k] - signal[m+k*(idxs-1)]))
            Lmk = (Lmk*(N - 1)/(((N - m)/ k)* k)) / k
            Lk += Lmk

        if Lk != 0:
            L.append(np.log(Lk/(m+1)))
            x.append([np.log(1.0/ k), 1])

    (p, r1, r2, s)=np.linalg.lstsq(x, L)
    fd_higushi = p[0]
    return (fd_higushi)
Example #40
0
def cluster_compartments(cf,
                         k,
                         chrlist,
                         eig_dim=None,
                         contact_thr=1,
                         max_sample_size=50000,
                         outlier_pctl=90,
                         corr_outlier_pctl=[5, 95],
                         balance_corr_median=False,
                         coeffs=None,
                         coeffs_gw=None,
                         seed=None,
                         max_resampling_attempts=10,
                         rearrange_clusters=False,
                         use_ice=False,
                         algorithm='eigh-kmeans',
                         outdir='.',
                         out_allchr='clusters_all.txt'):
    if algorithm not in ['eigh-gmix', 'eigh-kmeans', 'spec-kmeans']:
        print "error: algorithm must be either 'eigh-gmix', 'eigh-kmeans' or 'spec-kmeans'"
        return

    print "[intrachromosomal_clusters] k={}, outdir={}, algorithm={}".format(
        k, outdir, algorithm)
    if not use_ice:
        if coeffs is None and coeffs_gw is None:
            print 'computing normalization coeffs (local masked OE)...'
            coeffs = oe_coeffs_mask(cf, cf.chromnames)
        elif coeffs is None and coeffs_gw is not None:
            print 'using user-provided global OE coeffs'
    else:
        print 'using ICE balancing coeffs from cooler file'

    if eig_dim == None:
        eig_dim = k

    clusters = {}
    sample_idx = {}
    clusters_idx = {}

    for chr in chrlist:
        if os.path.isfile('{}/clusters_{}.txt'.format(outdir, chr)):
            print "Warning: {} clusters ({}/clusters_{}.txt) already exist. Skipping chromosome.".format(
                chr, outdir, chr)
            continue
        print "[{}] balancing matrix...".format(chr)
        if not use_ice:
            m = cf.matrix(balance=False).fetch(chr)
            # Threshold contacts
            m[np.where(m < contact_thr)] = 0
            if coeffs_gw is not None:
                m_oe = oe_apply(m, coeffs_gw).toarray()
            else:
                m_oe = oe_apply(m, coeffs[chr]).toarray()
        else:
            m_oe = cf.matrix(balance=True).fetch(chr)

        # Get idx of high quality regions (measured in raw matrix).
        samp_idx = matrix_mask_idx(m_oe)
        sample_idx[chr] = samp_idx
        print "[{}] removing low-quality regions (matrix rows: {}, sample rows: {})...".format(
            chr, m_oe.shape[0], samp_idx.shape[0])
        # High-quality matrix size
        l = len(samp_idx)
        ssize = min(l, max_sample_size)
        # Sample iteration (keep sampling while clustering fails).
        np.random.seed(seed)
        successful = False
        cnt = 0
        while not successful and cnt < max_resampling_attempts:
            cnt += 1
            # Get sample
            if ssize < l:
                s = np.sort(np.random.choice(samp_idx, ssize, replace=False))
            else:
                s = np.array(samp_idx)
            m_samp = m_oe[s, :][:, s]
            # Relax outliers
            m_max = np.percentile(m_samp[np.where(m_samp > 0)], outlier_pctl)
            m_samp[np.where(m_samp > m_max)] = m_max
            if (~m_samp.any(axis=1)).any():
                print "[{}] sample contains empty rows (singular matrix). resampling ({})...".format(
                    chr, cnt)
                continue

            # Remove diagonals before correlation (DISABLED)
            '''
            if pre_corr_diags > 0:
                m_cor = np.corrcoef(np.triu(m_samp,pre_corr_diags) + np.tril(m_samp,-pre_corr_diags))
            else:
                m_cor = np.corrcoef(m_samp)

            # Remove diagonals after correlation
            if corr_diags > 1:
                m_cor = np.triu(m_cor,corr_diags) + np.tril(m_cor,-corr_diags)
            else:
                np.fill_diagonal(m_cor,0)
            '''
            # Compute correlation and remove diagonal
            print "[{}] computing correlation matrix and balancing...".format(
                chr)
            m_cor = np.corrcoef(m_samp)
            np.fill_diagonal(m_cor, 0)

            # Increase correlation contrast (5-95 percentiles by default)
            if balance_corr_median:
                m_cor = m_cor - np.median(m_cor[np.triu_indices(ssize, 1)])
            min_cor_val = np.percentile(m_cor[np.triu_indices(ssize, 1)],
                                        corr_outlier_pctl[0])
            max_cor_val = np.percentile(m_cor[np.triu_indices(ssize, 1)],
                                        corr_outlier_pctl[1])
            m_cor[np.where(m_cor < min_cor_val)] = min_cor_val
            m_cor[np.where(m_cor > max_cor_val)] = max_cor_val

            N = m_cor.shape[0]
            eig_dim = min(N, eig_dim)
            try:
                print "[{}] computing clusters, algorithm {}...".format(
                    chr, algorithm)
                if algorithm == 'spec-kmeans':
                    # some chromosomes crash when using precomputed similarity matrices.
                    # however using RBF seems to give meaningful clustering.
                    spect_clu = SpectralClustering(n_clusters=k,
                                                   eigen_solver='arpack',
                                                   affinity='precomputed',
                                                   assign_labels='kmeans',
                                                   n_jobs=8)
                    hic_clust = spect_clu.fit_predict(m_cor)
                else:
                    print "[{}] computing eigh...".format(chr)
                    w, v = scipy.linalg.eigh(m_cor,
                                             eigvals=(N - eig_dim, N - 1))
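                    # eigvals=(N - eig_dim, N - 1): eigh returns eigenvalues in
                    # ascending order, so this keeps the eig_dim largest ones.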

                    if algorithm == 'eigh-gmix':
                        # Cluster eigenvectors using Gaussian Mixture
                        gmix = mixture.GaussianMixture(n_components=k,
                                                       covariance_type='full',
                                                       tol=1e-4,
                                                       max_iter=1000)
                        gmix.fit(v)
                        hic_clust = gmix.predict(v)
                    elif algorithm == 'eigh-kmeans':
                        # Cluster eigenvalue/eigenvector products with kmeans.
                        print "[{}] computing clusters (k-means)...".format(
                            chr)
                        km = KMeans(n_clusters=k, n_jobs=8)
                        weig = w * v
                        hic_clust = km.fit_predict(weig)
                        # Write weighted eigenvectors
                        with open('{}/clusters_{}.weig'.format(outdir, chr),
                                  'w') as outdata:
                            for i in xrange(0, len(hic_clust)):
                                outdata.write(
                                    str(sample_idx[chr][i]) + '\t' +
                                    str(hic_clust[i]) + '\t' +
                                    '\t'.join([str(x)
                                               for x in weig[i][::-1]]) + '\n')
            except Exception, e:
                print "[{}] error while clustering (attempt {}): {}".format(
                    chr, cnt, str(e))
                cnt = max_resampling_attempts
                break
            successful = True

        if cnt >= max_resampling_attempts:
            print "[{}] max resampling attempts reached, skipping chromosome.".format(
                chr)
            continue

        # Rearrange clusters for visualization
        # Make cluster index list
        clu_idx = [list() for _ in xrange(k)]
        for i, c in enumerate(hic_clust):
            clu_idx[c].append(i)

        if not rearrange_clusters:
            # Map again to matrix indices
            clusters_idx[chr] = [sample_idx[chr][x] for x in clu_idx]

        else:
            print "[{}] rearranging clusters by similarity...".format(chr)
            for i in xrange(k):
                clu_idx[i] = np.array(clu_idx[i])

            clusters[chr] = list()

            # Find most distant blocks
            l_r = (0, 0)
            val = np.inf
            d_sum = np.zeros((k, k))
            for i in xrange(k):
                l_i = len(clu_idx[i])
                for j in xrange(i + 1, k):
                    l_j = len(clu_idx[j])
                    d_sum[i, j] = np.sum(m_cor[clu_idx[i], :][:, clu_idx[j]])
                    d = float(d_sum[i, j]) / (l_i * l_j)
                    if d < val:
                        l_r = (i, j)
                        val = d

            # Pop left and right blocks (important to do it in this order for index consistency).
            r_idx = clu_idx.pop(l_r[1])
            l_idx = clu_idx.pop(l_r[0])
            r_clusters = [
                r_idx.copy(),
            ]
            l_clusters = [
                l_idx.copy(),
            ]

            iters = len(clu_idx) / 2 + len(clu_idx) % 2
            for i in xrange(iters):
                # Find nearest blocks to L/R.
                len_l = len(l_idx)
                len_r = len(r_idx)
                min_d = np.inf
                max_d = -np.inf
                min_idx = 0
                max_idx = 0
                for i in xrange(len(clu_idx)):
                    len_block = len(clu_idx[i])
                    d_l = float(np.sum(m_cor[l_idx, :][:, clu_idx[i]])) / (
                        len_l * len_block) - val
                    d_r = float(np.sum(m_cor[r_idx, :][:, clu_idx[i]])) / (
                        len_r * len_block) - val
                    r = d_l / d_r
                    if r < min_d:
                        min_idx = i
                        min_d = r
                    if r >= max_d:
                        max_idx = i
                        max_d = r
                # Pop from idx and add to L/R.
                if min_idx > max_idx:
                    r_clusters.append(clu_idx[min_idx].copy())
                    l_clusters.append(clu_idx[max_idx].copy())
                    r_idx = np.append(clu_idx.pop(min_idx), r_idx)
                    l_idx = np.append(l_idx, clu_idx.pop(max_idx))
                elif min_idx < max_idx:
                    r_clusters.append(clu_idx[min_idx].copy())
                    l_clusters.append(clu_idx[max_idx].copy())
                    l_idx = np.append(l_idx, clu_idx.pop(max_idx))
                    r_idx = np.append(clu_idx.pop(min_idx), r_idx)
                else:
                    l_clusters.append(clu_idx[max_idx].copy())
                    l_idx = np.append(l_idx, clu_idx.pop(max_idx))
            # Make final index list.
            clu_idx = np.append(l_idx, r_idx)

            # Make final cluster index list.
            clusters[chr] = l_clusters + list(reversed(r_clusters))

            # Map again to matrix indices
            clusters_idx[chr] = [sample_idx[chr][x] for x in clusters[chr]]

        # Store in disk
        print "[{}] writing clusters to {}/clusters_{}.txt...".format(
            chr, outdir, chr)
        fout = open('{}/clusters_{}.txt'.format(outdir, chr), 'w+')
        for c in clusters_idx[chr]:
            fout.write("{}\t".format(chr))
            fout.write(','.join([str(i) for i in c]))
            fout.write('\n')
        fout.close()
        fall = open('{}/{}'.format(outdir, out_allchr), "a")
        for c in clusters_idx[chr]:
            fall.write("{}\t".format(chr))
            fall.write(','.join([str(i) for i in c]))
            fall.write('\n')
        fall.close()
Example #41
0
def _from_rdkit(cls, mol, rdkit_config):
    if rdkit_config.set_hydrogen_explicit:
        mol = Chem.AddHs(mol)
    g = nx.Graph()
    # For single heavy-atom molecules, such as water, methane and metalic ion.
    # A ghost atom is created and bond to it, because there must be at least
    # two nodes and one edge in graph kernel.
    if mol.GetNumBonds() == 0:
        for i, atom in enumerate(mol.GetAtoms()):
            assert (atom.GetIdx() == i)
            g.add_node(i)
            rdkit_config.set_node(g.nodes[i], atom, mol)

        if mol.GetNumAtoms() == 1:
            ij = (0, 0)
            g.add_edge(*ij)
            rdkit_config.set_ghost_edge(g.edges[ij])
        else:
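            # k=1 skips the diagonal, so every unordered pair of atoms gets
            # exactly one ghost edge (a complete graph on the atoms).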
            I, J = np.triu_indices(mol.GetNumAtoms(), k=1)
            for i in range(len(I)):
                ij = (I[i], J[i])
                g.add_edge(*ij)
                rdkit_config.set_ghost_edge(g.edges[ij])
    else:
        for i, atom in enumerate(mol.GetAtoms()):
            assert (atom.GetIdx() == i)
            g.add_node(i)
            rdkit_config.set_node(g.nodes[i], atom, mol)
        for bond in mol.GetBonds():
            ij = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
            g.add_edge(*ij)
            rdkit_config.set_edge(g.edges[ij], bond)

        # set ring stereo
        if rdkit_config.set_ring_stereo:
            bond_orientation_dict = get_bond_orientation_dict(mol)
            for ring_idx in mol.GetRingInfo().AtomRings():
                atom_updown = []
                for idx in ring_idx:
                    if g.nodes[idx]['Ring_count'] != 1:
                        atom_updown.append(0)
                    else:
                        atom = mol.GetAtomWithIdx(idx)
                        atom_updown.append(
                            get_atom_ring_stereo(
                                mol,
                                atom,
                                ring_idx,
                                depth=rdkit_config.depth,
                                bond_orientation_dict=bond_orientation_dict))
                atom_updown = np.array(atom_updown)
                for j in range(len(ring_idx)):
                    b = j
                    e = j + 1 if j != len(ring_idx) - 1 else 0
                    StereoOfRingBond = float(atom_updown[b] * atom_updown[e] *
                                             len(ring_idx))
                    if ring_idx[b] < ring_idx[e]:
                        ij = (ring_idx[b], ring_idx[e])
                    else:
                        ij = (ring_idx[e], ring_idx[b])
                    if g.edges[ij]['RingStereo'] != 0.:
                        raise Exception(ij, g.edges[ij]['RingStereo'],
                                        StereoOfRingBond)
                    else:
                        g.edges[ij]['RingStereo'] = StereoOfRingBond
    # rdkit_config.set_node_propogation(g, mol, 'Chiral', depth=1)
    rdkit_config.set_node_propogation(g,
                                      mol,
                                      'AtomicNumber',
                                      depth=5,
                                      sum=False,
                                      usehash=False)
    rdkit_config.set_node_propogation(g,
                                      mol,
                                      'Hcount',
                                      depth=1,
                                      sum=True,
                                      usehash=False)
    # rdkit_config.set_node_propogation(g, mol, 'FirstNeighbors', depth=4)
    # rdkit_config.set_node_propogation(g, mol, 'Aromatic', depth=4)
    return _from_networkx(cls, g)
Example #42
0
def linkage_tree(X,
                 connectivity=None,
                 n_clusters=None,
                 linkage='complete',
                 affinity="euclidean",
                 return_distance=False):
    """Linkage agglomerative clustering based on a Feature matrix.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account some topological
    structure between samples.

    Read more in the :ref:`User Guide <hierarchical_clustering>`.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        feature matrix representing n_samples samples to be clustered

    connectivity : sparse matrix (optional).
        connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is None, i.e, the Ward algorithm is unstructured.

    n_clusters : int (optional)
        Stop early the construction of the tree at n_clusters. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of samples. In this case, the
        complete tree is not computed, thus the 'children' output is of
        limited use, and the 'parents' output should rather be used.
        This option is valid only when specifying a connectivity matrix.

    linkage : {"average", "complete", "single"}, optional, default: "complete"
        Which linkage criteria to use. The linkage criterion determines which
        distance to use between sets of observation.
            - average uses the average of the distances of each observation of
              the two sets
            - complete or maximum linkage uses the maximum distances between
              all observations of the two sets.
            - single uses the minimum of the distances between all observations
              of the two sets.

    affinity : string or callable, optional, default: "euclidean".
        which metric to use. Can be "euclidean", "manhattan", or any
        distance know to paired distance (see metric.pairwise)

    return_distance : bool, default False
        whether or not to return the distances between the clusters.

    Returns
    -------
    children : 2D array, shape (n_nodes-1, 2)
        The children of each non-leaf node. Values less than `n_samples`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_samples` is a non-leaf
        node and has children `children_[i - n_samples]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_samples + i`

    n_connected_components : int
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree.

    parents : 1D array, shape (n_nodes, ) or None
        The parent of each node. Only returned when a connectivity matrix
        is specified, elsewhere 'None' is returned.

    distances : ndarray, shape (n_nodes-1,)
        Returned when return_distance is set to True.

        distances[i] refers to the distance between children[i][0] and
        children[i][1] when they are merged.

    See also
    --------
    ward_tree : hierarchical clustering with ward linkage
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = np.reshape(X, (-1, 1))
    n_samples, n_features = X.shape

    linkage_choices = {
        'complete': _hierarchical.max_merge,
        'average': _hierarchical.average_merge,
        'single': None
    }  # Single linkage is handled differently
    try:
        join_func = linkage_choices[linkage]
    except KeyError:
        raise ValueError('Unknown linkage option, linkage should be one '
                         'of %s, but %s was given' %
                         (linkage_choices.keys(), linkage))

    if connectivity is None:
        from scipy.cluster import hierarchy  # imports PIL

        if n_clusters is not None:
            warnings.warn(
                'Partial build of the tree is implemented '
                'only for structured clustering (i.e. with '
                'explicit connectivity). The algorithm '
                'will build the full tree and only '
                'retain the lower branches required '
                'for the specified number of clusters',
                stacklevel=2)

        if affinity == 'precomputed':
            # for the linkage function of hierarchy to work on precomputed
            # data, provide as first argument an ndarray of the shape returned
            # by pdist: it is a flat array containing the upper triangular of
            # the distance matrix.
            i, j = np.triu_indices(X.shape[0], k=1)
            X = X[i, j]
        elif affinity == 'l2':
            # Translate to something understood by scipy
            affinity = 'euclidean'
        elif affinity in ('l1', 'manhattan'):
            affinity = 'cityblock'
        elif callable(affinity):
            X = affinity(X)
            i, j = np.triu_indices(X.shape[0], k=1)
            X = X[i, j]
        out = hierarchy.linkage(X, method=linkage, metric=affinity)
        children_ = out[:, :2].astype(np.int, copy=False)

        if return_distance:
            distances = out[:, 2]
            return children_, 1, n_samples, None, distances
        return children_, 1, n_samples, None

    connectivity, n_connected_components = _fix_connectivity(X,
                                                             connectivity,
                                                             affinity=affinity)
    connectivity = connectivity.tocoo()
    # Put the diagonal to zero
    diag_mask = (connectivity.row != connectivity.col)
    connectivity.row = connectivity.row[diag_mask]
    connectivity.col = connectivity.col[diag_mask]
    connectivity.data = connectivity.data[diag_mask]
    del diag_mask

    if affinity == 'precomputed':
        distances = X[connectivity.row,
                      connectivity.col].astype('float64',
                                               **_astype_copy_false(X))
    else:
        # FIXME We compute all the distances, while we could have only computed
        # the "interesting" distances
        distances = paired_distances(X[connectivity.row],
                                     X[connectivity.col],
                                     metric=affinity)
    connectivity.data = distances

    if n_clusters is None:
        n_nodes = 2 * n_samples - 1
    else:
        assert n_clusters <= n_samples
        n_nodes = 2 * n_samples - n_clusters

    if linkage == 'single':
        return _single_linkage_tree(connectivity, n_samples, n_nodes,
                                    n_clusters, n_connected_components,
                                    return_distance)

    if return_distance:
        distances = np.empty(n_nodes - n_samples)
    # create inertia heap and connection matrix
    A = np.empty(n_nodes, dtype=object)
    inertia = list()

    # LIL seems to be the best format to access the rows quickly,
    # without the numpy overhead of slicing CSR indices and data.
    connectivity = connectivity.tolil()
    # We are storing the graph in a list of IntFloatDict
    for ind, (data, row) in enumerate(zip(connectivity.data,
                                          connectivity.rows)):
        A[ind] = IntFloatDict(np.asarray(row, dtype=np.intp),
                              np.asarray(data, dtype=np.float64))
        # We keep only the upper triangular for the heap
        # Generator expressions are faster than arrays on the following
        inertia.extend(
            _hierarchical.WeightedEdge(d, ind, r) for r, d in zip(row, data)
            if r < ind)
    del connectivity

    heapify(inertia)

    # prepare the main fields
    parent = np.arange(n_nodes, dtype=np.intp)
    used_node = np.ones(n_nodes, dtype=np.intp)
    children = []

    # recursive merge loop
    for k in range(n_samples, n_nodes):
        # identify the merge
        while True:
            edge = heappop(inertia)
            if used_node[edge.a] and used_node[edge.b]:
                break
        i = edge.a
        j = edge.b

        if return_distance:
            # store distances
            distances[k - n_samples] = edge.weight

        parent[i] = parent[j] = k
        children.append((i, j))
        # Keep track of the number of elements per cluster
        n_i = used_node[i]
        n_j = used_node[j]
        used_node[k] = n_i + n_j
        used_node[i] = used_node[j] = False

        # update the structure matrix A and the inertia matrix
        # a clever 'min', or 'max' operation between A[i] and A[j]
        coord_col = join_func(A[i], A[j], used_node, n_i, n_j)
        for l, d in coord_col:
            A[l].append(k, d)
            # Here we use the information from coord_col (containing the
            # distances) to update the heap
            heappush(inertia, _hierarchical.WeightedEdge(d, k, l))
        A[k] = coord_col
        # Clear A[i] and A[j] to save memory
        A[i] = A[j] = 0

    # Separate leaves in children (empty lists up to now)
    n_leaves = n_samples

    # # return numpy array for efficient caching
    children = np.array(children)[:, ::-1]

    if return_distance:
        return children, n_connected_components, n_leaves, parent, distances
    return children, n_connected_components, n_leaves, parent
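A minimal sketch of the condensed-distance conversion used in the 'precomputed' branch above, assuming a small symmetric distance matrix; scipy's squareform performs the same flattening.

import numpy as np
from scipy.spatial.distance import squareform
from scipy.cluster import hierarchy

rng = np.random.default_rng(0)
pts = rng.normal(size=(6, 2))
D = np.linalg.norm(pts[:, None, :] - pts[None, :, :], axis=-1)

i, j = np.triu_indices(D.shape[0], k=1)
condensed = D[i, j]                       # flat upper triangle, pdist order
assert np.allclose(condensed, squareform(D, checks=False))

Z = hierarchy.linkage(condensed, method='complete')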
Example #43
0
def distance_matrix(s,
                    max_dist=None,
                    max_length_diff=None,
                    window=None,
                    max_step=None,
                    penalty=None,
                    psi=None,
                    block=None,
                    parallel=False,
                    use_c=False,
                    use_nogil=False,
                    show_progress=False):
    """Distance matrix for all sequences in s.

    :param s: Iterable of series
    :param window: see :meth:`distance`
    :param max_dist: see :meth:`distance`
    :param max_step: see :meth:`distance`
    :param max_length_diff: see :meth:`distance`
    :param penalty: see :meth:`distance`
    :param psi: see :meth:`distance`
    :param block: Only compute block in matrix. Expects tuple with begin and end, e.g. ((0,10),(20,25)) will
        only compare rows 0:10 with rows 20:25.
    :param parallel: Use parallel operations
    :param use_c: Use c compiled Python functions (it is recommended to use use_nogil)
    :param use_nogil: Use pure c functions
    :param show_progress: Show progress using the tqdm library
    """
    if parallel and (not use_c or not use_nogil):
        try:
            import multiprocessing as mp
            logger.info('Using multiprocessing')
        except ImportError:
            parallel = False
            mp = None
    else:
        mp = None
    dist_opts = {
        'max_dist': max_dist,
        'max_step': max_step,
        'window': window,
        'max_length_diff': max_length_diff,
        'penalty': penalty,
        'psi': psi
    }
    s = SeriesContainer.wrap(s)
    dists = None
    if max_length_diff is None:
        max_length_diff = np.inf
    large_value = np.inf
    logger.info('Computing distances')
    if use_c:
        for k, v in dist_opts.items():
            if v is None:
                dist_opts[k] = 0.0
    if use_c and use_nogil:
        logger.info("Compute distances in pure C")
        dist_opts['block'] = block
        if parallel:
            logger.info("Use parallel computation")
            dists = dtw_c.distance_matrix_nogil_p(s, **dist_opts)
        else:
            logger.info("Use serial computation")
            dists = dtw_c.distance_matrix_nogil(s, **dist_opts)
    if use_c and not use_nogil:
        logger.info("Compute distances in Python compiled C")
        if parallel:
            logger.info("Use parallel computation")
            dists = np.zeros((len(s), len(s))) + large_value
            if block is None:
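                # k=1: strict upper triangle, so each unordered pair of series
                # is computed exactly once (the DTW distance is symmetric)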
                idxs = np.triu_indices(len(s), k=1)
            else:
                idxsl_r = []
                idxsl_c = []
                for r in range(block[0][0], block[0][1]):
                    for c in range(max(r + 1, block[1][0]),
                                   min(len(s), block[1][1])):
                        idxsl_r.append(r)
                        idxsl_c.append(c)
                idxs = (np.array(idxsl_r), np.array(idxsl_c))
            with mp.Pool() as p:
                dists[idxs] = p.map(_distance_c_with_params,
                                    [(s[r], s[c], dist_opts)
                                     for c, r in zip(*idxs)])
                # pbar = tqdm(total=int((len(s)*(len(s)-1)/2)))
                # for r in range(len(s)):
                #     dists[r,r+1:len(s)] = p.map(distance, [(s[r],s[c], dist_opts) for c in range(r+1,len(cur))])
                #     pbar.update(len(s) - r - 1)
                # pbar.close()
        else:
            logger.info("Use serial computation")
            dist_opts['block'] = block
            dists = dtw_c.distance_matrix(s, **dist_opts)
    if not use_c:
        logger.info("Compute distances in Python")
        if parallel:
            logger.info("Use parallel computation")
            dists = np.zeros((len(s), len(s))) + large_value
            if block is None:
                idxs = np.triu_indices(len(s), k=1)
            else:
                idxsl_r = []
                idxsl_c = []
                for r in range(block[0][0], block[0][1]):
                    for c in range(max(r + 1, block[1][0]),
                                   min(len(s), block[1][1])):
                        idxsl_r.append(r)
                        idxsl_c.append(c)
                idxs = (np.array(idxsl_r), np.array(idxsl_c))
            with mp.Pool() as p:
                dists[idxs] = p.map(_distance_with_params,
                                    [(s[r], s[c], dist_opts)
                                     for c, r in zip(*idxs)])
                # pbar = tqdm(total=int((len(s)*(len(s)-1)/2)))
                # for r in range(len(s)):
                #     dists[r,r+1:len(s)] = p.map(distance, [(s[r],s[c], dist_opts) for c in range(r+1,len(cur))])
                #     pbar.update(len(s) - r - 1)
                # pbar.close()
        else:
            logger.info("Use serial computation")
            dists = np.zeros((len(s), len(s))) + large_value
            if block is None:
                it_r = range(len(s))
            else:
                it_r = range(block[0][0], block[0][1])
            if show_progress:
                it_r = tqdm(it_r)
            for r in it_r:
                if block is None:
                    it_c = range(r + 1, len(s))
                else:
                    it_c = range(max(r + 1, block[1][0]),
                                 min(len(s), block[1][1]))
                for c in it_c:
                    if abs(len(s[r]) - len(s[c])) <= max_length_diff:
                        dists[r, c] = distance(s[r], s[c], **dist_opts)
    return dists
Example #44
0
def Fst_predict(vector_lib,m_coeff,b,n_comp= 5,pop_max= 8,Iter= 20,bias_range= [20,300],Eigen= False, Scale= False,Centre= True,ploidy= 1):
    ### Select pre and post processing measures. 
    
    length_haps= vector_lib.shape[1]
        
    print('length haps: {}, N iterations: {}, range pops: {}'.format(length_haps,Iter,pop_max))
    
    #### Predict
    predicted= []

    #def controled_fsts(vector_lib,Eigen,length_haps,Scale,Center,N_pops,n_comp,Iter,N_sims,MixL,MixP,Pairs):
    lengths_vector= []

    ### store distances between centroids
    biased_pairwise= []

    ### store PC projection:
    dist_PC_corrected= {x:[] for x in range(n_comp)}

    ### store fsts
    fst_store= []


    ### proceed.

    for rep in range(Iter):
        
        N_pops= np.random.choice(range(3,pop_max),1,replace= False)[0]
        
        ## Population Sizes and labels
        bias_scheme= np.random.choice(range(bias_range[0],bias_range[1]),N_pops,replace= False)
        
        bias_labels= np.repeat(np.array([x for x in range(N_pops)]),bias_scheme)
        
        ### triangular matrices extract.
        iu1= np.triu_indices(N_pops,1) # for centroid comparison

        iu_bias= np.triu_indices(sum(bias_scheme),1)

        iu_control= np.triu_indices(2,1)

        Pops= np.random.choice(vector_lib.shape[0],N_pops,replace= False)
        #print('Iter: {}, vectors selected: {}, hap length: {}'.format(rep,Pops,length_haps))
        ########## FST

        freqs_selected= vector_lib[Pops,:length_haps]
        Pairwise= Ste.return_fsts2(freqs_selected)

        #fsts_compare = scale(Pairwise.fst)
        fsts_compare= Pairwise.fst
        
        fst_store.extend(fsts_compare)

        ## lengths
        lengths_vector.extend([length_haps] * len(fsts_compare))
        
        #### generate data and perform PCA
        data= []

        for k in range(N_pops):

            probs= vector_lib[Pops[k],:]

            m= bias_scheme[k]
            Haps= [[np.random.choice([ploidy,0],p= [1-probs[x],probs[x]]) for x in range(length_haps)] for acc in range(m)]

            data.extend(Haps)

        data2= np.array(data)

        if Scale:
            data2= scale(data2)

        pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(data2)

        feat_bias= pca.transform(data2)

        if Eigen:
            feat_bias= feat_bias * pca.explained_variance_ratio_

        #### Centroid distances
        
        bias_centroids= [np.mean(feat_bias[[y for y in range(feat_bias.shape[0]) if bias_labels[y] == z],:],axis= 0) for z in range(N_pops)]
        bias_centroids= np.array(bias_centroids)

        bias_pair_dist= pairwise_distances(bias_centroids,metric= 'euclidean')
        bias_pair_dist= bias_pair_dist[iu1]
        #bias_pair_dist= scale(bias_pair_dist)
        
        fst_pred= [np.exp(m_coeff*np.log(x) + b) for x in bias_pair_dist]
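        # i.e. fst_hat = exp(b) * distance**m_coeff, the fitted log-log model
        # mapping centroid distance in PC space to Fst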
        predicted.extend(fst_pred)
        

    
    fig= [go.Scatter(
        x= fst_store,
        y= predicted,
        mode= 'markers'
    )]

    layout = go.Layout(
        title= 'test of prediction',
        yaxis=dict(
            title='predicted Fst'),
        xaxis=dict(
            title='observed Fst')
    )

    fig= go.Figure(data=fig, layout=layout)
    iplot(fig)
Example #45
0
    def disthist_match(self, calls, pos):
        """
        """
        coll = []
        for _ in range(calls):
            bigb_data_idxs, bigb_data_classes = self.spc_batchfinder(
                self.bigbs)
            bigb_dict = {}
            for i, bigb_cls in enumerate(bigb_data_classes):
                if bigb_cls not in bigb_dict: bigb_dict[bigb_cls] = []
                bigb_dict[bigb_cls].append(i)

            bigbatch = self.storage[bigb_data_idxs]
            if self.low_proj_dim > 0:
                low_dim_proj = nn.Linear(bigbatch.shape[-1],
                                         self.low_proj_dim,
                                         bias=False)
                with torch.no_grad():
                    bigbatch = low_dim_proj(bigbatch)
            bigbatch = bigbatch.numpy()

            bigb_distmat_triu_idxs = np.triu_indices(len(bigbatch), 1)
            bigb_distvals = self.get_distmat(bigbatch)[bigb_distmat_triu_idxs]

            bigb_disthist_range, bigb_disthist_bins = (
                np.min(bigb_distvals), np.max(bigb_distvals)), 50
            bigb_disthist, _ = np.histogram(bigb_distvals,
                                            bins=bigb_disthist_bins,
                                            range=bigb_disthist_range)
            bigb_disthist = bigb_disthist / np.sum(bigb_disthist)

            bigb_mu = np.mean(bigbatch, axis=0)
            bigb_std = np.std(bigbatch, axis=0)

            cost_collect, bigb_idxs = [], []

            for _ in range(self.num_batch_comps):
                subset_idxs = [
                    np.random.choice(bigb_dict[np.random.choice(
                        list(bigb_dict.keys()))],
                                     self.samples_per_class,
                                     replace=False)
                    for _ in range(self.batch_size // self.samples_per_class)
                ]
                subset_idxs = [x for y in subset_idxs for x in y]
                # subset_idxs = sorted(np.random.choice(len(bigbatch), batch_size, replace=False))
                bigb_idxs.append(subset_idxs)
                subset = bigbatch[subset_idxs, :]
                subset_distmat = self.get_distmat(subset)

                subset_distmat_triu_idxs = np.triu_indices(
                    len(subset_distmat), 1)
                subset_distvals = self.get_distmat(
                    subset)[subset_distmat_triu_idxs]

                subset_disthist_range, subset_disthist_bins = (
                    np.min(subset_distvals), np.max(subset_distvals)), 50
                subset_disthist, _ = np.histogram(subset_distvals,
                                                  bins=bigb_disthist_bins,
                                                  range=bigb_disthist_range)
                subset_disthist = subset_disthist / np.sum(subset_disthist)

                subset_mu = np.mean(subset, axis=0)
                subset_std = np.std(subset, axis=0)

                dist_wd = wasserstein_distance(
                    bigb_disthist, subset_disthist) + wasserstein_distance(
                        subset_disthist, bigb_disthist)
                cost = np.linalg.norm(bigb_mu - subset_mu) + np.linalg.norm(
                    bigb_std - subset_std) + 75 * dist_wd
                cost_collect.append(cost)
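                # the cost combines a symmetric Wasserstein distance between the
                # pairwise-distance histograms with the L2 gaps in feature
                # mean/std; the subset minimizing it best mimics the big batch.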

            bigb_ix = bigb_idxs[np.argmin(cost_collect)]
            bigb_data_ix = bigb_data_idxs[bigb_ix]
            coll.append(bigb_data_ix)

        return coll
Example #46
0
def classification(Train_Graphs,
                   Test_Graphs,
                   Train_Labels,
                   Test_Labels,
                   FS_strategy=None,
                   K=None):
    """
	Classify (regular) unimodal connectomes and get performance scores (i.e, accuracy, sensitivity, specificity)

	Parameters:
	----------
	Train_Graphs : 3-D array with shape (~ (Fold-1)*N_m/Fold, n_m, n_m)

	Test_Graphs : 3-D array with shape (~ N_m/Fold, n_m, n_m)

	Train_Labels : 1-D label array with length ~ (Fold-1)*N_m/Fold

	Test_Labels : 1-D label array with length ~ N_m/Fold

	FS_strategy : "SNF", "Averaging" or None (default),
		Feature selection method to calculate the representative graphs from each class
		that are used to identify the most discriminative connectomic features.
		If 'SNF', the representative graphs are created with a graph fusion process
		(for more information, see "Similarity Network Fusion"). If "Averaging",
		the representative graphs are created by simply averaging graphs of each class.
		If "None", no feature selection method to apply and all connectomic features of
		graphs are used in classification (all upper off-diagonal elements of graphs).

	K : int or None (default),
		Number of most discriminative features set by user to be used in later classification.
		If "FS_strategy" is "SNF" or "Averaging", "K" should be a positive integer
		less than nt*(nt-1)/2. If "FS_strategy" is "None", then K should be "None" too.


	Return:
	-------
	out : array of 3 performance metrics (i.e., accuracy, sensitivity, specificity)
	"""

    if Train_Graphs.shape[1:] == Test_Graphs.shape[1:]:
        n = Train_Graphs.shape[1]
    else:
        raise ValueError(
            'Shapes of connectomes in "Train_Graphs" and "Test_Graphs" are different'
        )

    if FS_strategy in ['SNF', 'Averaging']:

        if K == None:
            raise ValueError('Provide a proper K for feature selection (FS)')

        elif 2 * K > n * (n - 1):
            raise ValueError('K is too large for the current graph size (n)')

    elif (FS_strategy is None) and (K is not None):
        raise ValueError('K can only be used with a feature selection (FS)')

    elif FS_strategy is not None:
        raise ValueError('Invalid "FS_strategy", use "SNF", "Averaging" or None')

    if FS_strategy != None:

        print(f'\n\nFS: {FS_strategy}')
        indices = determine_features_to_select(Train_Graphs,
                                               Train_Labels,
                                               FS_strategy=FS_strategy,
                                               K=K)

        TR_for_SVM = np.array(
            [graph[indices[:, 0], indices[:, 1]] for graph in Train_Graphs])
        TST_for_SVM = np.array(
            [graph[indices[:, 0], indices[:, 1]] for graph in Test_Graphs])

    else:
        TR_for_SVM = np.array(
            [graph[np.triu_indices(n, 1)] for graph in Train_Graphs])
        TST_for_SVM = np.array(
            [graph[np.triu_indices(n, 1)] for graph in Test_Graphs])

    Test_Labels_Pred = predict_test_labels(TR_for_SVM, TST_for_SVM,
                                           Train_Labels)
    Scores = calculate_scores(Test_Labels, Test_Labels_Pred)

    return Scores  # Acc, Sens, Spec
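A minimal sketch of the "all upper off-diagonal elements" vectorization used in the no-feature-selection branch above, with small random symmetric graphs standing in for real connectomes; the SVM step is out of scope here.

import numpy as np

rng = np.random.default_rng(0)
n = 4                                               # nodes per graph
graphs = rng.random((3, n, n))
graphs = (graphs + graphs.transpose(0, 2, 1)) / 2   # symmetrize each graph

iu = np.triu_indices(n, 1)                          # upper off-diagonal positions
X = np.array([g[iu] for g in graphs])               # feature matrix for a classifier
assert X.shape == (3, n * (n - 1) // 2)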
Example #47
0
    #highlight = np.zeros(X_full.shape[0])
    #highlight[sorted(dense_idx)] = 1
    #
    #adata = AnnData(X=X_full)
    #adata.obs['highlight'] = highlight
    #sc.pp.neighbors(adata)
    #sc.tl.umap(adata)
    #sc.pl.scatter(
    #    adata, color='highlight', basis='umap',
    #    save='_{}_highlight_dense_all.png'.format(NAMESPACE)
    #)
    #exit()

    n_features = Xs[0].shape[0]
    n_correlations = int(comb(n_features, 2) + n_features)
    triu_idx = np.triu_indices(n_features)

    print(len(nonzero_idx))

    nonzero_tup = ([ ni[0] for ni in sorted(nonzero_idx) ],
                   [ ni[1] for ni in sorted(nonzero_idx) ])
    Xs_dimred = [
        X[nonzero_tup].A.flatten()
        for X in Xs
    ]

    #analyze_dense(Xs, Xs_dimred, sparsities, node_sizes)

    #srp = SparseRandomProjection(
    #    eps=0.1, random_state=69
    #).fit(ss.csr_matrix((len(Xs), n_correlations)))
Example #48
0
def triu_indices(*args, **kwargs):
    return tuple(map(torch.from_numpy, _np.triu_indices(*args, **kwargs)))
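A brief usage sketch for the wrapper above, assuming numpy is imported as _np and torch as torch, as the snippet implies.

import numpy as _np
import torch

t = torch.arange(16.).reshape(4, 4)
rows, cols = triu_indices(4, k=1)     # LongTensors built from np.triu_indices
upper = t[rows, cols]                 # strict upper triangle: 6 elements
assert upper.numel() == 6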
Example #49
0
    def setUp(self):
        # Let's consider a 100x100x10 km grid with a homogeneous velocity Vp=6
        # km/s
        self.Vp = 6

        # Seismic stations are all at z=0 km, placed on a staggered grid
        # covering 0 to 100 km in x and y; the positive z axis points upwards
        self.sta = {}
        self.sta[1] = {'x': 0, 'y': 0, 'depth': 0, 'elev': 0, 'station': 1}
        self.sta[2] = {'x': 40, 'y': 0, 'depth': 0, 'elev': 0, 'station': 2}
        self.sta[3] = {'x': 80, 'y': 0, 'depth': 0, 'elev': 0, 'station': 3}
        self.sta[4] = {'x': 20, 'y': 20, 'depth': 0, 'elev': 0, 'station': 4}
        self.sta[5] = {'x': 60, 'y': 20, 'depth': 0, 'elev': 0, 'station': 5}
        self.sta[6] = {'x': 100, 'y': 20, 'depth': 0, 'elev': 0, 'station': 6}
        self.sta[7] = {'x': 0, 'y': 40, 'depth': 0, 'elev': 0, 'station': 7}
        self.sta[8] = {'x': 40, 'y': 40, 'depth': 0, 'elev': 0, 'station': 8}
        self.sta[9] = {'x': 80, 'y': 40, 'depth': 0, 'elev': 0, 'station': 9}
        self.sta[10] = {'x': 20, 'y': 60, 'depth': 0, 'elev': 0, 'station': 10}
        self.sta[11] = {'x': 60, 'y': 60, 'depth': 0, 'elev': 0, 'station': 11}
        self.sta[12] = {
            'x': 100,
            'y': 60,
            'depth': 0,
            'elev': 0,
            'station': 12
        }
        self.sta[13] = {'x': 0, 'y': 80, 'depth': 0, 'elev': 0, 'station': 13}
        self.sta[14] = {'x': 40, 'y': 80, 'depth': 0, 'elev': 0, 'station': 14}
        self.sta[15] = {'x': 80, 'y': 80, 'depth': 0, 'elev': 0, 'station': 15}
        self.sta[16] = {
            'x': 20,
            'y': 100,
            'depth': 0,
            'elev': 0,
            'station': 16
        }
        self.sta[17] = {
            'x': 60,
            'y': 100,
            'depth': 0,
            'elev': 0,
            'station': 17
        }
        self.sta[18] = {
            'x': 100,
            'y': 100,
            'depth': 0,
            'elev': 0,
            'station': 18
        }

        self.area = [0, 100, 0, 100, -10, 0]

        # Let's assume 5 seismic events occurring at the same place
        # (x=50,y=50,z=-5) but not at the same time
        self.cluster = [1, 2, 3, 4, 5]
        self.N = len(self.cluster)

        # Define true hypocentral parameters
        # positive z axis downwards
        self.locs_true = []
        self.locs_true.append({
            'x_mean':
            50.2,
            'y_mean':
            49.7,
            'z_mean':
            4.5,
            'o_time':
            utcdatetime.UTCDateTime('2010-01-01T12: 00: 00.0000Z')
        })
        self.locs_true.append({
            'x_mean':
            50.3,
            'y_mean':
            49.9,
            'z_mean':
            4.75,
            'o_time':
            utcdatetime.UTCDateTime('2010-01-01T12: 01: 00.0000Z')
        })
        self.locs_true.append({
            'x_mean':
            49.8,
            'y_mean':
            50.1,
            'z_mean':
            5.25,
            'o_time':
            utcdatetime.UTCDateTime('2010-01-01T12: 02: 00.0000Z')
        })
        self.locs_true.append({
            'x_mean':
            49.7,
            'y_mean':
            50.4,
            'z_mean':
            5.5,
            'o_time':
            utcdatetime.UTCDateTime('2010-01-01T12: 03: 00.0000Z')
        })
        self.locs_true.append({
            'x_mean':
            50.0,
            'y_mean':
            49.9,
            'z_mean':
            5,
            'o_time':
            utcdatetime.UTCDateTime('2010-01-01T12: 04: 00.0000Z')
        })

        centroid_x_true = np.mean([loc['x_mean'] for loc in self.locs_true])
        centroid_y_true = np.mean([loc['y_mean'] for loc in self.locs_true])
        centroid_z_true = np.mean([loc['z_mean'] for loc in self.locs_true])

        # Measured hypocentral parameters
        # positive z-axis downwards
        err_x = [0, 0, 0, 0, 0]
        err_y = [0, 0, 0, 0, 0]
        err_z = [0, 0, 0, 0, 0]
        err_to = [0, 0, 0, 0, 0]

        err_x = [0.2, 0.3, -0.2, -0.3, 0]
        err_y = [-0.3, -0.1, 0.1, 0.4, -0.1]
        err_z = [-0.5, -0.25, 0.25, 0.5, 0]
        err_to = [2, 4, -2, 1, -4]

        self.locs_mes = []
        for i in range(len(self.locs_true)):
            self.locs_mes.append({
                'x_mean':
                self.locs_true[i]['x_mean'] + err_x[i],
                'y_mean':
                self.locs_true[i]['y_mean'] + err_y[i],
                'z_mean':
                self.locs_true[i]['z_mean'] + err_z[i],
                'o_time':
                self.locs_true[i]['o_time'] + err_to[i]
            })

        centroid_x_mes = np.mean([loc['x_mean'] for loc in self.locs_mes])
        centroid_y_mes = np.mean([loc['y_mean'] for loc in self.locs_mes])
        centroid_z_mes = np.mean([loc['z_mean'] for loc in self.locs_mes])

        # Input parameters
        self.threshold = 0.8
        self.nbmin = 3

        # Compute the traveltimes and arrival times
        self.ttimes_true = {}
        self.atimes_true = {}
        self.ttimes_mes = {}
        self.atimes_mes = {}
        for staname in self.sta.keys():
            xsta = self.sta[staname]['x']
            ysta = self.sta[staname]['y']
            zsta = -self.sta[staname]['elev']  # positive z-axis downwards
            self.ttimes_true[staname] = []
            self.atimes_true[staname] = []
            self.ttimes_mes[staname] = []
            self.atimes_mes[staname] = []
            for j in range(self.N):
                d_true = np.sqrt((xsta - self.locs_true[j]['x_mean'])**2 +
                                 (ysta - self.locs_true[j]['y_mean'])**2 +
                                 (zsta - self.locs_true[j]['z_mean'])**2)
                self.ttimes_true[staname].append(d_true / self.Vp)
                self.atimes_true[staname].append(self.locs_true[j]['o_time'] +
                                                 self.ttimes_true[staname][j])
                d_mes = np.sqrt((xsta - self.locs_mes[j]['x_mean'])**2 +
                                (ysta - self.locs_mes[j]['y_mean'])**2 +
                                (zsta - self.locs_mes[j]['z_mean'])**2)
                self.ttimes_mes[staname].append(d_mes / self.Vp)
                self.atimes_mes[staname].append(self.locs_mes[j]['o_time'] +
                                                self.ttimes_mes[staname][j])

        self.coeff = {}
        self.delay = {}
        for staname in self.sta.keys():
            self.coeff[staname] = np.zeros((self.N, self.N))
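            # set coeff[i, j] = 1 on the upper triangle (i <= j): every event
            # pair at this station counts as perfectly correlated, i.e. above
            # the 0.8 threshold defined above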
            up_tr = np.triu_indices(self.N)
            self.coeff[staname][up_tr] = 1
            self.delay[staname] = np.zeros((self.N, self.N))
            for i in range(self.N):
                for j in range(i + 1, self.N):
                    self.delay[staname][i][j] = \
                        self.ttimes_true[staname][i] - \
                        self.ttimes_true[staname][j] + err_to[j]-err_to[i]

        self.locs_expected = []
        for i in range(len(self.locs_true)):
            self.locs_expected.append({
                'x_mean':
                self.locs_true[i]['x_mean'] +
                (centroid_x_mes - centroid_x_true),
                'y_mean':
                self.locs_true[i]['y_mean'] +
                (centroid_y_mes - centroid_y_true),
                'z_mean':
                self.locs_true[i]['z_mean'] +
                (centroid_z_mes - centroid_z_true),
                'o_time':
                self.locs_true[i]['o_time'] + np.mean(err_to)
            })
Example #50
0
    def stat_cb(self, stat_msg):

        data_t = stat_msg.data.copy().reshape(self.datagram_size, 1)
        # Rotate AUV trajectory to place wrt odom in the image
        data_t[2:5] = self.m2o_mat[0:3, 0:3].dot(data_t[2:5])
        data_t[5:8] = self.m2o_mat[0:3, 0:3].dot(data_t[5:8])
        data_t[8:11] = self.m2o_mat[0:3, 0:3].dot(data_t[8:11])

        # Reconstruct 3x3 covariance matrix
        # Not account for z values atm
        cov_mat = np.zeros((3, 3))
        cov_mat[np.triu_indices(3,
                                0)] = np.asarray(data_t[11:17]).reshape(1, 6)
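        # the 6 packed values are the upper triangle (diagonal included) of the
        # 3x3 covariance; only the xy term is mirrored since z is ignored here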
        cov_mat[1, 0] = cov_mat[0, 1]
        cov_mat = (self.m2o_mat[0:3, 0:3].transpose().dot(cov_mat)).dot(
            self.m2o_mat[0:3, 0:3])
        data_t[11:17] = cov_mat[np.triu_indices(3)].reshape(6, 1)
        self.cov_traces.append(np.trace(cov_mat))

        self.filt_vec = np.hstack((self.filt_vec, data_t))
        self.filter_cnt += 1
        if self.filter_cnt > 0:

            plt.gcf().canvas.mpl_connect(
                'key_release_event',
                lambda event: [exit(0) if event.key == 'escape' else None])

            # Plot x,y from GT, odom and PF
            if False:
                plt.cla()
                #  Center image on odom frame
                plt.imshow(self.img,
                           extent=[
                               -647 - self.m2o_mat[0, 3],
                               1081 - self.m2o_mat[0, 3],
                               -1190 - self.m2o_mat[1, 3],
                               523 - self.m2o_mat[1, 3]
                           ])
                #  plt.imshow(self.img, extent=[-740, 980, -690, 1023])
                plt.plot(self.filt_vec[2, :], self.filt_vec[3, :], "-k")

                plt.plot(self.filt_vec[5, :], self.filt_vec[6, :], "-b")

                plt.plot(self.filt_vec[8, :], self.filt_vec[9, :], "-r")

                self.plot_covariance_ellipse(self.filt_vec[5:7, -1],
                                             self.filt_vec[11:17, -1])

            # Plot error between DR PF and GT
            if False:
                plt.subplot(3, 1, 1)
                plt.cla()
                plt.plot(
                    np.linspace(0, self.filter_cnt, self.filter_cnt),
                    np.sqrt(
                        np.sum((self.filt_vec[2:4, :] -
                                self.filt_vec[8:10, :])**2,
                               axis=0)), "-k")
                plt.grid(True)

                # Error between PF and GT
                plt.subplot(3, 1, 2)
                plt.cla()
                plt.plot(
                    np.linspace(0, self.filter_cnt, self.filter_cnt),
                    np.sqrt(
                        np.sum(
                            (self.filt_vec[2:4, :] - self.filt_vec[5:7, :])**2,
                            axis=0)), "-b")

                plt.grid(True)

                # Plot trace of cov matrix
                plt.subplot(3, 1, 3)
                plt.cla()
                plt.plot(np.linspace(0, self.filter_cnt, self.filter_cnt),
                         np.asarray(self.cov_traces), "-k")
                plt.grid(True)

            # Plot real pings vs expected meas
            if True:
                plt.subplot(1, 1, 1)
                plt.cla()
                plt.plot(self.pings_vec[:, 1], self.pings_vec[:, 2], "-k")
                plt.plot(self.pings_vec[:, 4], self.pings_vec[:, 5], "-b")

                # For debugging
                #  print (self.pings_vec[:, 2])
                #  print (self.pings_vec[:, 5])
                #  print (self.pings_vec[:, 2] - self.pings_vec[:, 5])
                #  print (np.linalg.norm(self.pings_vec[:, 2] - self.pings_vec[:, 5]))
                #  print(np.gradient(exp_mbes_ranges) - np.gradient(real_mbes_ranges))

                #  print(self.meas_cov)
                #  print (np.linalg.norm(exp_mbes_ranges - real_mbes_ranges))
                #  print (np.linalg.norm(np.gradient(real_mbes_ranges)
                #  - np.gradient(exp_mbes_ranges)))

                plt.grid(True)

            plt.pause(0.0001)

            if self.survey_finished:
                plt.savefig(self.survey_name + "_tracks.png")
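The pack/rotate/unpack pattern above can be illustrated on its own. This is a minimal, self-contained sketch with a made-up rotation R and made-up covariance values standing in for m2o_mat[0:3, 0:3] and data_t[11:17]; it is not the node's actual code.

import numpy as np

# Hypothetical planar rotation and 6 upper-triangular covariance values
# (xx, xy, xz, yy, yz, zz).
R = np.array([[0.0, -1.0, 0.0],
              [1.0,  0.0, 0.0],
              [0.0,  0.0, 1.0]])
upper_vals = np.array([0.5, 0.1, 0.0, 0.3, 0.0, 0.2])

# Unpack the 6 values into the upper triangle (diagonal included) and symmetrize.
cov = np.zeros((3, 3))
cov[np.triu_indices(3)] = upper_vals
cov = cov + cov.T - np.diag(np.diag(cov))

# Rotate the covariance into the target frame: R^T C R.
cov_rot = R.T.dot(cov).dot(R)

# Re-pack the upper triangle into a flat vector and track the trace.
packed = cov_rot[np.triu_indices(3)]
print(packed, np.trace(cov_rot))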
Example #51
0
    def parse_roa(self):
        """
        Parse the :class:`~exatomic.core.tensor.Polarizability` dataframe. This will parse the
        output from Raman Optical Activity calculations.

        Note:
            We generate a 3D tensor with the 2D tensor code. 3D tensors will have 3 rows labeled
            with the same name.
        """
        _reroa = 'roa begin'
        _reare = 'alpha real'
        _reaim = 'alpha im'
        #        _reombre = 'beta real'
        #        _reombim = 'beta im'
        _reombre = 'omega beta(real)'
        _reombim = 'omega beta(imag)'
        _redqre = 'dipole-quadrupole real (Cartesian)'
        _redqim = 'dipole-quadrupole imag (Cartesian)'

        if not self.find(_reroa):
            return
        found_2d = self.find(_reare,
                             _reaim,
                             _reombre,
                             _reombim,
                             keys_only=True)
        found_3d = self.find(_redqre, _redqim, keys_only=True)
        data = {}
        start = np.array(list(found_2d.values())).reshape(4, ) + 1
        end = np.array(list(found_2d.values())).reshape(4, ) + 10
        columns = ['x', 'val']
        data = [
            self.pandas_dataframe(s, e, columns) for s, e in zip(start, end)
        ]
        df = pd.concat([dat for dat in data]).reset_index(drop=True)
        df['grp'] = [i for i in range(4) for j in range(9)]
        df = df[['val', 'grp']]
        df = pd.DataFrame(
            df.groupby('grp').apply(
                lambda x: x.unstack().values[:-9]).values.tolist(),
            columns=['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz'])
        # find the electric dipole-quadrupole polarizability
        # NWChem gives this as a list of 18 values, assuming the matrix is symmetric;
        # for our implementation we need to extend it to 27 elements
        # TODO: check that NWChem does assume that the 3D tensors are symmetric
        start = np.sort(np.array(list(found_3d.values())).reshape(2, )) + 1
        end = np.sort(np.array(list(found_3d.values())).reshape(2, )) + 19
        data = [
            self.pandas_dataframe(s, e, columns) for s, e in zip(start, end)
        ]
        df3 = pd.concat([dat for dat in data]).reset_index(drop=True)
        vals = df3['val'].values.reshape(2, 3, 6)
        adx = np.triu_indices(3)
        mat = np.zeros((2, 3, 3, 3))
        for i in range(2):
            for j in range(3):
                mat[i][j][adx] = vals[i][j]
                mat[i][j] = mat[i][j] + np.transpose(
                    mat[i][j]) - np.identity(3) * mat[i][j]
        mat = mat.reshape(18, 3)
        df3 = pd.DataFrame(mat, columns=['x', 'y', 'z'])
        df3['grp1'] = [i for i in range(2) for j in range(9)]
        df3['grp2'] = [j for i in range(2) for j in range(3) for n in range(3)]
        df3 = pd.DataFrame(
            df3.groupby([
                'grp1', 'grp2'
            ]).apply(lambda x: x.unstack().values[:-6]).values.tolist(),
            columns=['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz'],
            index=[
                'Ax_real', 'Ay_real', 'Az_real', 'Ax_imag', 'Ay_imag',
                'Az_imag'
            ])
        split_label = np.transpose([i.split('_') for i in df3.index.values])
        label = split_label[0]
        types = split_label[1]
        df['label'] = found_2d.keys()
        df['label'].replace(
            [_reare, _reombre, _reaim, _reombim],
            ['alpha-real', 'g_prime-real', 'alpha-imag', 'g_prime-imag'],
            inplace=True)
        df['type'] = [i.split('-')[-1] for i in df['label'].values]
        df['label'] = [i.split('-')[0] for i in df['label'].values]
        df['frame'] = np.repeat([0], len(df.index))
        df3['label'] = label
        df3['type'] = types
        df3['frame'] = np.repeat([0], len(df3.index))
        self.roa = pd.concat([df, df3], ignore_index=True)
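As a hedged aside, the A + A.T - diag(A) symmetrization used above to expand the 18 symmetric values into full 3x3 blocks can be shown in isolation with toy values (not NWChem output):

import numpy as np

# Toy 3 x 6 block of upper-triangular values, one row per Cartesian direction.
vals = np.arange(1.0, 19.0).reshape(3, 6)

adx = np.triu_indices(3)
mat = np.zeros((3, 3, 3))
for j in range(3):
    mat[j][adx] = vals[j]
    # A + A.T double-counts the diagonal, so subtract it back off once.
    mat[j] = mat[j] + mat[j].T - np.identity(3) * mat[j]

print(mat.shape)                       # (3, 3, 3)
print(np.allclose(mat[0], mat[0].T))   # each block is symmetric: True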
Example #52
0
    def symmetry(self) -> Tuple[Real, ...]:
        indices = zip(*np.triu_indices(self.dimension, 1))
        return tuple(
            self._graph.edges.get(i, {"weight": 2})["weight"] for i in indices)
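A tiny illustration of the iteration pattern above (a standalone sketch, not part of the class): zip(*np.triu_indices(n, 1)) yields one (i, j) pair per strictly upper-triangular entry, i.e. one per unordered pair of vertices.

import numpy as np

n = 4
pairs = [(int(i), int(j)) for i, j in zip(*np.triu_indices(n, 1))]
print(pairs)  # [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]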
Example #53
0
def _bs_three_point(cols):
    """deconvolution function for three_point.

    Parameters
    ----------
    cols : int
        width of the image
    """

    # function Eq. (7)  for j >= i
    def I0diag(i, j):
        return np.log((np.sqrt((2 * j + 1)**2 - 4 * i**2) + 2 * j + 1) /
                      (2 * j)) / (2 * np.pi)

    # j > i
    def I0(i, j):
        return np.log(
            ((np.sqrt((2 * j + 1)**2 - 4 * i**2) + 2 * j + 1)) /
            (np.sqrt((2 * j - 1)**2 - 4 * i**2) + 2 * j - 1)) / (2 * np.pi)

    # i = j  NB minus -2I_ij typo in Dasch paper
    def I1diag(i, j):
        return np.sqrt(
            (2 * j + 1)**2 - 4 * i**2) / (2 * np.pi) - 2 * j * I0diag(i, j)

    # j > i
    def I1(i, j):
        return (np.sqrt((2 * j + 1)**2 - 4 * i**2) - np.sqrt(
            (2 * j - 1)**2 - 4 * i**2)) / (2 * np.pi) - 2 * j * I0(i, j)

    D = np.zeros((cols, cols))

    # matrix indices ------------------
    # i = j
    I, J = np.diag_indices(cols)
    I = I[1:]
    J = J[1:]  # drop special cases (0,0), (0,1)

    # j = i - 1
    Ib, Jb = I, J - 1

    # j = i + 1
    Iu, Ju = I - 1, J
    Iu = Iu[1:]  # drop special case (0, 1)
    Ju = Ju[1:]

    # j > i + 1
    Iut, Jut = np.triu_indices(cols, k=2)
    Iut = Iut[1:]  # drop special case (0, 2)
    Jut = Jut[1:]

    # D operator matrix ------------------
    # j = i - 1
    D[Ib, Jb] = I0diag(Ib, Jb + 1) - I1diag(Ib, Jb + 1)

    # j = i
    D[I, J] = I0(I, J + 1) - I1(I, J + 1) + 2 * I1diag(I, J)

    # j = i + 1
    D[Iu, Ju] = I0(Iu, Ju+1) - I1(Iu, Ju+1) + 2*I1(Iu, Ju) -\
                I0diag(Iu, Ju-1) - I1diag(Iu, Ju-1)

    # j > i + 1
    D[Iut, Jut] = I0(Iut, Jut+1) - I1(Iut, Jut+1) + 2*I1(Iut, Jut) -\
                  I0(Iut, Jut-1) - I1(Iut, Jut-1)

    # special cases (that switch between I0, I1 cases)
    D[0, 2] = I0(0, 3) - I1(0, 3) + 2 * I1(0, 2) - I0(0, 1) - I1(0, 1)
    D[0, 1] = I0(0, 2) - I1(0, 2) + 2 * I1(0, 1) - 1 / np.pi
    D[0, 0] = I0(0, 1) - I1(0, 1) + 1 / np.pi

    return D
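A minimal usage sketch, assuming the _bs_three_point function above is in scope: build the operator for a small image width and confirm that only the banded upper part is populated.

import numpy as np

D = _bs_three_point(6)
print(D.shape)                    # (6, 6)

# Only entries with j >= i - 1 are assigned above, so everything further
# below the diagonal stays exactly zero.
i, j = np.indices(D.shape)
print(np.all(D[j < i - 1] == 0))  # True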
Example #54
0
    def base_angles(self) -> np.ndarray:
        return self._primal_gramian[np.triu_indices(self.dimension, 1)].copy()
Example #55
0
def test_cov_estimation_on_raw(method, tmpdir):
    """Test estimation from raw (typically empty room)."""
    if method == 'shrunk':
        try:
            import sklearn  # noqa: F401
        except Exception as exp:
            pytest.skip('sklearn is required, got %s' % (exp, ))
    raw = read_raw_fif(raw_fname, preload=True)
    cov_mne = read_cov(erm_cov_fname)
    method_params = dict(shrunk=dict(shrinkage=[0]))

    # The pure string uses the more efficient numpy-based method;
    # the list gets triaged to compute_covariance (should be equivalent
    # but uses more memory)
    with pytest.warns(None):  # can warn about EEG ref
        cov = compute_raw_covariance(raw,
                                     tstep=None,
                                     method=method,
                                     rank='full',
                                     method_params=method_params)
    assert_equal(cov.ch_names, cov_mne.ch_names)
    assert_equal(cov.nfree, cov_mne.nfree)
    assert_snr(cov.data, cov_mne.data, 1e6)

    # test equivalence with np.cov
    cov_np = np.cov(raw.copy().pick_channels(cov['names']).get_data(), ddof=1)
    if method != 'shrunk':  # can check all
        off_diag = np.triu_indices(cov_np.shape[0])
    else:
        # We explicitly zero out off-diag entries between channel types,
        # so let's just check MEG off-diag entries
        off_diag = np.triu_indices(
            len(pick_types(raw.info, meg=True, exclude=())))
    for other in (cov_mne, cov):
        assert_allclose(np.diag(cov_np), np.diag(other.data), rtol=5e-6)
        assert_allclose(cov_np[off_diag], other.data[off_diag], rtol=4e-3)
        assert_snr(cov.data, other.data, 1e6)

    # tstep=0.2 (default)
    with pytest.warns(None):  # can warn about EEG ref
        cov = compute_raw_covariance(raw,
                                     method=method,
                                     rank='full',
                                     method_params=method_params)
    assert_equal(cov.nfree, cov_mne.nfree - 120)  # cutoff some samples
    assert_snr(cov.data, cov_mne.data, 170)

    # test IO when computation done in Python
    cov.save(tmpdir.join('test-cov.fif'))  # test saving
    cov_read = read_cov(tmpdir.join('test-cov.fif'))
    assert cov_read.ch_names == cov.ch_names
    assert cov_read.nfree == cov.nfree
    assert_array_almost_equal(cov.data, cov_read.data)

    # test with a subset of channels
    raw_pick = raw.copy().pick_channels(raw.ch_names[:5])
    raw_pick.info.normalize_proj()
    cov = compute_raw_covariance(raw_pick,
                                 tstep=None,
                                 method=method,
                                 rank='full',
                                 method_params=method_params)
    assert cov_mne.ch_names[:5] == cov.ch_names
    assert_snr(cov.data, cov_mne.data[:5, :5], 5e6)
    cov = compute_raw_covariance(raw_pick,
                                 method=method,
                                 rank='full',
                                 method_params=method_params)
    assert_snr(cov.data, cov_mne.data[:5, :5], 90)  # cutoff samps
    # make sure we get a warning with too short a segment
    raw_2 = read_raw_fif(raw_fname).crop(0, 1)
    with pytest.warns(RuntimeWarning, match='Too few samples'):
        cov = compute_raw_covariance(raw_2,
                                     method=method,
                                     method_params=method_params)
    # no epochs found due to rejection
    pytest.raises(ValueError,
                  compute_raw_covariance,
                  raw,
                  tstep=None,
                  method='empirical',
                  reject=dict(eog=200e-6))
    # but this should work
    with pytest.warns(None):  # sklearn
        cov = compute_raw_covariance(raw.copy().crop(0, 10.),
                                     tstep=None,
                                     method=method,
                                     reject=dict(eog=1000e-6),
                                     method_params=method_params,
                                     verbose='error')
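A self-contained sketch of the comparison pattern above, using random toy data rather than MNE objects: np.triu_indices selects the upper triangle (diagonal included) of two covariance estimates so they can be compared entry-wise.

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((5, 1000))     # 5 "channels", 1000 samples
cov_a = np.cov(X, ddof=1)
cov_b = np.cov(X[:, :-1], ddof=1)      # same data minus one sample

up = np.triu_indices(cov_a.shape[0])   # upper triangle incl. diagonal
print(np.max(np.abs(cov_a[up] - cov_b[up])))  # largest discrepancy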
Example #56
0
def computeParameters(neighbors, potential_config):
    # First we need a list of every unique combination of
    # two neighbors, not considering [0, 1] to be unique
    # compared to [1, 0]. More specifically, a different
    # order does not make the pair unique.
    length = neighbors.shape[0]

    grid = np.mgrid[0:length, 0:length]
    grid = grid.swapaxes(0, 2).swapaxes(0, 1)
    m = grid.shape[0]
    r, c = np.triu_indices(m, 1)
    combinations = grid[r, c]

    left_array = neighbors[combinations[:, 0]]
    right_array = neighbors[combinations[:, 1]]

    # Now we use these pairs of vectors to compute an array of
    # cos(theta) values. Strangely enough, this appears to be the
    # fastest way to do this.
    dot_products = np.einsum('ij,ij->i', left_array, right_array)

    # This is the magnitude of all of the vectors in the left array.
    left_magnitudes = np.linalg.norm(left_array, axis=1)

    # This is the magnitude of all of the vectors in the right array.
    right_magnitudes = np.linalg.norm(right_array, axis=1)

    # The following two lines are essentially computing
    # (r_i * r_j) / (|r_i||r_j|) where '*' denotes the dot product.
    magnitude_products = left_magnitudes * right_magnitudes
    angular_values = dot_products / magnitude_products

    # Here we skip some steps and just add an array of 1.0 onto
    # the array of cos(theta) values. This is for all cases where
    # i = j, so we know for a fact that theta = 0 and cos(theta) = 1.0
    dupl_indices = np.arange(0, length, 1)
    dupl_magnitudes = np.linalg.norm(neighbors[dupl_indices], axis=1)
    angular_values = np.concatenate((angular_values, np.tile([1.0], length)))

    # angular_values now holds an array of cos(theta_ijk) for all unique i, j.

    # Next, we need to compute an array of radial terms for each r0 value.
    s2 = 1.0 / (potential_config.gi_sigma**2)

    # This is an array of all radial terms for
    # all values of r0.
    radial_terms = []

    # These operations are not done inside of the subsequent loop, because
    # their values do not vary with respect to r0. It is worth noting that
    # you could do this inside of the loop without any slowdown, but that is
    # because numpy will cache the values and does not compute them again when
    # it doesn't need to.

    # The computation involving tanh at the end of the cutoff function
    # terms is just a mathematical way of making fc be zero if r > rc.
    # Adding an if statement would require numpy to jump out of c code and
    # into python code in order to evaluate it. This would significantly
    # slow down the operation. (During testing slowdown was 50 - 100 times)
    # see https://www.desmos.com/calculator/puz9hpi090
    # This has been thoroughly tested against some c code that uses an if
    # statement. The results are bitwise identical for 9 large test cases.
    d4 = np.square(np.square(potential_config.truncation_distance))
    left_r_rc_unmodified = left_magnitudes - potential_config.cutoff_distance
    left_r_rc_terms = np.square(np.square(left_r_rc_unmodified))
    left_fc = (left_r_rc_terms / (d4 + left_r_rc_terms))
    left_fc *= (0.5 * np.tanh(-1e6 * (left_r_rc_unmodified)) + 0.5)

    right_r_rc_unmodified = right_magnitudes - potential_config.cutoff_distance
    right_r_rc_terms = np.square(np.square(right_r_rc_unmodified))
    right_fc = (right_r_rc_terms / (d4 + right_r_rc_terms))
    right_fc *= (0.5 * np.tanh(-1e6 * (right_r_rc_unmodified)) + 0.5)

    r_rc_unmodified = dupl_magnitudes - potential_config.cutoff_distance
    r_rc_terms = np.square(np.square(r_rc_unmodified))
    fc = (r_rc_terms / (d4 + r_rc_terms))
    fc *= (0.5 * np.tanh(-1e6 * (r_rc_unmodified)) + 0.5)

    # Here we calculate the radial term for all values of r0.
    for r0n in potential_config.r0:

        # The left_* and right_* arrays correspond to cases where
        # r_i != r_j. In these cases, we need to calculate both of
        # the functions (f) independently.
        left_term = np.exp(-s2 * np.square(left_magnitudes - r0n))
        full_left_term = left_term * left_fc

        right_term = np.exp(-s2 * np.square(right_magnitudes - r0n))
        full_right_term = right_term * right_fc

        # These two arrays correspond to cases where r_i = r_j and we
        # know that we just need to square the value of the function
        # (f) after computing it once.
        term = np.exp(-s2 * np.square(dupl_magnitudes - r0n))
        full_term = term * fc

        # In this statement, we multiply the radial term by 2, because
        # cases where r_i != r_j are supposed to be repeated, with the
        # vectors swapped. Since the function we are computing on them
        # is commutative, we can just compute one case of r_i != r_j
        # and double it to account for the case where r_i is swapped with
        # r_j. This cuts the computation time in half.
        to_add = np.concatenate(
            (2 * full_right_term * full_left_term, np.square(full_term)))
        radial_terms.append(to_add)

    # Now radial_terms is an array where each first index corresponds
    # to an r0 value and each second index corresponds to the product
    # of the radial terms for a unique combination of neighbors.

    # For each r0 and for each combination of neighbors, we now
    # need to compute the m-th Legendre polynomial of the cosine
    # of the angle between the two.

    # This uses the recursive definition of the Legendre Polynomials
    # in order to generalize to any specified order in the nn file.

    max_pm = max(potential_config.legendre_orders)

    legendre_polynomials = np.zeros((max_pm + 1, len(angular_values)))
    legendre_polynomials[0] = np.ones(len(angular_values))
    legendre_polynomials[1] = angular_values

    for order in range(1, max_pm):
        current_pm = (2 * order +
                      1) * angular_values * legendre_polynomials[order]
        current_pm -= order * legendre_polynomials[order - 1]
        current_pm /= (order + 1)
        legendre_polynomials[order + 1] = current_pm

    # Now we multiply the Legendre Polynomial terms by the radial terms and
    # sum them. This also selects the desired legendre polynomials from the
    # list of those computed. Since the recursive definition is used, legendre
    # polynomials may be computed that aren't actually used in the final
    # result.
    len_pm = len(potential_config.legendre_orders)
    structural_parameters = np.zeros(len_pm * len(potential_config.r0))
    idx = 0
    for order in potential_config.legendre_orders:
        for r0n in radial_terms:
            current_param = np.sum(legendre_polynomials[order] * r0n)
            structural_parameters[idx] = current_param
            idx += 1

    # The following lines exist to adhere to a combination of the
    # hyperparameter definition in the network potential file and the
    # configuration values specified in the config file.
    sp = structural_parameters
    sp /= np.square(np.tile(potential_config.r0, len_pm))

    if potential_config.gi_mode == 5:
        return np.arcsinh(sp)
    else:
        return sp
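A hedged sketch of the unique-pair selection at the top of computeParameters: instead of building an index grid with np.mgrid, the same (i, j) pairs with i < j can be read straight from np.triu_indices (toy neighbor vectors, not real structure data).

import numpy as np

neighbors = np.array([[1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0],
                      [1.0, 1.0, 0.0],
                      [0.0, 0.0, 2.0]])

# One (r, c) pair per unordered combination of two distinct neighbors.
r, c = np.triu_indices(neighbors.shape[0], 1)
left, right = neighbors[r], neighbors[c]

# cos(theta_ij) for every pair i < j, as in the einsum-based code above.
cos_theta = np.einsum('ij,ij->i', left, right) / (
    np.linalg.norm(left, axis=1) * np.linalg.norm(right, axis=1))
print(cos_theta.shape)  # (6,) -> 4 * 3 / 2 pairs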
Example #57
0
    def build_all_commutators(self, excitations_list, hopping_operators,
                              type_of_commutativities):
        """Building all commutators for Q, W, M, V matrices.

        Args:
            excitations_list (list): single excitations list + double excitation list
            hopping_operators (dict): all hopping operators based on excitations_list,
                                      key is the string of single/double excitation; value is corresponding operator.
            type_of_commutativities: if tapering is used, it records the commutativities of hopping operators with the
                                     Z2 symmetries found in the original operator.
        Returns:
            dict: key: a string of matrix indices; value: the commutators for Q matrix
            dict: key: a string of matrix indices; value: the commutators for W matrix
            dict: key: a string of matrix indices; value: the commutators for M matrix
            dict: key: a string of matrix indices; value: the commutators for V matrix
            int: number of entries in the matrix
        """
        size = len(excitations_list)
        m_commutators = np.empty((size, size), dtype=object)
        v_commutators = np.empty((size, size), dtype=object)
        q_commutators = np.empty((size, size), dtype=object)
        w_commutators = np.empty((size, size), dtype=object)
        # get all to-be-processed index
        if self._is_eom_matrix_symmetric:
            mus, nus = np.triu_indices(size)
        else:
            mus, nus = np.indices((size, size))
            mus = np.asarray(mus.flat)
            nus = np.asarray(nus.flat)

        def _build_one_sector(available_hopping_ops):

            to_be_computed_list = []
            for idx in range(len(mus)):
                mu = mus[idx]
                nu = nus[idx]
                left_op = available_hopping_ops.get(
                    '_'.join([str(x) for x in excitations_list[mu]]), None)
                right_op_1 = available_hopping_ops.get(
                    '_'.join([str(x) for x in excitations_list[nu]]), None)
                right_op_2 = available_hopping_ops.get(
                    '_'.join([str(x) for x in reversed(excitations_list[nu])]),
                    None)
                to_be_computed_list.append(
                    (mu, nu, left_op, right_op_1, right_op_2))

            if logger.isEnabledFor(logging.INFO):
                logger.info("Building all commutators:")
                TextProgressBar(sys.stderr)
            results = parallel_map(
                QEquationOfMotion._build_commutator_rountine,
                to_be_computed_list,
                task_args=(self._untapered_op, self._z2_symmetries))
            for result in results:
                mu, nu, q_mat_op, w_mat_op, m_mat_op, v_mat_op = result
                q_commutators[mu][
                    nu] = op_converter.to_tpb_grouped_weighted_pauli_operator(
                        q_mat_op,
                        TPBGroupedWeightedPauliOperator.sorted_grouping
                    ) if q_mat_op is not None else q_commutators[mu][nu]
                w_commutators[mu][
                    nu] = op_converter.to_tpb_grouped_weighted_pauli_operator(
                        w_mat_op,
                        TPBGroupedWeightedPauliOperator.sorted_grouping
                    ) if w_mat_op is not None else w_commutators[mu][nu]
                m_commutators[mu][
                    nu] = op_converter.to_tpb_grouped_weighted_pauli_operator(
                        m_mat_op,
                        TPBGroupedWeightedPauliOperator.sorted_grouping
                    ) if m_mat_op is not None else m_commutators[mu][nu]
                v_commutators[mu][
                    nu] = op_converter.to_tpb_grouped_weighted_pauli_operator(
                        v_mat_op,
                        TPBGroupedWeightedPauliOperator.sorted_grouping
                    ) if v_mat_op is not None else v_commutators[mu][nu]

        available_entry = 0
        if not self._z2_symmetries.is_empty():
            for targeted_tapering_values in itertools.product(
                [1, -1], repeat=len(self._z2_symmetries.symmetries)):
                logger.info("In sector: ({})".format(','.join(
                    [str(x) for x in targeted_tapering_values])))
                # remove the excited operators which are not suitable for the sector
                available_hopping_ops = {}
                targeted_sector = (np.asarray(targeted_tapering_values) == 1)
                for key, value in type_of_commutativities.items():
                    value = np.asarray(value)
                    if np.all(value == targeted_sector):
                        available_hopping_ops[key] = hopping_operators[key]
                _build_one_sector(available_hopping_ops)
                available_entry += len(available_hopping_ops) * len(
                    available_hopping_ops)
        else:
            available_hopping_ops = hopping_operators
            _build_one_sector(available_hopping_ops)
            available_entry = len(available_hopping_ops) * len(
                available_hopping_ops)

        return q_commutators, w_commutators, m_commutators, v_commutators, available_entry
Example #58
0
def local_variation(data):
    r"""
    Calculates the local variation of inter-contact times. [LV-1]_, [LV-2]_

    Parameters
    ----------

    data : array, dict
        This is either (1) temporal network input (graphlet or contact) with nettype: 'bu', 'bd'.
        (2) dictionary of ICTs (output of *intercontacttimes*).


    Returns
    -------
    LV : array
        Local variation per edge.


    Notes
    ------

    The local variation is similar to the burstiness coefficient and quantifies whether a series of inter-contact times is periodic, random (Poisson distributed), or bursty.

    It is defined as:

    .. math:: LV = \frac{3}{n-1}\sum_{i=1}^{n-1}\left(\frac{\iota_i - \iota_{i+1}}{\iota_i + \iota_{i+1}}\right)^2

    Where :math:`\iota` are inter-contact times and i is the index of the inter-contact time (not a node index).
    n is the number of events, making n-1 the number of inter-contact times.

    The possible range is: :math:`0 \leq LV < 3`.

    When periodic, LV=0; when Poisson, LV=1. Larger LVs indicate a bursty process.


    Examples
    ---------

    First import all necessary packages

    >>> import teneto
    >>> import numpy as np

    Now create 2 temporal networks of 2 nodes and 60 time points. The first has periodic edges, repeating every other time-point:

    >>> G_periodic = np.zeros([2, 2, 60])
    >>> ts_periodic = np.arange(0, 60, 2)
    >>> G_periodic[:,:,ts_periodic] = 1

    The second has a more bursty pattern of edges:

    >>> ts_bursty = [1, 8, 9, 32, 33, 34, 39, 40, 50, 51, 52, 55]
    >>> G_bursty = np.zeros([2, 2, 60])
    >>> G_bursty[:,:,ts_bursty] = 1

    Now we call local variation for each edge.

    >>> LV_periodic = teneto.networkmeasures.local_variation(G_periodic)
    >>> LV_periodic
    array([[nan,  0.],
           [ 0., nan]])

    Above we can see that between node 0 and 1, LV=0 (the diagonal is nan).
    This is indicative of periodic contacts (which is what we defined).
    Doing the same for the second example:

    >>> LV_bursty = teneto.networkmeasures.local_variation(G_bursty)
    >>> LV_bursty
    array([[       nan, 1.28748748],
           [1.28748748,        nan]])

    When the value is greater than 1, it indicates a bursty process.

    NaNs are returned if there are no inter-contact times.

    References
    ----------

    .. [LV-1]

        Shinomoto et al (2003)
        Differences in spiking patterns among cortical neurons.
        Neural Computation 15.12
        [`Link <https://www.mitpressjournals.org/doi/abs/10.1162/089976603322518759>`_]

    .. [LV-2]

        Follows eq. 4.34 in Masuda N & Lambiotte (2016)
        A guide to temporal networks. World Scientific.
        Series on Complex Networks. Vol 4
        [`Link <https://www.worldscientific.com/doi/abs/10.1142/9781786341150_0001>`_]

    """
    ict = 0  # flag: are inter-contact times already present?
    if isinstance(data, dict):
        if 'intercontacttimes' in data:
            ict = 1
    # if inter-contact times are not calculated yet, calculate them
    if ict == 0:
        data = intercontacttimes(data)

    if data['nettype'][1] == 'u':
        ind = np.triu_indices(data['intercontacttimes'].shape[0], k=1)
    if data['nettype'][1] == 'd':
        triu = np.triu_indices(data['intercontacttimes'].shape[0], k=1)
        tril = np.tril_indices(data['intercontacttimes'].shape[0], k=-1)
        ind = [[], []]
        ind[0] = np.concatenate([tril[0], triu[0]])
        ind[1] = np.concatenate([tril[1], triu[1]])
        ind = tuple(ind)

    ict_shape = data['intercontacttimes'].shape

    lv = np.zeros(ict_shape)

    for n in range(len(ind[0])):
        icts = data['intercontacttimes'][ind[0][n], ind[1][n]]
        # make sure there is some contact
        if icts is not None:
            lv_nonnorm = np.sum(
                np.power((icts[:-1] - icts[1:]) / (icts[:-1] + icts[1:]), 2))
            lv[ind[0][n], ind[1][n]] = (3 / len(icts)) * lv_nonnorm
        else:
            lv[ind[0][n], ind[1][n]] = np.nan

    # Make symmetric if undirected
    if data['nettype'][1] == 'u':
        lv = lv + lv.transpose()

    for n in range(lv.shape[0]):
        lv[n, n] = np.nan

    return lv
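A small, self-contained sketch of the directed-network index handling above: the strictly upper and strictly lower triangles are concatenated so that every off-diagonal (i, j) entry is visited exactly once.

import numpy as np

n = 4
triu = np.triu_indices(n, k=1)
tril = np.tril_indices(n, k=-1)
ind = (np.concatenate([tril[0], triu[0]]),
       np.concatenate([tril[1], triu[1]]))
print(len(ind[0]))  # 12 = n * (n - 1) off-diagonal entries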
Example #59
0
def Euc_to_fst(vector_lib,n_comp= 5,pop_max= 8,Iter= 20,bias_range= [20,300],Eigen= False, Scale= False,Centre= True,ploidy= 1):
    ### Select pre and post processing measures. 
        
    length_haps= vector_lib.shape[1]
    
    Iter= 20 # repeats (note: this overrides the Iter argument above)
    
    #### Predict
    predicted= []

    #def controled_fsts(vector_lib,Eigen,length_haps,Scale,Center,N_pops,n_comp,Iter,N_sims,MixL,MixP,Pairs):
    lengths_vector= []

    ### store distances between centroids
    biased_pairwise= []

    ### store PC projection:
    dist_PC_corrected= {x:[] for x in range(n_comp)}

    ### store fsts
    fst_store= []


    ### proceed.

    for rep in range(Iter):
        clear_output()
        
        N_pops= np.random.choice(range(3,pop_max),1,replace= False)[0]
        
        ## Population Sizes and labels
        bias_scheme= np.random.choice(range(bias_range[0],bias_range[1]),N_pops,replace= False)
        
        bias_labels= np.repeat(np.array([x for x in range(N_pops)]),bias_scheme)
        
        ### triangular matrices extract.
        iu1= np.triu_indices(N_pops,1) # for centroid comparison

        iu_bias= np.triu_indices(sum(bias_scheme),1)

        iu_control= np.triu_indices(2,1)

        Pops= np.random.choice(vector_lib.shape[0],N_pops,replace= False)
        print('Iter: {}, vectors selected: {}, hap length: {}'.format(rep,Pops,length_haps))
        ########## FST

        freqs_selected= vector_lib[Pops,:length_haps]
        Pairwise= Ste.return_fsts2(freqs_selected)

        #fsts_compare = scale(Pairwise.fst)
        fsts_compare= Pairwise.fst
        
        fst_store.extend(fsts_compare)

        ## lengths
        lengths_vector.extend([length_haps] * len(fsts_compare))
        
        #### generate data and perform PCA
        data= []

        for k in range(N_pops):

            probs= vector_lib[Pops[k],:]

            m= bias_scheme[k]
            Haps= [[np.random.choice([ploidy,0],p= [1-probs[x],probs[x]]) for x in range(length_haps)] for acc in range(m)]

            data.extend(Haps)

        data2= np.array(data)

        if Scale:
            data2= scale(data2)

        pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(data2)

        feat_bias= pca.transform(data2)

        if Eigen:
            feat_bias= feat_bias * pca.explained_variance_ratio_

        #### Centroid distances
        
        bias_centroids= [np.mean(feat_bias[[y for y in range(feat_bias.shape[0]) if bias_labels[y] == z],:],axis= 0) for z in range(N_pops)]
        bias_centroids= np.array(bias_centroids)

        bias_pair_dist= pairwise_distances(bias_centroids,metric= 'euclidean')
        bias_pair_dist= bias_pair_dist[iu1]
        #bias_pair_dist= scale(bias_pair_dist)
        
        biased_pairwise.extend(bias_pair_dist)

    
    Size= length_haps
    fst_lm_range= [0,.3]
    
    Lindexes= [x for x in range(len(lengths_vector)) if lengths_vector[x] == Size and fst_store[x] >= fst_lm_range[0] and fst_store[x] <= fst_lm_range[1]]
    y_true= [np.log(biased_pairwise[x]) for x in Lindexes]
    fst_x= [np.log(fst_store[x]) for x in Lindexes]
    m_coeff,b= np.polyfit(y_true,fst_x,1)
    
    return m_coeff, b, biased_pairwise, fst_x, y_true
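A hedged sketch of the centroid-distance step above, with toy centroids instead of PCA projections: the full pairwise Euclidean distance matrix is reduced to one value per unordered pair via its strictly upper triangle, matching the per-pair FST vector.

import numpy as np

centroids = np.array([[0.0, 0.0],
                      [1.0, 0.0],
                      [0.0, 2.0]])

# Full pairwise Euclidean distance matrix, then its strictly upper triangle.
diff = centroids[:, None, :] - centroids[None, :, :]
dist = np.sqrt((diff ** 2).sum(axis=-1))

iu1 = np.triu_indices(centroids.shape[0], 1)
print(dist[iu1])  # [1.0, 2.0, sqrt(5)] for the 3 unordered pairs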
Example #60
0
train_label = train_label["all_label"]
train_label = np.reshape(train_label, (-1, ))

# Calculate edges for all parcellations for training data
num_sub = np.shape(train_mats_aal)[2]
num_node_aal = np.shape(train_mats_aal)[0]
num_edge_aal = num_node_aal * (num_node_aal - 1) // 2
num_node_ho = np.shape(train_mats_ho)[0]
num_edge_ho = num_node_ho * (num_node_ho - 1) // 2
num_node_cc = np.shape(train_mats_cc)[0]
num_edge_cc = num_node_cc * (num_node_cc - 1) // 2

train_edges = np.zeros([num_edge_aal + num_edge_ho + num_edge_cc, num_sub])

for i_sub in range(num_sub):
    iu_aal = np.triu_indices(num_node_aal, 1)
    iu_ho = np.triu_indices(num_node_ho, 1)
    iu_cc = np.triu_indices(num_node_cc, 1)
    train_edges[0:num_edge_aal, i_sub] = train_mats_aal[iu_aal[0], iu_aal[1],
                                                        i_sub]
    train_edges[num_edge_aal:(num_edge_aal + num_edge_ho),
                i_sub] = train_mats_ho[iu_ho[0], iu_ho[1], i_sub]
    train_edges[(num_edge_aal + num_edge_ho):(num_edge_aal + num_edge_ho +
                                              num_edge_cc),
                i_sub] = train_mats_cc[iu_cc[0], iu_cc[1], i_sub]

train_edges_aal = train_edges[0:num_edge_aal, ]
train_edges_ho = train_edges[num_edge_aal:(num_edge_aal + num_edge_ho), ]
train_edges_cc = train_edges[(num_edge_aal +
                              num_edge_ho):(num_edge_aal + num_edge_ho +
                                            num_edge_cc), ]
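A minimal sketch of the same edge extraction on toy data: because the row/column index arrays broadcast against the trailing subject axis, the per-subject loop above can also be written as one fancy-indexing step (hypothetical random matrices, single parcellation).

import numpy as np

rng = np.random.default_rng(0)
num_node, num_sub = 10, 3
mats = rng.standard_normal((num_node, num_node, num_sub))

iu = np.triu_indices(num_node, 1)
edges = mats[iu[0], iu[1], :]   # shape: (num_node * (num_node - 1) // 2, num_sub)
print(edges.shape)              # (45, 3)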