Example #1
 def __init__(self, value):
     self.value = value
     self.value_ = np.linalg.cholesky(value)[np.tril_indices(value.shape[0])]
     self.shape = value.shape
     self.size = value.size
     self.free = np.resize(True, self.value_.shape)
     self.to_external = lambda val: np.linalg.cholesky(val)[np.tril_indices(self.shape[0])]
Example #2
def actor_critic_update(X0, A, R, X1, gamma=0.99, learnrate=0.0001):
    global WP1, WP2, WV_A, WV_b, WV
    global grad_WP1, grad_WP2, grad_N
    global grad_WP1s, grad_WP2s
    global noise_cov
    S0 = XtoS(X0)
    S1 = XtoS(X1)
    SS0 = np.outer(S0, S0)[np.tril_indices(7)]
    SS1 = np.outer(S1, S1)[np.tril_indices(7)]

    deltaV = SS0 - gamma * SS1
    WV_A += np.outer(deltaV, deltaV)
    WV_b += R * deltaV
    WV = np.linalg.solve(WV_A, WV_b)

    advantage = R + gamma * np.dot(SS1, WV) - np.dot(SS0, WV)
    g1 = -0.5*np.outer(A, A) * advantage
    grad_WP1 += g1
    grad_WP1s += g1*g1
    g2 = np.outer(A, SS0) * advantage
    grad_WP2 += g2
    grad_WP2s += g2*g2
    grad_N += 1
    # if X0[4] > 4:
    #    print 'V(s0)', np.dot(SS0, WV), 'V(s1)', np.dot(SS1, WV), 'R', R
    #    print 'A', A, 'adv', advantage # , 'g_wp1\n', grad_WP1, 'g_wp2\n', grad_WP2
    noise_cov = np.linalg.inv(np.linalg.cholesky(
        WP1 + 1e-2 + np.eye(2))).T
Example #3
    def test_frozen(self):
        # Test that the frozen and non-frozen inverse Wishart give the same
        # answers

        # Construct an arbitrary positive definite scale matrix
        dim = 4
        scale = np.diag(np.arange(dim)+1)
        scale[np.tril_indices(dim, k=-1)] = np.arange(dim*(dim-1)//2)
        scale = np.dot(scale.T, scale)

        # Construct a collection of positive definite matrices to test the PDF
        X = []
        for i in range(5):
            x = np.diag(np.arange(dim)+(i+1)**2)
            x[np.tril_indices(dim, k=-1)] = np.arange(dim*(dim-1)//2)
            x = np.dot(x.T, x)
            X.append(x)
        X = np.array(X).T

        # Construct a 1D and 2D set of parameters
        parameters = [
            (10, 1, np.linspace(0.1, 10, 5)),  # 1D case
            (10, scale, X)
        ]

        for (df, scale, x) in parameters:
            iw = invwishart(df, scale)
            assert_equal(iw.var(), invwishart.var(df, scale))
            assert_equal(iw.mean(), invwishart.mean(df, scale))
            assert_equal(iw.mode(), invwishart.mode(df, scale))
            assert_allclose(iw.pdf(x), invwishart.pdf(x, df, scale))
Example #4
def amplitudes_to_cisdvec(c0, c1, c2):
    nocc, nvir = c1.shape
    ooidx = numpy.tril_indices(nocc, -1)
    vvidx = numpy.tril_indices(nvir, -1)
    c2tril = lib.take_2d(c2.reshape(nocc**2,nvir**2),
                         ooidx[0]*nocc+ooidx[1], vvidx[0]*nvir+vvidx[1])
    return numpy.hstack((c0, c1.ravel(), c2tril.ravel()))
Example #5
    def find_stationary_var(amat=None, bmat=None, cmat=None):
        """Find fixed point of H = CC' + AHA' + BHB' given A, B, C.

        Parameters
        ----------
        amat, bmat, cmat : (nstocks, nstocks) arrays
            Parameter matrices

        Returns
        -------
        (nstocks, nstocks) array
            Unconditional variance matrix

        """
        nstocks = amat.shape[0]
        kwargs = {'amat': amat, 'bmat': bmat, 'ccmat': cmat.dot(cmat.T)}
        fun = partial(ParamGeneric.fixed_point, **kwargs)
        try:
            with np.errstate(divide='ignore', invalid='ignore'):
                hvar = np.eye(nstocks)
                sol = sco.fixed_point(fun, hvar[np.tril_indices(nstocks)])
                hvar[np.tril_indices(nstocks)] = sol
                hvar[np.triu_indices(nstocks, 1)] \
                    = hvar.T[np.triu_indices(nstocks, 1)]
                return hvar
        except RuntimeError:
            # warnings.warn('Could not find stationary variance!')
            return None
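A minimal standalone sketch of the fixed-point idea above (the parameter matrices below are assumed, chosen small enough that the iteration contracts): iterating H <- CC' + AHA' + BHB' directly converges to the same unconditional variance that sco.fixed_point solves for.

import numpy as np

a = np.eye(2) * 0.3                      # assumed AR parameter matrix
b = np.eye(2) * 0.4                      # assumed MA-type parameter matrix
c = np.array([[1.0, 0.0],
              [0.2, 0.5]])               # assumed Cholesky-style C

h = np.eye(2)
for _ in range(200):
    h = c @ c.T + a @ h @ a.T + b @ h @ b.T

# h now satisfies the stationarity equation to numerical precision
assert np.allclose(h, c @ c.T + a @ h @ a.T + b @ h @ b.T)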
Example #6
def test_tril_indices():
    # indices without and with offset
    il1 = tril_indices(4)
    il2 = tril_indices(4, 2)

    a = np.array([[1, 2, 3, 4],
                  [5, 6, 7, 8],
                  [9, 10, 11, 12],
                  [13, 14, 15, 16]])

    # indexing:
    yield (assert_array_equal, a[il1],
           array([ 1,  5,  6,  9, 10, 11, 13, 14, 15, 16]) )

    # And for assigning values:
    a[il1] = -1
    yield (assert_array_equal, a,
    array([[-1,  2,  3,  4],
           [-1, -1,  7,  8],
           [-1, -1, -1, 12],
           [-1, -1, -1, -1]]) )

    # These cover almost the whole array (two diagonals right of the main one):
    a[il2] = -10
    yield (assert_array_equal, a,
    array([[-10, -10, -10,   4],
           [-10, -10, -10, -10],
           [-10, -10, -10, -10],
           [-10, -10, -10, -10]]) )
Example #7
    def test_equivalence(self):
        """
        The Equivalence covariance structure can represent an
        exchangeable covariance structure.  Here we check that the
        results are identical using the two approaches.
        """

        np.random.seed(3424)
        endog = np.random.normal(size=20)
        exog = np.random.normal(size=(20, 2))
        exog[:, 0] = 1
        groups = np.kron(np.arange(5), np.ones(4))
        groups[12:] = 3 # Create unequal size groups

        # Set up an Equivalence covariance structure to mimic an
        # Exchangeable covariance structure.
        pairs = {}
        start = [0, 4, 8, 12]
        for k in range(4):
            pairs[k] = {}

            # Diagonal values (variance parameters)
            if k < 3:
                pairs[k][0] = (start[k] + np.r_[0, 1, 2, 3],
                               start[k] + np.r_[0, 1, 2, 3])
            else:
                pairs[k][0] = (start[k] + np.r_[0, 1, 2, 3, 4, 5, 6, 7],
                               start[k] + np.r_[0, 1, 2, 3, 4, 5, 6, 7])

            # Off-diagonal pairs (covariance parameters)
            if k < 3:
                a, b = np.tril_indices(4, -1)
                pairs[k][1] = (start[k] + a, start[k] + b)
            else:
                a, b = np.tril_indices(8, -1)
                pairs[k][1] = (start[k] + a, start[k] + b)

        ex = sm.cov_struct.Exchangeable()
        model1 = sm.GEE(endog, exog, groups, cov_struct=ex)
        result1 = model1.fit()

        for return_cov in False, True:

            ec = sm.cov_struct.Equivalence(pairs, return_cov=return_cov)
            model2 = sm.GEE(endog, exog, groups, cov_struct=ec)
            result2 = model2.fit()

            # Use large atol/rtol for the correlation case since there
            # are some small differences in the results due to degree
            # of freedom differences.
            if return_cov == True:
                atol, rtol = 1e-6, 1e-6
            else:
                atol, rtol = 1e-3, 1e-3
            assert_allclose(result1.params, result2.params, atol=atol, rtol=rtol)
            assert_allclose(result1.bse, result2.bse, atol=atol, rtol=rtol)
            assert_allclose(result1.scale, result2.scale, atol=atol, rtol=rtol)
Example #8
def full_to_unique(y_full, feedmap, feedmask=None):

    if feedmask is None:
        feedmask = np.ones(feedmap.shape, dtype=bool)

    y_full[np.tril_indices(feedmap.shape[0])] = y_full[np.tril_indices(feedmap.shape[0])].conj()
    y_unique = y_full[np.where(feedmask)][np.unique(feedmap[np.where(feedmask)], return_index=True)[1]]
    
    return y_unique
Example #9
 def tril_index_matrix(self):
     n = self.global_size
     num_tril_entries = self.num_tril_entries
     tril_index_matrix = np.zeros([n, n], dtype=int)
     tril_index_matrix[np.tril_indices(n)] = np.arange(num_tril_entries)
     tril_index_matrix[
         np.tril_indices(n)[::-1]
     ] = np.arange(num_tril_entries)
     return tril_index_matrix
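For intuition, the same indexing trick in a tiny standalone form (values assumed): packing indices 0..5 into the lower triangle and mirroring them with the reversed index tuple yields a symmetric matrix of packed-storage positions.

import numpy as np

n = 3
num_tril_entries = n * (n + 1) // 2
m = np.zeros((n, n), dtype=int)
m[np.tril_indices(n)] = np.arange(num_tril_entries)
# reversing the (rows, cols) tuple swaps it to (cols, rows), writing the
# same values into the mirrored upper-triangle positions
m[np.tril_indices(n)[::-1]] = np.arange(num_tril_entries)
print(m)
# [[0 1 3]
#  [1 2 4]
#  [3 4 5]]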
Example #10
def unique_to_full(y_unique, feedmap, feedmask=None):

    y_full = y_unique[feedmap]
    y_full[np.tril_indices(feedmap.shape[0])] = y_full[np.tril_indices(feedmap.shape[0])].conj()
    
    if feedmask is not None:
        y_full[np.where(np.logical_not(feedmask))] = 0.0

    return y_full
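A toy round trip of this helper's core logic, under assumed inputs (the 2x2 feedmap below is hypothetical): the index map scatters the unique values onto the full grid, and the lower triangle is then conjugated.

import numpy as np

y_unique = np.array([1 + 2j, 3 + 4j])
feedmap = np.array([[0, 1],
                    [1, 0]])            # hypothetical index map
y_full = y_unique[feedmap]              # scatter unique values onto the grid
il = np.tril_indices(feedmap.shape[0])
y_full[il] = y_full[il].conj()          # lower triangle stores conjugates
print(y_full)
# [[1.-2.j 3.+4.j]
#  [3.-4.j 1.-2.j]]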
Example #11
    def scrape_args(self, records, scale=1, guide_tree=None, niters=10, keep_topology=False):
        # local lists
        distances = []
        variances = []
        headers = []
        for rec in records:
            distances.append(rec.parameters.partitions.distances)
            variances.append(rec.parameters.partitions.variances)
            headers.append(rec.get_names())

        num_matrices = len(records)
        label_set = reduce(lambda x, y: x.union(y), (set(l) for l in headers))
        labels_len = len(label_set)

        # labels string can be built straight away
        labels_string = '{0}\n{1}\n'.format(labels_len, ' '.join(label_set))

        # distvar and genome_map need to be built up
        distvar_list = [str(num_matrices)]
        genome_map_list = ['{0} {1}'.format(num_matrices, labels_len)]

        # build up lists to turn into strings
        for i in range(num_matrices):
            labels = headers[i]
            dim = len(labels)
            dmatrix = np.array(distances[i])
            vmatrix = np.array(variances[i])
            matrix = np.zeros(dmatrix.shape)
            matrix[np.triu_indices(len(dmatrix), 1)] = dmatrix[np.triu_indices(len(dmatrix), 1)]
            matrix[np.tril_indices(len(vmatrix), -1)] = vmatrix[np.tril_indices(len(vmatrix), -1)]
            if scale:
                matrix[np.triu_indices(dim, 1)] *= scale
                matrix[np.tril_indices(dim, -1)] *= scale * scale

            if isinstance(matrix, np.ndarray):
                matrix_string = '\n'.join([' '.join(str(x) for x in row)
                                           for row in matrix]) + '\n'
            else:
                matrix_string = matrix
            distvar_list.append('{0} {0} {1}\n{2}'.format(dim, i + 1,
                                                          matrix_string))
            genome_map_entry = ' '.join((str(labels.index(lab) + 1)
                                         if lab in labels else '-1')
                                        for lab in label_set)
            genome_map_list.append(genome_map_entry)

        distvar_string = '\n'.join(distvar_list)
        genome_map_string = '\n'.join(genome_map_list)

        if guide_tree is None:
            guide_tree = Tree.new_iterative_rtree(labels_len, names=label_set, rooted=True)

        tree_string = guide_tree.scale(scale).newick.replace('\'', '')

        return distvar_string, genome_map_string, labels_string, tree_string, niters, keep_topology
Example #12
def test_cl_ldl(AA):
    """ Test the CL implentation of LDL algorithm.

    This tests a series (cl_size) of matrices against the Python implementation.
    """
    # Convert to single float
    AA = AA.astype(DTYPE)
    # First calculate the Python based values for each matrix in AA
    py_ldl_D = np.empty((AA.shape[0], AA.shape[2]), dtype=AA.dtype)
    py_ldl_L = np.empty(AA.shape, dtype=AA.dtype)
    for i in range(AA.shape[2]):
        py_ldl_D[..., i], py_ldl_L[..., i] = ldl(AA[..., i])

    # Setup CL context
    import pyopencl as cl
    from pycllp.ldl import cl_krnl_ldl
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Result arrays
    m, n, cl_size = AA.shape
    L = np.empty(cl_size*m*(m+1)//2, dtype=DTYPE)
    D = np.empty(cl_size*m, dtype=DTYPE)

    mf = cl.mem_flags
    A_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=AA)
    # Create and compile kernel
    prg = cl_krnl_ldl(ctx)
    L_g = cl.Buffer(ctx, mf.READ_WRITE, L.nbytes)
    D_g = cl.Buffer(ctx, mf.READ_WRITE, D.nbytes)

    # Test normal LDL (unmodified)
    prg.ldl(queue, (cl_size,), None, np.int32(m), np.int32(n), A_g, L_g, D_g)

    cl.enqueue_copy(queue, L, L_g)
    cl.enqueue_copy(queue, D, D_g)

    # Compare each matrix decomposition with the python equivalent.
    for i in range(cl_size):
        np.testing.assert_allclose(py_ldl_D[..., i], D[i::cl_size], rtol=1e-6, atol=1e-7)
        np.testing.assert_allclose(py_ldl_L[..., i][np.tril_indices(m)], L[i::cl_size], rtol=1e-6, atol=1e-7)

    # Now test the modified algorithm ...
    beta = np.sqrt(np.amax(AA))
    prg.modified_ldl(queue, (cl_size,), None, np.int32(m), np.int32(n), A_g, L_g, D_g,
                     DTYPE(beta), DTYPE(1e-6))

    cl.enqueue_copy(queue, L, L_g)
    cl.enqueue_copy(queue, D, D_g)

    # Compare each matrix decomposition with the python equivalent.
    for i in range(cl_size):
        np.testing.assert_allclose(py_ldl_D[..., i], D[i::cl_size], rtol=1e-6, atol=1e-7)
        np.testing.assert_allclose(py_ldl_L[..., i][np.tril_indices(m)], L[i::cl_size], rtol=1e-6, atol=1e-7)
Example #13
def impute_missing_bins(hic_matrix, regions=None, per_chromosome=True, stat=np.ma.mean):
    """
    Impute missing contacts in a Hi-C matrix.

    For inter-chromosomal data uses the mean of all inter-chromosomal contacts,
    for intra-chromosomal data uses the mean of intra-chromosomal counts at the corresponding diagonal.

    :param hic_matrix: A square numpy array
    :param regions: A list of :class:`~GenomicRegion`s - if omitted, will create a dummy list
    :param per_chromosome: Do imputation on a per-chromosome basis (recommended)
    :param stat: The aggregation statistic to be used for imputation, defaults to the mean.
    """
    if regions is None:
        regions = []
        for i in range(hic_matrix.shape[0]):
            regions.append(GenomicRegion(chromosome='', start=i, end=i))

    chr_bins = dict()
    for i, region in enumerate(regions):
        if region.chromosome not in chr_bins:
            chr_bins[region.chromosome] = [i, i]
        else:
            chr_bins[region.chromosome][1] = i

    n = len(regions)
    if not hasattr(hic_matrix, "mask"):
        hic_matrix = masked_matrix(hic_matrix)

    imputed = hic_matrix.copy()
    if per_chromosome:
        for c_start, c_end in chr_bins.values():
            # Correct intra-chromosomal contacts by the mean contact count at each diagonal
            for i in range(c_end - c_start):
                ind = kth_diag_indices(c_end - c_start, -i)
                diag = imputed[c_start:c_end, c_start:c_end][ind]
                diag[diag.mask] = stat(diag)
                imputed[c_start:c_end, c_start:c_end][ind] = diag
            # Correct inter-chromosomal contacts by the mean of all contact counts between
            # each pair of chromosomes
            for other_start, other_end in chr_bins.values():
                # Only correct upper triangle
                if other_start <= c_start:
                    continue
                inter = imputed[c_start:c_end, other_start:other_end]
                inter[inter.mask] = stat(inter)
                imputed[c_start:c_end, other_start:other_end] = inter
    else:
        for i in range(n):
            diag = imputed[kth_diag_indices(n, -i)]
            diag[diag.mask] = stat(diag)
            imputed[kth_diag_indices(n, -i)] = diag
    # Copying upper triangle to lower triangle
    imputed[np.tril_indices(n)] = imputed.T[np.tril_indices(n)]
    return imputed
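kth_diag_indices is a helper not shown in this example; a plausible sketch consistent with how it is called above (square size n, offset k, negative k meaning below the main diagonal) might look like this, though the real helper may differ.

import numpy as np

def kth_diag_indices(n, k):
    """Indices of the k-th diagonal of an n x n array (assumed signature)."""
    rows, cols = np.diag_indices(n)
    if k < 0:
        return rows[-k:], cols[:k]   # shift down: rows start at -k
    elif k > 0:
        return rows[:-k], cols[k:]   # shift right: cols start at k
    return rows, cols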
Example #14
	def structure_function(self, bins):
		"""
		compute the structure function of the light curve at given time lags
		"""
		dt =  np.subtract.outer(self.t,self.t)[np.tril_indices(self.t.shape[0], k=-1)]
		dm =  np.subtract.outer(self.y,self.y)[np.tril_indices(self.y.shape[0], k=-1)]
		sqrsum, bins, _ = binned_statistic(dt, dm**2, bins=bins, statistic='sum')
		n, _, _ = binned_statistic(dt, dm**2, bins=bins, statistic='count')
		SF = np.sqrt(sqrsum/n)
		lags = 0.5*(bins[1:] + bins[:-1])

		return lags, SF
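The pair-extraction trick above is worth seeing in isolation: np.subtract.outer forms the full difference matrix, and tril_indices(..., k=-1) keeps each unordered pair exactly once. A quick check with assumed toy data:

import numpy as np

t = np.array([0.0, 1.0, 3.0])
dt = np.subtract.outer(t, t)[np.tril_indices(t.shape[0], k=-1)]
print(dt)   # [1. 3. 2.] -> t[i] - t[j] for each pair with i > j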
Example #15
 def get_grad_tril(mo_coeff_kpts, mo_occ_kpts, fock):
     if is_khf:
         grad_kpts = []
         for k, mo in enumerate(mo_coeff_kpts):
             f_mo = reduce(numpy.dot, (mo.T.conj(), fock[k], mo))
             nmo = f_mo.shape[0]
             grad_kpts.append(f_mo[numpy.tril_indices(nmo, -1)])
         return numpy.hstack(grad_kpts)
     else:
         f_mo = reduce(numpy.dot, (mo_coeff_kpts.T.conj(), fock, mo_coeff_kpts))
         nmo = f_mo.shape[0]
         return f_mo[numpy.tril_indices(nmo, -1)]
Example #16
File: data.py Project: elim723/MCEq
    def get_y_matrix(self, projectile, daughter):
        """Returns a ``DIM x DIM`` yield matrix.

        Args:
          projectile (int): PDG ID of projectile particle
          daughter (int): PDG ID of final state daughter/secondary particle
        Returns:
          numpy.array: yield matrix

        Note:
          In the current version, the matrices have to be multiplied by the 
          bin widths. In later versions they will be stored with the multiplication
          carried out. 
        """
        # TODO: modify yields to include the bin size
        if not self.band:
            return self.yields[(projectile, daughter)].dot(self.weights)
        else:
            m = self.yields[(projectile, daughter)].dot(self.weights)
            # set all elements except those inside selected xf band to 0
            
            m[np.tril_indices(self.dim, -2 - self.band[1])] = 0
            if self.band[0] < 0:
                m[np.triu_indices(self.dim, -self.band[0])] = 0
            return m
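A minimal illustration of the same band masking on a dense matrix (the band bounds below are assumed): everything outside a diagonal band is zeroed with one tril and one triu call.

import numpy as np

dim, band = 5, (-1, 1)                     # hypothetical xf band
m = np.ones((dim, dim))
m[np.tril_indices(dim, -2 - band[1])] = 0  # zero far below the diagonal
if band[0] < 0:
    m[np.triu_indices(dim, -band[0])] = 0  # zero above the selected band
print(m)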
Example #17
def threePointsToStandard(e, p, q, r):
    """Return a projective transformation that maps three points on a conic to
the conic xy + yz + xz = 0.
    
    Keyword arguments:
    e -- a projective conic
    p -- the first point on e
    q -- the second point on e
    r -- the third point on e
    """
    coeffs = e
    p, q, r = np.matrix(p), np.matrix(q), np.matrix(r)
    
    # Determine a matrix A associated with a projective transformation that
    # maps P, Q, and R onto [1, 0, 0], [0, 1, 0], and [0, 0, 1], respectively.
    A = np.linalg.inv(np.vstack([p, q, r]))
    
    # Determine the equation bx'y' + fx'z' + gy'z' = 0 of t(E), for some real
    # numbers b, f, and g.
    M = sum([coeff * u.T * v
             for coeff, (u, v)
             in zip(coeffs, combinations_with_replacement((p, q, r), 2))])
    
    # Get B from M by adding like terms to find b, f, and g and then
    # constructing a diagonal matrix from the flat [1/g, 1/f, 1/b].
    B = np.diagflat([1 / (u + v)
                    for u, v
                    in reversed(list(zip(np.array(M)[np.triu_indices(3, 1)],
                                         np.array(M)[np.tril_indices(3, -1)])))])
    
    return B * A
Example #18
    def update_nogrid(self, params):

        endog = self.model.endog_li
        cached_means = self.model.cached_means
        varfunc = self.model.family.variance

        dep_params = np.zeros(self.max_lag + 1)
        dn = np.zeros(self.max_lag + 1)
        for i in range(self.model.num_group):

            expval, _ = cached_means[i]
            stdev = np.sqrt(varfunc(expval))
            resid = (endog[i] - expval) / stdev

            j1, j2 = np.tril_indices(len(expval))
            dx = np.abs(self.time[i][j1] - self.time[i][j2])
            ii = np.flatnonzero(dx <= self.max_lag)
            j1 = j1[ii]
            j2 = j2[ii]
            dx = dx[ii]

            vs = np.bincount(dx, weights=resid[
                             j1] * resid[j2], minlength=self.max_lag + 1)
            vd = np.bincount(dx, minlength=self.max_lag + 1)

            ii = np.flatnonzero(vd > 0)
            dn[ii] += 1
            if len(ii) > 0:
                dep_params[ii] += vs[ii] / vd[ii]

        dep_params /= dn
        self.dep_params = dep_params[1:] / dep_params[0]
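The core trick here, pairing residuals with tril_indices and averaging their products per lag with bincount, can be run in isolation on assumed toy data:

import numpy as np

resid = np.array([1.0, -1.0, 2.0, 0.5])
time = np.array([0, 1, 2, 3])

j1, j2 = np.tril_indices(len(resid))
dx = np.abs(time[j1] - time[j2])             # lag of each pair
vs = np.bincount(dx, weights=resid[j1] * resid[j2])
vd = np.bincount(dx)                         # number of pairs per lag
print(vs / vd)                               # mean residual product by lag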
Example #19
        def multivar_norm_cdf(upper, cov_matrix):
            """CDF of multivariate Gaussian centered at 0 with covariance matrix cov_matrix. CDF is taken from -inf to u."""
            if upper.size == 1:
                return scipy.stats.norm.cdf(upper[0], 0, numpy.sqrt(cov_matrix[0, 0]))

            # Standardize the upper bound u using the standard deviation
            std = numpy.sqrt(numpy.diag(cov_matrix))
            std_upper = upper / std

            # Convert covariance matrix into correlation matrix: http://en.wikipedia.org/wiki/Correlation_and_dependence#Correlation_matrices
            corr_matrix = cov_matrix / std / std.reshape(upper.size, 1)  # standardize -> correlation matrix

            # Indices for traversing the strict lower triangular elements of corr_matrix in column major, as required by the fortran mvndst function.
            strict_lower_diag_indices = numpy.tril_indices(upper.size, -1)

            # Call into the scipy wrapper for the fortran method "mvndst"
            # Link: http://www.math.wsu.edu/faculty/genz/software/fort77/mvtdstpack.f
            out = scipy.stats.kde.mvn.mvndst(
                 numpy.zeros(upper.size, dtype=int),  # The lower bound of integration. We initialize with 0 because it is ignored (because of the third argument).
                 std_upper,  # The upper bound of integration
                 numpy.zeros(upper.size, dtype=int),  # For each dim, 0 means -inf for lower bound
                 corr_matrix[strict_lower_diag_indices],  # The vector of strict lower triangular correlation coefficients
                 maxpts=self._mvndst_parameters.maxpts_per_dim * upper.size,  # Maximum number of iterations for the mvndst function
                 releps=self._mvndst_parameters.releps,  # The error allowed relative to actual value
                 abseps=self._mvndst_parameters.abseps,  # The absolute error allowed
                 )
            return out[1]  # Index 1 corresponds to the actual value. 0 has the error, and 2 is a flag denoting whether releps was reached
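For context, recent SciPy versions also expose a public multivariate normal CDF; a hedged cross-check of the same quantity (the upper bounds and covariance below are assumed) could look like:

import numpy as np
from scipy.stats import multivariate_normal

upper = np.array([0.5, 1.0])
cov = np.array([[1.0, 0.3],
                [0.3, 2.0]])
# P(X1 <= 0.5, X2 <= 1.0) for X ~ N(0, cov); integrates from -inf as mvndst does
print(multivariate_normal(mean=np.zeros(2), cov=cov).cdf(upper))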
Example #20
def vector_from_array(array):
    """Get triangle of output in vector from a correlation-type array.

    Parameters
    ----------
    array : np.array

    Returns
    -------
    vector (np.array)

    Notes
    -----
    Old Matlab code indexes by column (a.k.a. Fortran-style), so to get the
    indices of the upper triangle we have to do some reshaping.
    Otherwise, if a vector built up by rows is acceptable, then simply:
    triangle = np.triu_indices(array.size, k=1), out = array[triangle]
    """
    triangle_lower = np.tril_indices(array.shape[0], k=-1)
    flatten_idx = np.arange(array.size).reshape(array.shape)[triangle_lower]
    triangle = np.unravel_index(flatten_idx, array.shape, order='F')

    # triangle = np.triu_indices(array.size, k=1)
    # out = array[triangle]

    return array[triangle]
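A quick check of the column-major extraction described in the notes, on an assumed 3x3 array (for n = 3 the row-major and column-major orders happen to coincide; they differ for n > 3):

import numpy as np

array = np.arange(9).reshape(3, 3)
triangle_lower = np.tril_indices(array.shape[0], k=-1)
flatten_idx = np.arange(array.size).reshape(array.shape)[triangle_lower]
triangle = np.unravel_index(flatten_idx, array.shape, order='F')
print(array[triangle])                  # [1 2 5] -> upper triangle, by column
print(array[np.triu_indices(3, k=1)])   # [1 2 5] -> row order, same here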
Example #21
 def _getSignificantData(self, sig_lvl):
     '''Find which edges are significant at the passed level and set self properties.
     '''
     rows,cols = self.data.shape #rows = cols
     mask = zeros((rows,cols))
     mask[tril_indices(rows,0)] = 1 #preparing mask
     cvals = unique(self.data[triu_indices(rows,1)]) # cvals is sorted
     # calculate upper bound, i.e. what value in the distribution of values 
     # has sig_lvl fraction of the data higher than or equal to it. this is
     # not guaranteed to be precise because of repeated values. for instance 
     # assume the distribution of dissimilarity values is:
     # [.1, .2, .2, .2, .2, .3, .4, .5, .6, .6, .6, .6, .6, .6, .7] 
     # and you want sig_lvl=.2, i.e. you get 20 percent of the linkages as 
     # significant. this would result in choosing the score .6 since its the
     # third in the ordered list (of 15 elements, 3/15=.2). but, since there
     # is no a-priori way to tell which of the multiple .6 linkages are 
     # significant, we select all of them, forcing our lower bound to 
     # encompass 7/15ths of the data. the round call on the ub is to avoid
     # documented numpy weirdness where it will misassign >= calls for long
     # floats. 
     ub = round(cvals[-round(sig_lvl*len(cvals))],7)
     mdata = ma(self.data, mask)
     self.actual_sig_lvl = \
         (mdata >= ub).sum()/float(mdata.shape[0]*(mdata.shape[0]-1)/2)
     self.sig_edges = where(mdata >= ub, 1, 0).nonzero()
     self.otu1 = [self.otu_ids[i] for i in self.sig_edges[0]]
     self.otu2 = [self.otu_ids[i] for i in self.sig_edges[1]]
     self.sig_otus = list(set(self.otu1+self.otu2))
     self.edges = zip(self.otu1, self.otu2)
     self.cvals = mdata[self.sig_edges[0], self.sig_edges[1]]
Example #22
def scoring2B_behavior():
	t_clusters = np.zeros((600,3))
	t_clusters[0:200,0] = 1
	t_clusters[200:400,1] = 1
	t_clusters[400:,2] = 1
	t_ccm = np.dot(t_clusters,t_clusters.T)

	n_uniq = len(np.triu_indices(t_ccm.shape[0],k=1)[0])
	res = []
	concentrations = [1000,100,50,25,10,5,3,1]
	for c in concentrations:
		for i in range(50):
			ccm = np.copy(t_ccm)
			ccm[np.triu_indices(t_ccm.shape[0],k=1)] -= np.random.beta(1,c,n_uniq)
			#ccm[np.tril_indices(t_ccm.shape[0],k=-1)] = ccm[np.triu_indices(t_ccm.shape[0],k=1)]
			ccm[np.tril_indices(t_ccm.shape[0],k=-1)] = 0
			ccm = ccm + ccm.T
			np.fill_diagonal(ccm,1)
			ccm = np.abs(ccm)
			res.append([c,calculate2(ccm,t_ccm)])
	res = [map(str,x) for x in res]
	res = ['\t'.join(x) for x in res]
	f = open('scoring2B_beta.tsv', 'w')
	f.write('\n'.join(res))
	f.close()	
Example #23
def test_overlap(xSer):
    '''Test the set overlap among items in a Series;
    return an array of all pairwise overlap values.'''
    ns = len(xSer)
    if ns >1:
        # overlapMtrx = pd.DataFrame(data=np.zeros([ns,ns]),
        #         index=xSer.index,
        #         columns=xSer.index)
        overlapMtrx = np.zeros([ns,ns])
        for i1, ix1 in enumerate(xSer.index):
            s1 = xSer[ix1]
            for i2, ix2 in enumerate(xSer.index):
                s2 = xSer[ix2]
                nO = len(s1.intersection(s2))
                # overlapMtrx.ix[ix1,ix2] = nO
                # overlapMtrx.ix[ix2,ix1] = nO
                overlapMtrx[i1,i2] = nO
                overlapMtrx[i2,i1] = nO                
        # blank out the lower triangle (including the diagonal) so only
        # the unique pairwise overlaps in the upper triangle remain
        iLow = np.tril_indices(ns)
        overlapMtrx[iLow] = np.nan
        overlapArray = overlapMtrx.flatten()
        overlapArray = overlapArray[~np.isnan(overlapArray)]
        #
        return overlapArray
    else:
        # return pd.DataFrame()
        return np.zeros(0)
Example #24
def calculate_forces(clf, numbers, coords, meta=None, h=1e-6):
    """
    A function that uses finite differences to calculate forces of a molecule
    using the given clf. The default feature vector for this to use is the
    coulomb matrix.
    """
    if meta is None:
        meta = [1]

    vectors = []
    for i, coord in enumerate(coords):
        for j in range(3):
            for sign in [-1, 1]:
                new_coords = coords.copy()
                new_coords[i, j] += sign * h / 2
                mat = get_coulomb_matrix(numbers, new_coords)
                mat[mat < 0] = 0
                vectors.append(mat[numpy.tril_indices(mat.shape[0])].tolist() + meta)

    results = clf.predict(numpy.matrix(vectors))

    forces = numpy.zeros(coords.shape)
    for i, coord in enumerate(coords):
        for j in range(3):
            forces[i, j] = (results[i * len(coord) * 2 + j * 2 + 1] - results[i * len(coord) * 2 + j * 2]) / h
    return forces
Example #25
def calculate_surface(clf, numbers, coords, atom_idx, max_displacement=0.5, steps=25, meta=None):
    """
    A function that plots the value of the clf as a function of `atom_idx`
    displacement.
    """
    if meta is None:
        meta = [1]

    values = numpy.linspace(-max_displacement, max_displacement, steps)

    results = numpy.zeros((steps, steps))
    for i, x in enumerate(values):
        for j, y in enumerate(values):
            new_coords = coords.copy()
            new_coords[atom_idx, 0] += x
            new_coords[atom_idx, 1] += y
            mat = get_coulomb_matrix(numbers, new_coords)
            mat[mat < 0] = 0
            vector = mat[numpy.tril_indices(mat.shape[0])].tolist() + meta
            results[i, j] = clf.predict(numpy.matrix(vector))[0]

    extent = [-max_displacement, max_displacement, -max_displacement, max_displacement]
    get_matrix_plot(results, extent)
    print(results.max(), results.min(), results.std())
    return results
Example #26
def pack_tril(mat, axis=-1, out=None):
    '''Flatten the lower triangular part of a matrix.
    Given mat, it returns mat[...,numpy.tril_indices(mat.shape[0])]

    Examples:

    >>> pack_tril(numpy.arange(9).reshape(3,3))
    [0 3 4 6 7 8]
    '''
    if mat.size == 0:
        return numpy.zeros(mat.shape+(0,), dtype=mat.dtype)

    if mat.ndim == 2:
        count, nd = 1, mat.shape[0]
        shape = nd*(nd+1)//2
    else:
        count, nd = mat.shape[:2]
        shape = (count, nd*(nd+1)//2)

    if mat.ndim == 2 or axis == -1:
        mat = numpy.asarray(mat, order='C')
        out = numpy.ndarray(shape, mat.dtype, buffer=out)
        if mat.dtype == numpy.double:
            fn = _np_helper.NPdpack_tril_2d
        else:
            fn = _np_helper.NPzpack_tril_2d
        fn(ctypes.c_int(count), ctypes.c_int(nd),
           out.ctypes.data_as(ctypes.c_void_p),
           mat.ctypes.data_as(ctypes.c_void_p))
        return out

    else:  # pack the leading two dimension
        assert(axis == 0)
        out = mat[numpy.tril_indices(nd)]
        return out
Example #27
    def test_is_scaled_chisquared(self):
        # The 2-dimensional Wishart with an arbitrary scale matrix can be
        # transformed to a scaled chi-squared distribution.
        # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have
        # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)`
        np.random.seed(482974)

        sn = 500
        df = 10
        dim = 4
        # Construct an arbitrary positive definite matrix
        scale = np.diag(np.arange(4)+1)
        scale[np.tril_indices(4, k=-1)] = np.arange(6)
        scale = np.dot(scale.T, scale)
        # Use :math:`\lambda = [1, \dots, 1]'`
        lamda = np.ones((dim,1))
        sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze()
        w = wishart(df, sigma_lamda)
        c = chi2(df, scale=sigma_lamda)

        # Statistics
        assert_allclose(w.var(), c.var())
        assert_allclose(w.mean(), c.mean())
        assert_allclose(w.entropy(), c.entropy())

        # PDF
        X = np.linspace(0.1,10,num=10)
        assert_allclose(w.pdf(X), c.pdf(X))

        # rvs
        rvs = w.rvs(size=sn)
        args = (df,0,sigma_lamda)
        alpha = 0.01
        check_distribution_rvs('chi2', args, alpha, rvs)
Example #28
    def initialize(self, model):

        super(GlobalOddsRatio, self).initialize(model)

        if self.model.weights is not None:
            warnings.warn("weights not implemented for GlobalOddsRatio "
                          "cov_struct, using unweighted covariance estimate",
                          NotImplementedWarning)

        # Need to restrict to between-subject pairs
        cpp = []
        for v in model.endog_li:

            # Number of subjects in this group
            m = int(len(v) / self._ncut)
            i1, i2 = np.tril_indices(m, -1)

            cpp1 = {}
            for k1 in range(self._ncut):
                for k2 in range(k1 + 1):
                    jj = np.zeros((len(i1), 2), dtype=np.int64)
                    jj[:, 0] = i1 * self._ncut + k1
                    jj[:, 1] = i2 * self._ncut + k2
                    cpp1[(k2, k1)] = jj

            cpp.append(cpp1)

        self.cpp = cpp

        # Initialize the dependence parameters
        self.crude_or = self.observed_crude_oddsratio()
        if self.model.update_dep:
            self.dep_params = self.crude_or
Example #29
def bic(em_fit_result_dict, LL_all):
  '''Compute the Bayesian Information Criterion (BIC) score.

  Split it, associating each parameter with the number of datapoints it
  actually accounts for.
  '''

  # Number of parameters:
  # - mixt_target_tr: 1
  # - mixt_random_tr: 1
  # - mixt_nontarget_trk: 1
  # - alpha: 1
  # - beta: 1
  # - gamma: 1


  # First, count the log-likelihood term
  bic_tot = -2. * np.nansum(LL_all[np.tril_indices(LL_all.shape[0])])

  # Then count alpha, beta and gamma, for all datapoints appropriately
  K = 3
  bic_tot += K * np.log(np.nansum(np.isfinite(LL_all)))

  # Now do the mixture proportions per condition
  for nitems_i, nitems in enumerate(em_fit_result_dict['T_space']):
    for trecall_i, trecall in enumerate(em_fit_result_dict['T_space']):
      if trecall <= nitems:
        K = 3
        bic_tot += K * np.log(np.nansum(np.isfinite(LL_all[nitems_i, trecall_i])))

  return bic_tot
Example #30
def symmetrize(m, use_triangle='lower'):
    """Symmetrize a square NumPy array by reflecting one triangular
    section across the diagonal to the other.
    """

    if use_triangle not in ('lower', 'upper'):
        raise ValueError
    if not len(m.shape) == 2:
        raise ValueError
    if not (m.shape[0] == m.shape[1]):
        raise ValueError

    dim = m.shape[0]

    lower_indices = numpy.tril_indices(dim, k=-1)
    upper_indices = numpy.triu_indices(dim, k=1)

    ms = m.copy()

    # tril_indices and triu_indices both enumerate in row-major order, so
    # they are not element-wise transposes of each other for dim > 3;
    # reflect through the transpose instead
    if use_triangle == 'lower':
        ms[upper_indices] = ms.T[upper_indices]
    if use_triangle == 'upper':
        ms[lower_indices] = ms.T[lower_indices]

    return ms
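A quick usage check under assumed input:

import numpy

m = numpy.array([[1.0, 9.0, 9.0],
                 [2.0, 3.0, 9.0],
                 [4.0, 5.0, 6.0]])
ms = symmetrize(m, use_triangle='lower')
assert (ms == ms.T).all()   # upper triangle now mirrors the lower one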
Example #31
def overlap(cibra, ciket, nmo, nocc, s=None):
    '''Overlap between two CISD wavefunctions.

    Args:
        s : 2D array
            The overlap matrix of non-orthogonal one-particle basis
    '''
    if s is None:
        return dot(cibra, ciket, nmo, nocc)

    DEBUG = True

    nvir = nmo - nocc
    nov = nocc * nvir
    bra0, bra1, bra2 = cisdvec_to_amplitudes(cibra, nmo, nocc)
    ket0, ket1, ket2 = cisdvec_to_amplitudes(ciket, nmo, nocc)

    # Sort the ket orbitals so that the orbitals in bra map one-to-one onto
    # the orbitals in ket.
    if ((not DEBUG) and abs(numpy.linalg.det(s[:nocc, :nocc]) - 1) < 1e-2
            and abs(numpy.linalg.det(s[nocc:, nocc:]) - 1) < 1e-2):
        ket_orb_idx = numpy.where(abs(s) > 0.9)[1]
        s = s[:, ket_orb_idx]
        oidx = ket_orb_idx[:nocc]
        vidx = ket_orb_idx[nocc:] - nocc
        ket1 = ket1[oidx[:, None], vidx]
        ket2 = ket2[oidx[:, None, None, None], oidx[:, None, None],
                    vidx[:, None], vidx]

    ooidx = numpy.tril_indices(nocc, -1)
    vvidx = numpy.tril_indices(nvir, -1)
    bra2aa = bra2 - bra2.transpose(1, 0, 2, 3)
    bra2aa = lib.take_2d(bra2aa.reshape(nocc**2,
                                        nvir**2), ooidx[0] * nocc + ooidx[1],
                         vvidx[0] * nvir + vvidx[1])
    ket2aa = ket2 - ket2.transpose(1, 0, 2, 3)
    ket2aa = lib.take_2d(ket2aa.reshape(nocc**2,
                                        nvir**2), ooidx[0] * nocc + ooidx[1],
                         vvidx[0] * nvir + vvidx[1])

    occlist0 = numpy.arange(nocc).reshape(1, nocc)
    occlists = numpy.repeat(occlist0, 1 + nov + bra2aa.size, axis=0)
    occlist0 = occlists[:1]
    occlist1 = occlists[1:1 + nov]
    occlist2 = occlists[1 + nov:]

    ia = 0
    for i in range(nocc):
        for a in range(nocc, nmo):
            occlist1[ia, i] = a
            ia += 1

    ia = 0
    for i in range(nocc):
        for j in range(i):
            for a in range(nocc, nmo):
                for b in range(nocc, a):
                    occlist2[ia, i] = a
                    occlist2[ia, j] = b
                    ia += 1

    na = len(occlists)
    if DEBUG:
        trans = numpy.empty((na, na))
        for i, idx in enumerate(occlists):
            s_sub = s[idx].T.copy()
            minors = s_sub[occlists]
            trans[i, :] = numpy.linalg.det(minors)

        # Mimic the transformation einsum('ab,ap->pb', FCI, trans).
        # The wavefunction FCI has the [excitation_alpha,excitation_beta]
        # representation.  The zero blocks like FCI[S_alpha,D_beta],
        # FCI[D_alpha,D_beta], are explicitly excluded.
        bra_mat = numpy.zeros((na, na))
        bra_mat[0, 0] = bra0
        bra_mat[0, 1:1 + nov] = bra_mat[1:1 + nov, 0] = bra1.ravel()
        bra_mat[0, 1 + nov:] = bra_mat[1 + nov:, 0] = bra2aa.ravel()
        bra_mat[1:1 + nov, 1:1 + nov] = bra2.transpose(0, 2, 1,
                                                       3).reshape(nov, nov)
        ket_mat = numpy.zeros((na, na))
        ket_mat[0, 0] = ket0
        ket_mat[0, 1:1 + nov] = ket_mat[1:1 + nov, 0] = ket1.ravel()
        ket_mat[0, 1 + nov:] = ket_mat[1 + nov:, 0] = ket2aa.ravel()
        ket_mat[1:1 + nov, 1:1 + nov] = ket2.transpose(0, 2, 1,
                                                       3).reshape(nov, nov)
        ovlp = lib.einsum('ab,ap,bq,pq->', bra_mat, trans, trans, ket_mat)

    else:
        nov1 = 1 + nov
        noovv = bra2aa.size
        bra_SS = numpy.zeros((nov1, nov1))
        bra_SS[0, 0] = bra0
        bra_SS[0, 1:] = bra_SS[1:, 0] = bra1.ravel()
        bra_SS[1:, 1:] = bra2.transpose(0, 2, 1, 3).reshape(nov, nov)
        ket_SS = numpy.zeros((nov1, nov1))
        ket_SS[0, 0] = ket0
        ket_SS[0, 1:] = ket_SS[1:, 0] = ket1.ravel()
        ket_SS[1:, 1:] = ket2.transpose(0, 2, 1, 3).reshape(nov, nov)

        trans_SS = numpy.empty((nov1, nov1))
        trans_SD = numpy.empty((nov1, noovv))
        trans_DS = numpy.empty((noovv, nov1))
        occlist01 = occlists[:nov1]
        for i, idx in enumerate(occlist01):
            s_sub = s[idx].T.copy()
            minors = s_sub[occlist01]
            trans_SS[i, :] = numpy.linalg.det(minors)

            minors = s_sub[occlist2]
            trans_SD[i, :] = numpy.linalg.det(minors)

            s_sub = s[:, idx].copy()
            minors = s_sub[occlist2]
            trans_DS[:, i] = numpy.linalg.det(minors)

        ovlp = lib.einsum('ab,ap,bq,pq->', bra_SS, trans_SS, trans_SS, ket_SS)
        ovlp += lib.einsum('ab,a ,bq, q->', bra_SS, trans_SS[:, 0], trans_SD,
                           ket2aa.ravel())
        ovlp += lib.einsum('ab,ap,b ,p ->', bra_SS, trans_SD, trans_SS[:, 0],
                           ket2aa.ravel())

        ovlp += lib.einsum(' b, p,bq,pq->', bra2aa.ravel(), trans_SS[0, :],
                           trans_DS, ket_SS)
        ovlp += lib.einsum(' b, p,b ,p ->', bra2aa.ravel(), trans_SD[0, :],
                           trans_DS[:, 0], ket2aa.ravel())

        ovlp += lib.einsum('a ,ap, q,pq->', bra2aa.ravel(), trans_DS,
                           trans_SS[0, :], ket_SS)
        ovlp += lib.einsum('a ,a , q, q->', bra2aa.ravel(), trans_DS[:, 0],
                           trans_SD[0, :], ket2aa.ravel())

        # FIXME: whether to approximate the overlap between double excitation coefficients
        if numpy.linalg.norm(bra2aa) * numpy.linalg.norm(ket2aa) < 1e-4:
            # Skip the overlap if coefficients of double excitation are small enough
            pass
        if (abs(numpy.linalg.det(s[:nocc, :nocc]) - 1) < 1e-2
                and abs(numpy.linalg.det(s[nocc:, nocc:]) - 1) < 1e-2):
            # If the overlap matrix close to identity enough, use the <D|D'> overlap
            # for orthogonal single-particle basis to approximate the overlap
            # for non-orthogonal basis.
            ovlp += numpy.dot(bra2aa.ravel(), ket2aa.ravel()) * trans_SS[0,
                                                                         0] * 2
        else:
            from multiprocessing import sharedctypes, Process
            buf_ctypes = sharedctypes.RawArray('d', noovv)
            trans_ket = numpy.ndarray(noovv, buffer=buf_ctypes)

            def trans_dot_ket(i0, i1):
                for i in range(i0, i1):
                    s_sub = s[occlist2[i]].T.copy()
                    minors = s_sub[occlist2]
                    trans_ket[i] = numpy.linalg.det(minors).dot(ket2aa.ravel())

            nproc = lib.num_threads()
            if nproc > 1:
                seg = (noovv + nproc - 1) // nproc
                ps = []
                for i0, i1 in lib.prange(0, noovv, seg):
                    p = Process(target=trans_dot_ket, args=(i0, i1))
                    ps.append(p)
                    p.start()
                [p.join() for p in ps]
            else:
                trans_dot_ket(0, noovv)

            ovlp += numpy.dot(bra2aa.ravel(), trans_ket) * trans_SS[0, 0] * 2

    return ovlp
Example #32
def s2kl_s1(symmetry, eri, norb):
    idx = numpy.tril_indices(norb)
    eri1 = numpy.empty((norb, norb, norb, norb))
    eri1[:, :, idx[0], idx[1]] = eri.reshape(norb, norb, -1)
    eri1[:, :, idx[1], idx[0]] = eri.reshape(norb, norb, -1)
    return eri1
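The reverse of this unpacking, plus a round-trip sanity check on an assumed (k, l)-symmetric block, shows the two index vectors at work:

import numpy

norb = 3
idx = numpy.tril_indices(norb)

full = numpy.random.rand(norb, norb, norb, norb)
full = full + full.transpose(0, 1, 3, 2)           # symmetrize k <-> l
packed = full[:, :, idx[0], idx[1]]                # keep the lower triangle

restored = numpy.empty((norb, norb, norb, norb))   # same logic as s2kl_s1
restored[:, :, idx[0], idx[1]] = packed.reshape(norb, norb, -1)
restored[:, :, idx[1], idx[0]] = packed.reshape(norb, norb, -1)
assert numpy.allclose(full, restored)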
Example #33
def main():
    usage = '''Usage: python %s [-h] [-s samples] [-k clusters] [-d dims] [-p band positions] fileName\n
            spatial and spectral dimensions are lists, e.g., -d [0,0,400,400]''' % sys.argv[
        0]
    options, args = getopt.getopt(sys.argv[1:], 'hs:k:d:p:')
    dims = None
    pos = None
    K = 8
    m = 1000
    for option, value in options:
        if option == '-h':
            print(usage)
            return
        elif option == '-d':
            dims = eval(value)
        elif option == '-p':
            pos = eval(value)
        elif option == '-k':
            K = eval(value)
        elif option == '-s':
            m = eval(value)
    gdal.AllRegister()
    infile = args[0]
    inDataset = gdal.Open(infile, GA_ReadOnly)
    cols = inDataset.RasterXSize
    rows = inDataset.RasterYSize
    bands = inDataset.RasterCount
    if dims:
        x0, y0, cols, rows = dims
    else:
        x0 = 0
        y0 = 0
    if pos is not None:
        bands = len(pos)
    else:
        pos = range(1, bands + 1)
    path = os.path.dirname(infile)
    basename = os.path.basename(infile)
    root, ext = os.path.splitext(basename)
    outfile = path + '/' + root + '_hcl' + ext
    print('------- Hierarchical clustering ---------')
    print(time.asctime())
    print('Input: %s' % infile)
    print('Clusters: %i' % K)
    print('Samples: %i' % m)
    start = time.time()
    GG = np.zeros((cols * rows, bands))
    k = 0
    for b in pos:
        band = inDataset.GetRasterBand(b)
        band = band.ReadAsArray(x0, y0, cols, rows).astype(float)
        GG[:, k] = np.ravel(band)
        k += 1


#  training data
    idx = np.random.randint(0, rows * cols, size=m)
    G = GG[idx, :]
    ones = np.mat(np.ones(m, dtype=int))
    mults = np.ones(m, dtype=int)
    Ls = np.array(range(m))
    #  initial cost array
    G2 = np.mat(np.sum(G**2, 1))
    Delta = G2.T * ones
    Delta = Delta + Delta.T
    Delta = Delta - 2 * np.mat(G) * np.mat(G).T
    idx = np.tril_indices(m)
    Delta[idx] = 10e10
    Delta = Delta.A
    #  begin iteration
    cost = 0.0
    costarray = []
    c = m
    while c > K:
        bm = best_merge(Delta)
        j = bm[0]
        i = bm[1]
        #      j > i
        costarray.append(cost + Delta[i, j])
        #      re-label
        idx = np.where(Ls == j)[0]
        Ls[idx] = i
        idx = np.where(Ls > j)[0]
        Ls[idx] -= 1
        #      pre-merge multiplicities
        ni = mults[i]
        nj = mults[j]
        #      update merge-cost array, k = i+1 ... c-1
        if c - i - 1 == 0:
            k = [i + 1]
        else:
            k = i + 1 + np.arange(c - i - 1)
        nk = mults[k]
        Dkj = np.minimum(Delta[k, j].ravel(), Delta[j, k].ravel())
        idx = np.where(k == j)[0]
        Dkj[idx] = 0
        Delta[i, k] = ((ni + nk) * Delta[i, k] +
                       (nj + nk) * Dkj - nk * Delta[i, j]) / (ni + nj + nk)
        #     update merge-cost array, k = 0 ... i-1
        if i == 0:
            k = [0]
        else:
            k = np.arange(i - 1)
        nk = mults[k]
        Dkj = np.minimum(Delta[k, j].ravel(), Delta[j, k].ravel())
        idx = np.where(k == j)[0]
        Dkj[idx] = 0
        Delta[k, i] = ((ni + nk) * Delta[k, i] +
                       (nj + nk) * Dkj - nk * Delta[i, j]) / (ni + nj + nk)
        #      update multiplicities
        mults[i] = mults[i] + mults[j]
        #      delete the upper cluster
        idx = np.ones(c)
        idx[j] = 0
        idx = np.where(idx == 1)[0]
        mults = mults[idx]
        Delta = Delta[:, idx]
        Delta = Delta[idx, :]
        c -= 1
    print('classifying...')
    labs = []
    for L in Ls:
        lab = np.zeros(K)
        lab[L] = 1.0
        labs.append(lab)
    labs = np.array(labs)
    classifier = sc.Maxlike(G, labs)
    if classifier.train():
        driver = gdal.GetDriverByName('GTiff')
        outDataset = driver.Create(outfile, cols, rows, 1, GDT_Byte)
        projection = inDataset.GetProjection()
        geotransform = inDataset.GetGeoTransform()
        if geotransform is not None:
            gt = list(geotransform)
            gt[0] = gt[0] + x0 * gt[1]
            gt[3] = gt[3] + y0 * gt[5]
            outDataset.SetGeoTransform(tuple(gt))
        if projection is not None:
            outDataset.SetProjection(projection)
        cls, _ = classifier.classify(GG)
        outBand = outDataset.GetRasterBand(1)
        outBand.WriteArray(np.reshape(cls, (rows, cols)), 0, 0)
        outBand.FlushCache()
        outDataset = None
        inDataset = None
        ymax = np.max(costarray)
        plt.loglog(range(K, m), list(reversed(costarray)))
        p = plt.gca()
        p.set_title('Merge cost')
        p.set_xlabel('Clusters')
        p.set_ylim((1, ymax))
        plt.show()
        print('result written to: ' + outfile)
        print('elapsed time: ' + str(time.time() - start))
    else:
        print('classification failed')
Example #34
def sim_block_study(X, afreq, ldscore, n_study, n_blocks, n_causal_per_block,
                    block_p, pve, effect_distribution, min_r2, max_r2,
                    min_ldscore, max_ldscore):
    """
    each block has causal SNPs; each study is assigned to a main block
    tissues within a block share the causal SNP
    tissues out of block have the causal SNP with probability block_p

    min_r2, max_r2: pairwise r2 values allowed for causal SNPs
    min_ldscore, max_ldscore: ldscore range allowed for causal variants
    """
    n_variants = X.shape[1]
    n_samples = X.shape[0]
    n_causal = n_blocks * n_causal_per_block

    R2 = np.corrcoef(X.T)**2

    active = (ldscore > min_ldscore) & (ldscore < max_ldscore)
    causal_snps = select_causal_snps(R2, n_causal, min_r2, max_r2,
                                     active).reshape(n_causal_per_block, -1)

    # draw block ids and causal snps
    # ensure each block gets at least one tissue
    block_id = np.sort(
        np.concatenate([
            np.arange(n_blocks),
            np.random.choice(n_blocks, n_study - n_blocks)
        ]))

    # make block probability matrix
    causal_p = np.eye(n_blocks)
    causal_p[causal_p == 0] = block_p

    results = []
    for t in range(n_study):
        # sample causal snps for tissue
        causal_idx = np.random.binomial(1, causal_p[block_id[t]]) == 1
        causal_in_study = np.concatenate(
            [cs[causal_idx] for cs in causal_snps])
        results.append(
            sim_expression_single_study(X, afreq, causal_in_study, pve,
                                        effect_distribution))

    expression = np.atleast_2d(np.array([x[0] for x in results]))
    true_effects = np.atleast_2d(np.array([x[1] for x in results]))
    residual_variance = np.array([x[2] for x in results])

    # trim down to the causal snps we actually used
    causal_snps = np.arange(n_variants)[np.any(true_effects != 0, 0)]

    tril = np.tril_indices(n_study, k=-1)
    true_coloc = (true_effects @ true_effects.T != 0)[tril]

    return {
        'expression': expression,
        'true_effects': true_effects,
        'true_coloc': true_coloc,
        'residual_variance': residual_variance,
        'causal_snps': causal_snps,
        'n_causal': causal_snps.size,
        'K': int(np.ceil(causal_snps.size / 10) * 10),
        'ldscore': ldscore
    }
Example #35
    def __init__(self,
                 endog,
                 exog=None,
                 order=(1, 0),
                 trend='c',
                 error_cov_type='unstructured',
                 measurement_error=False,
                 enforce_stationarity=True,
                 enforce_invertibility=True,
                 **kwargs):

        # Model parameters
        self.error_cov_type = error_cov_type
        self.measurement_error = measurement_error
        self.enforce_stationarity = enforce_stationarity
        self.enforce_invertibility = enforce_invertibility

        # Save the given orders
        self.order = order
        self.trend = trend

        # Model orders
        self.k_ar = int(order[0])
        self.k_ma = int(order[1])
        self.k_trend = int(self.trend == 'c')

        # Check for valid model
        if trend not in ['c', 'nc']:
            raise ValueError('Invalid trend specification.')
        if error_cov_type not in ['diagonal', 'unstructured']:
            raise ValueError('Invalid error covariance matrix type'
                             ' specification.')
        if self.k_ar == 0 and self.k_ma == 0:
            raise ValueError('Invalid VARMAX(p,q) specification; at least one'
                             ' p,q must be greater than zero.')

        # Warn for VARMA model
        if self.k_ar > 0 and self.k_ma > 0:
            warn(
                'Estimation of VARMA(p,q) models is not generically robust,'
                ' due especially to identification issues.', EstimationWarning)

        # Exogenous data
        self.k_exog = 0
        if exog is not None:
            exog_is_using_pandas = _is_using_pandas(exog, None)
            if not exog_is_using_pandas:
                exog = np.asarray(exog)

            # Make sure we have 2-dimensional array
            if exog.ndim == 1:
                if not exog_is_using_pandas:
                    exog = exog[:, None]
                else:
                    exog = pd.DataFrame(exog)

            self.k_exog = exog.shape[1]

        # Note: at some point in the future might add state regression, as in
        # SARIMAX.
        self.mle_regression = self.k_exog > 0

        # We need to have an array or pandas at this point
        if not _is_using_pandas(endog, None):
            endog = np.asanyarray(endog)

        # Model order
        # Used internally in various places
        _min_k_ar = max(self.k_ar, 1)
        self._k_order = _min_k_ar + self.k_ma

        # Number of states
        k_endog = endog.shape[1]
        k_posdef = k_endog
        k_states = k_endog * self._k_order

        # By default, initialize as stationary
        kwargs.setdefault('initialization', 'stationary')

        # By default, use LU decomposition
        kwargs.setdefault('inversion_method', INVERT_UNIVARIATE | SOLVE_LU)

        # Initialize the state space model
        super(VARMAX, self).__init__(endog,
                                     exog=exog,
                                     k_states=k_states,
                                     k_posdef=k_posdef,
                                     **kwargs)

        # Set as time-varying model if we have time-trend or exog
        if self.k_exog > 0 or self.k_trend > 1:
            self.ssm._time_invariant = False

        # Initialize the parameters
        self.parameters = OrderedDict()
        self.parameters['trend'] = self.k_endog * self.k_trend
        self.parameters['ar'] = self.k_endog**2 * self.k_ar
        self.parameters['ma'] = self.k_endog**2 * self.k_ma
        self.parameters['regression'] = self.k_endog * self.k_exog
        if self.error_cov_type == 'diagonal':
            self.parameters['state_cov'] = self.k_endog
        # These parameters fill in a lower-triangular matrix which is then
        # dotted with itself to get a positive definite matrix.
        elif self.error_cov_type == 'unstructured':
            self.parameters['state_cov'] = (int(self.k_endog *
                                                (self.k_endog + 1) / 2))
        self.parameters['obs_cov'] = self.k_endog * self.measurement_error
        self.k_params = sum(self.parameters.values())

        # Initialize known elements of the state space matrices

        # If we have exog effects, then the state intercept needs to be
        # time-varying
        if self.k_exog > 0:
            self.ssm['state_intercept'] = np.zeros((self.k_states, self.nobs))

        # The design matrix is just an identity for the first k_endog states
        idx = np.diag_indices(self.k_endog)
        self.ssm[('design', ) + idx] = 1

        # The transition matrix is described in four blocks, where the upper
        # left block is in companion form with the autoregressive coefficient
        # matrices (so it is shaped k_endog * k_ar x k_endog * k_ar) ...
        if self.k_ar > 0:
            idx = np.diag_indices((self.k_ar - 1) * self.k_endog)
            idx = idx[0] + self.k_endog, idx[1]
            self.ssm[('transition', ) + idx] = 1
        # ... and the lower right block is in companion form with zeros as the
        # coefficient matrices (it is shaped k_endog * k_ma x k_endog * k_ma).
        idx = np.diag_indices((self.k_ma - 1) * self.k_endog)
        idx = (idx[0] + (_min_k_ar + 1) * self.k_endog,
               idx[1] + _min_k_ar * self.k_endog)
        self.ssm[('transition', ) + idx] = 1

        # The selection matrix is described in two blocks, where the upper
        # block selects the all k_posdef errors in the first k_endog rows
        # (the upper block is shaped k_endog * k_ar x k) and the lower block
        # also selects all k_posdef errors in the first k_endog rows (the lower
        # block is shaped k_endog * k_ma x k).
        idx = np.diag_indices(self.k_endog)
        self.ssm[('selection', ) + idx] = 1
        idx = idx[0] + _min_k_ar * self.k_endog, idx[1]
        if self.k_ma > 0:
            self.ssm[('selection', ) + idx] = 1

        # Cache some indices
        if self.trend == 'c' and self.k_exog == 0:
            self._idx_state_intercept = np.s_['state_intercept', :k_endog]
        elif self.k_exog > 0:
            self._idx_state_intercept = np.s_['state_intercept', :k_endog, :]
        if self.k_ar > 0:
            self._idx_transition = np.s_['transition', :k_endog, :]
        else:
            self._idx_transition = np.s_['transition', :k_endog, k_endog:]
        if self.error_cov_type == 'diagonal':
            self._idx_state_cov = (('state_cov', ) +
                                   np.diag_indices(self.k_endog))
        elif self.error_cov_type == 'unstructured':
            self._idx_lower_state_cov = np.tril_indices(self.k_endog)
        if self.measurement_error:
            self._idx_obs_cov = ('obs_cov', ) + np.diag_indices(self.k_endog)

        # Cache some slices
        def _slice(key, offset):
            length = self.parameters[key]
            param_slice = np.s_[offset:offset + length]
            offset += length
            return param_slice, offset

        offset = 0
        self._params_trend, offset = _slice('trend', offset)
        self._params_ar, offset = _slice('ar', offset)
        self._params_ma, offset = _slice('ma', offset)
        self._params_regression, offset = _slice('regression', offset)
        self._params_state_cov, offset = _slice('state_cov', offset)
        self._params_obs_cov, offset = _slice('obs_cov', offset)
Example #36
def test_gradient():
    from brainiak.reprsimil.brsa import BRSA
    import brainiak.utils.utils as utils
    import scipy.stats
    import numpy as np
    import os.path
    import numdifftools as nd
    np.random.seed(100)
    file_path = os.path.join(os.path.dirname(__file__), "example_design.1D")
    # Load an example design matrix
    design = utils.ReadDesign(fname=file_path)
    # concatenate it 4 times, mimicking 4 runs of identical timing
    design.design_used = np.tile(design.design_used[:, 0:17], [4, 1])
    design.n_TR = design.n_TR * 4

    # start simulating some data
    n_V = 200
    n_C = np.size(design.design_used, axis=1)
    n_T = design.n_TR

    noise_bot = 0.5
    noise_top = 1.5
    noise_level = np.random.rand(n_V) * (noise_top - noise_bot) + noise_bot
    # noise level is random.

    # AR(1) coefficient
    rho1_top = 0.8
    rho1_bot = -0.2
    rho1 = np.random.rand(n_V) * (rho1_top - rho1_bot) + rho1_bot

    # generating noise
    noise = np.zeros([n_T, n_V])
    noise[0, :] = np.random.randn(n_V) * noise_level / np.sqrt(1 - rho1**2)
    for i_t in range(1, n_T):
        noise[i_t, :] = noise[i_t -
                              1, :] * rho1 + np.random.randn(n_V) * noise_level

    # ideal covariance matrix
    ideal_cov = np.zeros([n_C, n_C])
    ideal_cov = np.eye(n_C) * 0.6
    ideal_cov[0, 0] = 0.2
    ideal_cov[5:9, 5:9] = 0.6
    for cond in range(5, 9):
        ideal_cov[cond, cond] = 1
    idx = np.where(np.sum(np.abs(ideal_cov), axis=0) > 0)[0]
    L_full = np.linalg.cholesky(ideal_cov)

    # generating signal
    snr_level = 5.0  # test with high SNR
    # snr = np.random.rand(n_V)*(snr_top-snr_bot)+snr_bot
    # Notice that, accurately speaking, this is not SNR; the magnitude of the
    # signal depends not only on beta but also on x.
    inten = np.random.randn(n_V) * 20.0

    # parameters of Gaussian process to generate pseudo SNR
    tau = 0.8
    smooth_width = 5.0
    inten_kernel = 1.0

    coords = np.arange(0, n_V)[:, None]

    dist2 = np.square(coords - coords.T)

    inten_tile = np.tile(inten, [n_V, 1])
    inten_diff2 = (inten_tile - inten_tile.T)**2

    K = np.exp(-dist2 / smooth_width**2 / 2.0 - inten_diff2 / inten_kernel**2 /
               2.0) * tau**2 + np.eye(n_V) * tau**2 * 0.001

    L = np.linalg.cholesky(K)
    snr = np.exp(np.dot(L, np.random.randn(n_V))) * snr_level
    sqrt_v = noise_level * snr
    betas_simulated = np.dot(L_full, np.random.randn(n_C, n_V)) * sqrt_v
    signal = np.dot(design.design_used, betas_simulated)

    # Adding noise to signal as data
    Y = signal + noise

    scan_onsets = np.linspace(0, design.n_TR, num=5)

    # Test fitting with GP prior.
    brsa = BRSA(GP_space=True, GP_inten=True, verbose=False, n_iter=200)

    # test if the gradients are correct
    XTY, XTDY, XTFY, YTY_diag, YTDY_diag, YTFY_diag, XTX, XTDX, XTFX = brsa._prepare_data(
        design.design_used, Y, n_T, n_V, scan_onsets)
    l_idx = np.tril_indices(n_C)
    n_l = np.size(l_idx[0])

    idx_param_sing, idx_param_fitU, idx_param_fitV = brsa._build_index_param(
        n_l, n_V, 2)

    # Initial parameters are correct parameters with some perturbation
    param0_fitU = np.random.randn(n_l + n_V) * 0.1
    param0_fitV = np.random.randn(n_V + 1) * 0.1
    param0_fitV[:n_V - 1] += np.log(snr[:n_V - 1]) * 2
    param0_fitV[n_V - 1] += np.log(smooth_width) * 2
    param0_fitV[n_V] += np.log(inten_kernel) * 2

    # log likelihood and derivative at the initial parameters
    ll0, deriv0 = brsa._loglike_AR1_diagV_fitU(param0_fitU, XTX, XTDX, XTFX, YTY_diag, YTDY_diag, YTFY_diag, \
                XTY, XTDY, XTFY, np.log(snr)*2,  l_idx,n_C,n_T,n_V,idx_param_fitU,n_C)

    # We test whether the numerical and analytical gradients wrt the first element of the Cholesky factor agree
    vec = np.zeros(np.size(param0_fitU))
    vec[idx_param_fitU['Cholesky'][0]] = 1
    dd = nd.directionaldiff(lambda x: brsa._loglike_AR1_diagV_fitU(x, XTX, XTDX, XTFX, YTY_diag, YTDY_diag,\
                                                                YTFY_diag, XTY, XTDY, XTFY, np.log(snr)*2,\
                                                                l_idx,n_C,n_T,n_V,idx_param_fitU,n_C)[0], param0_fitU, vec)
    assert np.isclose(
        dd, np.dot(deriv0, vec),
        rtol=0.01), 'gradient of fitU wrt Cholesky factor incorrect'

    # We test the gradient wrt the reparametrization of AR(1) coefficient of noise.
    vec = np.zeros(np.size(param0_fitU))
    vec[idx_param_fitU['a1'][0]] = 1
    dd = nd.directionaldiff(lambda x: brsa._loglike_AR1_diagV_fitU(x, XTX, XTDX, XTFX, YTY_diag, YTDY_diag,\
                                                                YTFY_diag, XTY, XTDY, XTFY, np.log(snr)*2,\
                                                                l_idx,n_C,n_T,n_V,idx_param_fitU,n_C)[0], param0_fitU, vec)
    assert np.isclose(
        dd, np.dot(deriv0, vec),
        rtol=0.01), 'gradient of fitU wrt AR(1) coefficient incorrect'

    # Test on a random direction
    vec = np.random.randn(np.size(param0_fitU))
    vec = vec / np.linalg.norm(vec)
    dd = nd.directionaldiff(lambda x: brsa._loglike_AR1_diagV_fitU(x, XTX, XTDX, XTFX, YTY_diag, YTDY_diag,\
                                                                YTFY_diag, XTY, XTDY, XTFY, np.log(snr)*2,\
                                                                l_idx,n_C,n_T,n_V,idx_param_fitU,n_C)[0], param0_fitU, vec)
    assert np.isclose(dd, np.dot(deriv0, vec),
                      rtol=0.01), 'gradient of fitU incorrect'

    # We test the gradient of _fitV wrt log(SNR^2) assuming no GP prior.
    ll0, deriv0 = brsa._loglike_AR1_diagV_fitV(
        param0_fitV[idx_param_fitV['log_SNR2']], XTX, XTDX, XTFX, YTY_diag,
        YTDY_diag, YTFY_diag, XTY, XTDY, XTFY, L_full[l_idx],
        np.tan(rho1 * np.pi / 2), l_idx, n_C, n_T, n_V, idx_param_fitV, n_C,
        False, False)
    vec = np.zeros(np.size(param0_fitV[idx_param_fitV['log_SNR2']]))
    vec[idx_param_fitV['log_SNR2'][0]] = 1
    dd = nd.directionaldiff(
        lambda x: brsa._loglike_AR1_diagV_fitV(
            x, XTX, XTDX, XTFX, YTY_diag, YTDY_diag, YTFY_diag, XTY, XTDY,
            XTFY, L_full[l_idx], np.tan(rho1 * np.pi / 2), l_idx, n_C, n_T,
            n_V, idx_param_fitV, n_C, False, False)[0],
        param0_fitV[idx_param_fitV['log_SNR2']], vec)
    assert np.isclose(
        dd, np.dot(deriv0, vec), rtol=0.01
    ), 'gradient of fitV wrt log(SNR2) incorrect for model without GP'

    # We test the gradient of _fitV wrt log(SNR^2) assuming GP prior.
    ll0, deriv0 = brsa._loglike_AR1_diagV_fitV(param0_fitV, XTX, XTDX, XTFX,
                                               YTY_diag, YTDY_diag, YTFY_diag,
                                               XTY, XTDY, XTFY, L_full[l_idx],
                                               np.tan(rho1 * np.pi / 2), l_idx,
                                               n_C, n_T, n_V, idx_param_fitV,
                                               n_C, True, True, dist2,
                                               inten_diff2, 100, 100)
    vec = np.zeros(np.size(param0_fitV))
    vec[idx_param_fitV['log_SNR2'][0]] = 1
    dd = nd.directionaldiff(
        lambda x: brsa._loglike_AR1_diagV_fitV(
            x, XTX, XTDX, XTFX, YTY_diag,
            YTDY_diag, YTFY_diag, XTY, XTDY, XTFY, L_full[l_idx],
            np.tan(rho1 * np.pi / 2), l_idx, n_C, n_T, n_V, idx_param_fitV,
            n_C, True, True, dist2, inten_diff2, 100, 100)[0], param0_fitV,
        vec)
    assert np.isclose(
        dd, np.dot(deriv0, vec), rtol=0.01
    ), 'gradient of fitV wrt log(SNR2) incorrect for model with GP'

    # We test the gradient wrt the spatial length scale parameter of the GP prior
    vec = np.zeros(np.size(param0_fitV))
    vec[idx_param_fitV['c_space']] = 1
    dd = nd.directionaldiff(
        lambda x: brsa._loglike_AR1_diagV_fitV(
            x, XTX, XTDX, XTFX, YTY_diag,
            YTDY_diag, YTFY_diag, XTY, XTDY, XTFY, L_full[l_idx],
            np.tan(rho1 * np.pi / 2), l_idx, n_C, n_T, n_V, idx_param_fitV,
            n_C, True, True, dist2, inten_diff2, 100, 100)[0], param0_fitV,
        vec)
    assert np.isclose(
        dd, np.dot(deriv0, vec),
        rtol=0.01), 'gradient of fitV wrt spatial length scale of GP incorrect'

    # We test the gradient wrt the intensity length scale parameter of the GP prior
    vec = np.zeros(np.size(param0_fitV))
    vec[idx_param_fitV['c_inten']] = 1
    dd = nd.directionaldiff(
        lambda x: brsa._loglike_AR1_diagV_fitV(
            x, XTX, XTDX, XTFX, YTY_diag,
            YTDY_diag, YTFY_diag, XTY, XTDY, XTFY, L_full[l_idx],
            np.tan(rho1 * np.pi / 2), l_idx, n_C, n_T, n_V, idx_param_fitV,
            n_C, True, True, dist2, inten_diff2, 100, 100)[0], param0_fitV,
        vec)
    assert np.isclose(
        dd, np.dot(deriv0, vec), rtol=0.01
    ), 'gradient of fitV wrt intensity length scale of GP incorrect'

    # We test the gradient on a random direction
    vec = np.random.randn(np.size(param0_fitV))
    vec = vec / np.linalg.norm(vec)
    dd = nd.directionaldiff(
        lambda x: brsa._loglike_AR1_diagV_fitV(
            x, XTX, XTDX, XTFX, YTY_diag,
            YTDY_diag, YTFY_diag, XTY, XTDY, XTFY, L_full[l_idx],
            np.tan(rho1 * np.pi / 2), l_idx, n_C, n_T, n_V, idx_param_fitV,
            n_C, True, True, dist2, inten_diff2, 100, 100)[0], param0_fitV,
        vec)
    assert np.isclose(dd, np.dot(deriv0, vec),
                      rtol=0.01), 'gradient of fitV incorrect'
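The check pattern above generalizes beyond BRSA: numdifftools.directionaldiff numerically differentiates a scalar function along a unit vector, and the result should match the dot product of the analytic gradient with that vector. A self-contained sketch on a toy objective (all names here are illustrative):

import numpy as np
import numdifftools as nd

def f(x):
    return 0.5 * np.dot(x, x) + np.sin(x[0])

def grad_f(x):
    g = x.copy()
    g[0] += np.cos(x[0])
    return g

x0 = np.random.randn(5)
vec = np.random.randn(5)
vec /= np.linalg.norm(vec)  # unit direction
dd = nd.directionaldiff(f, x0, vec)  # numerical directional derivative
assert np.isclose(dd, np.dot(grad_f(x0), vec), rtol=1e-6)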
Example #37
0
def _make_eris_outcore(mycc, mo_coeff=None):
    cput0 = (time.clock(), time.time())
    log = logger.Logger(mycc.stdout, mycc.verbose)

    eris = _PhysicistsERIs()
    eris._common_init_(mycc, mo_coeff)
    nocc = eris.nocc
    nao, nmo = eris.mo_coeff.shape
    nvir = nmo - nocc
    assert (eris.mo_coeff.dtype == np.double)
    mo_a = eris.mo_coeff[:nao // 2]
    mo_b = eris.mo_coeff[nao // 2:]
    orbspin = eris.orbspin

    feri = eris.feri = lib.H5TmpFile()
    dtype = np.result_type(eris.mo_coeff).char
    eris.oooo = feri.create_dataset('oooo', (nocc, nocc, nocc, nocc), dtype)
    eris.ooov = feri.create_dataset('ooov', (nocc, nocc, nocc, nvir), dtype)
    eris.oovv = feri.create_dataset('oovv', (nocc, nocc, nvir, nvir), dtype)
    eris.ovov = feri.create_dataset('ovov', (nocc, nvir, nocc, nvir), dtype)
    eris.ovvo = feri.create_dataset('ovvo', (nocc, nvir, nvir, nocc), dtype)
    eris.ovvv = feri.create_dataset('ovvv', (nocc, nvir, nvir, nvir), dtype)

    if orbspin is None:
        orbo_a = mo_a[:, :nocc]
        orbv_a = mo_a[:, nocc:]
        orbo_b = mo_b[:, :nocc]
        orbv_b = mo_b[:, nocc:]

        max_memory = mycc.max_memory - lib.current_memory()[0]
        blksize = min(nocc, max(2, int(max_memory * 1e6 / 8 / (nmo**3 * 2))))
        max_memory = max(MEMORYMIN, max_memory)

        fswap = lib.H5TmpFile()
        ao2mo.kernel(mycc.mol, (orbo_a, mo_a, mo_a, mo_a),
                     fswap,
                     'aaaa',
                     max_memory=max_memory,
                     verbose=log)
        ao2mo.kernel(mycc.mol, (orbo_a, mo_a, mo_b, mo_b),
                     fswap,
                     'aabb',
                     max_memory=max_memory,
                     verbose=log)
        ao2mo.kernel(mycc.mol, (orbo_b, mo_b, mo_a, mo_a),
                     fswap,
                     'bbaa',
                     max_memory=max_memory,
                     verbose=log)
        ao2mo.kernel(mycc.mol, (orbo_b, mo_b, mo_b, mo_b),
                     fswap,
                     'bbbb',
                     max_memory=max_memory,
                     verbose=log)

        for p0, p1 in lib.prange(0, nocc, blksize):
            tmp = np.asarray(fswap['aaaa'][p0 * nmo:p1 * nmo])
            tmp += np.asarray(fswap['aabb'][p0 * nmo:p1 * nmo])
            tmp += np.asarray(fswap['bbaa'][p0 * nmo:p1 * nmo])
            tmp += np.asarray(fswap['bbbb'][p0 * nmo:p1 * nmo])
            tmp = lib.unpack_tril(tmp).reshape(p1 - p0, nmo, nmo, nmo)
            eris.oooo[p0:p1] = (
                tmp[:, :nocc, :nocc, :nocc].transpose(0, 2, 1, 3) -
                tmp[:, :nocc, :nocc, :nocc].transpose(0, 2, 3, 1))
            eris.ooov[p0:p1] = (
                tmp[:, :nocc, :nocc, nocc:].transpose(0, 2, 1, 3) -
                tmp[:, nocc:, :nocc, :nocc].transpose(0, 2, 3, 1))
            eris.ovvv[p0:p1] = (
                tmp[:, nocc:, nocc:, nocc:].transpose(0, 2, 1, 3) -
                tmp[:, nocc:, nocc:, nocc:].transpose(0, 2, 3, 1))
            eris.oovv[p0:p1] = (
                tmp[:, nocc:, :nocc, nocc:].transpose(0, 2, 1, 3) -
                tmp[:, nocc:, :nocc, nocc:].transpose(0, 2, 3, 1))
            eris.ovov[p0:p1] = (
                tmp[:, :nocc, nocc:, nocc:].transpose(0, 2, 1, 3) -
                tmp[:, nocc:, nocc:, :nocc].transpose(0, 2, 3, 1))
            eris.ovvo[p0:p1] = (
                tmp[:, nocc:, nocc:, :nocc].transpose(0, 2, 1, 3) -
                tmp[:, :nocc, nocc:, nocc:].transpose(0, 2, 3, 1))
            tmp = None
        cput0 = log.timer_debug1('transforming ovvv', *cput0)

        eris.vvvv = feri.create_dataset('vvvv', (nvir, nvir, nvir, nvir),
                                        dtype)
        tril2sq = lib.square_mat_in_trilu_indices(nvir)
        fswap = lib.H5TmpFile()
        ao2mo.kernel(mycc.mol, (orbv_a, orbv_a, orbv_a, orbv_a),
                     fswap,
                     'aaaa',
                     max_memory=max_memory,
                     verbose=log)
        ao2mo.kernel(mycc.mol, (orbv_a, orbv_a, orbv_b, orbv_b),
                     fswap,
                     'aabb',
                     max_memory=max_memory,
                     verbose=log)
        ao2mo.kernel(mycc.mol, (orbv_b, orbv_b, orbv_a, orbv_a),
                     fswap,
                     'bbaa',
                     max_memory=max_memory,
                     verbose=log)
        ao2mo.kernel(mycc.mol, (orbv_b, orbv_b, orbv_b, orbv_b),
                     fswap,
                     'bbbb',
                     max_memory=max_memory,
                     verbose=log)
        for p0, p1 in lib.prange(0, nvir, blksize):
            off0 = p0 * (p0 + 1) // 2
            off1 = p1 * (p1 + 1) // 2
            tmp = np.asarray(fswap['aaaa'][off0:off1])
            tmp += np.asarray(fswap['aabb'][off0:off1])
            tmp += np.asarray(fswap['bbaa'][off0:off1])
            tmp += np.asarray(fswap['bbbb'][off0:off1])

            if p0 > 0:
                c = tmp[tril2sq[p0:p1, :p0] - off0]
                c = lib.unpack_tril(c.reshape((p1 - p0) * p0, -1))
                eris.vvvv[p0:p1, :p0] = c.reshape(p1 - p0, p0, nvir, nvir)
            c = tmp[tril2sq[:p1, p0:p1] - off0]
            c = lib.unpack_tril(c.reshape((p1 - p0) * p1, -1))
            eris.vvvv[:p1, p0:p1] = c.reshape(p1, p1 - p0, nvir, nvir)
            tmp = None

        for p0, p1 in lib.prange(0, nvir, blksize):
            tmp = np.asarray(eris.vvvv[p0:p1])
            eris.vvvv[p0:p1] = tmp.transpose(0, 2, 1, 3) - tmp.transpose(
                0, 2, 3, 1)
        cput0 = log.timer_debug1('transforming vvvv', *cput0)

    else:  # with orbspin
        mo = mo_a + mo_b
        orbo = mo[:, :nocc]
        orbv = mo[:, nocc:]

        max_memory = mycc.max_memory - lib.current_memory()[0]
        blksize = min(nocc, max(2, int(max_memory * 1e6 / 8 / (nmo**3 * 2))))
        max_memory = max(MEMORYMIN, max_memory)

        fswap = lib.H5TmpFile()
        ao2mo.kernel(mycc.mol, (orbo, mo, mo, mo),
                     fswap,
                     max_memory=max_memory,
                     verbose=log)
        sym_forbid = orbspin[:, None] != orbspin

        for p0, p1 in lib.prange(0, nocc, blksize):
            tmp = np.asarray(fswap['eri_mo'][p0 * nmo:p1 * nmo])
            tmp = lib.unpack_tril(tmp).reshape(p1 - p0, nmo, nmo, nmo)
            tmp[sym_forbid[p0:p1]] = 0
            tmp[:, :, sym_forbid] = 0

            eris.oooo[p0:p1] = (
                tmp[:, :nocc, :nocc, :nocc].transpose(0, 2, 1, 3) -
                tmp[:, :nocc, :nocc, :nocc].transpose(0, 2, 3, 1))
            eris.ooov[p0:p1] = (
                tmp[:, :nocc, :nocc, nocc:].transpose(0, 2, 1, 3) -
                tmp[:, nocc:, :nocc, :nocc].transpose(0, 2, 3, 1))
            eris.ovvv[p0:p1] = (
                tmp[:, nocc:, nocc:, nocc:].transpose(0, 2, 1, 3) -
                tmp[:, nocc:, nocc:, nocc:].transpose(0, 2, 3, 1))
            eris.oovv[p0:p1] = (
                tmp[:, nocc:, :nocc, nocc:].transpose(0, 2, 1, 3) -
                tmp[:, nocc:, :nocc, nocc:].transpose(0, 2, 3, 1))
            eris.ovov[p0:p1] = (
                tmp[:, :nocc, nocc:, nocc:].transpose(0, 2, 1, 3) -
                tmp[:, nocc:, nocc:, :nocc].transpose(0, 2, 3, 1))
            eris.ovvo[p0:p1] = (
                tmp[:, nocc:, nocc:, :nocc].transpose(0, 2, 1, 3) -
                tmp[:, :nocc, nocc:, nocc:].transpose(0, 2, 3, 1))
            tmp = None
        cput0 = log.timer_debug1('transforming ovvv', *cput0)

        eris.vvvv = feri.create_dataset('vvvv', (nvir, nvir, nvir, nvir),
                                        dtype)
        sym_forbid = (orbspin[nocc:, None] !=
                      orbspin[nocc:])[np.tril_indices(nvir)]
        tril2sq = lib.square_mat_in_trilu_indices(nvir)

        fswap = lib.H5TmpFile()
        ao2mo.kernel(mycc.mol, orbv, fswap, max_memory=max_memory, verbose=log)
        for p0, p1 in lib.prange(0, nvir, blksize):
            off0 = p0 * (p0 + 1) // 2
            off1 = p1 * (p1 + 1) // 2
            tmp = np.asarray(fswap['eri_mo'][off0:off1])
            tmp[sym_forbid[off0:off1]] = 0
            tmp[:, sym_forbid] = 0

            if p0 > 0:
                c = tmp[tril2sq[p0:p1, :p0] - off0]
                c = lib.unpack_tril(c.reshape((p1 - p0) * p0, -1))
                eris.vvvv[p0:p1, :p0] = c.reshape(p1 - p0, p0, nvir, nvir)
            c = tmp[tril2sq[:p1, p0:p1] - off0]
            c = lib.unpack_tril(c.reshape((p1 - p0) * p1, -1))
            eris.vvvv[:p1, p0:p1] = c.reshape(p1, p1 - p0, nvir, nvir)
            tmp = None

        for p0, p1 in lib.prange(0, nvir, blksize):
            tmp = np.asarray(eris.vvvv[p0:p1])
            eris.vvvv[p0:p1] = tmp.transpose(0, 2, 1, 3) - tmp.transpose(
                0, 2, 3, 1)
        cput0 = log.timer_debug1('transforming vvvv', *cput0)

    return eris
Example #38
0
 def tril_indices(self):
     return np.tril_indices(self.ddim)
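For context, a quick demonstration of what indexing with np.tril_indices does: it packs the lower triangle (diagonal included) of a square matrix into a 1D vector, and the same index pair scatters the vector back.

import numpy as np

a = np.arange(16).reshape(4, 4)
il = np.tril_indices(4)
packed = a[il]  # 10 elements: diagonal plus below-diagonal entries
b = np.zeros((4, 4), dtype=a.dtype)
b[il] = packed  # scatter back into the lower triangle
assert np.array_equal(np.tril(a), b)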
Example #39
0
    def call(self, x, mask=None):
        # TODO: validate input shape

        # The input of this layer is [L, mu, a] in concatenated form. We first split
        # those up.
        idx = 0
        if self.mode == 'full':
            L_flat = x[:, idx:idx +
                       (self.nb_actions * self.nb_actions + self.nb_actions) //
                       2]
            idx += (self.nb_actions * self.nb_actions + self.nb_actions) // 2
        elif self.mode == 'diag':
            L_flat = x[:, idx:idx + self.nb_actions]
            idx += self.nb_actions
        else:
            L_flat = None
        assert L_flat is not None
        mu = x[:, idx:idx + self.nb_actions]
        idx += self.nb_actions
        a = x[:, idx:idx + self.nb_actions]
        idx += self.nb_actions

        if self.mode == 'full':
            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            L = None
            LT = None
            if K._BACKEND == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, L_acc, LT_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)],
                                         x)
                    diag = K.exp(T.diag(x_) + K.epsilon())
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                                         diag)
                    return x_, x_.T

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                results, _ = theano.scan(fn=fn,
                                         sequences=L_flat,
                                         outputs_info=outputs_info)
                L, LT = results
            elif K._BACKEND == 'tensorflow':
                import tensorflow as tf

                # Number of elements in a triangular matrix.
                nb_elems = (self.nb_actions * self.nb_actions +
                            self.nb_actions) // 2

                # Create mask for the diagonal elements in L_flat. This is used to exponentiate
                # only the diagonal elements, which is done before gathering.
                diag_indices = [0]
                for row in range(1, self.nb_actions):
                    diag_indices.append(diag_indices[-1] + (row + 1))
                diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
                diag_mask[np.array(diag_indices) + 1] = 1
                diag_mask = K.variable(diag_mask)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
                L_flat = tf.concat(1, [zeros, L_flat])

                # Create mask that can be used to gather elements from L_flat and put them
                # into a lower triangular matrix.
                tril_mask = np.zeros((self.nb_actions, self.nb_actions),
                                     dtype='int32')
                tril_mask[np.tril_indices(self.nb_actions)] = range(
                    1, nb_elems + 1)

                # Finally, process each element of the batch.
                init = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]

                def fn(a, x):
                    # Exponentiate everything. This is much easier than only exponentiating
                    # the diagonal elements, and, usually, the action space is relatively low.
                    x_ = K.exp(x + K.epsilon())
                    # Only keep the diagonal elements.
                    x_ *= diag_mask
                    # Add the original, non-diagonal elements.
                    x_ += x * (1. - diag_mask)
                    # Finally, gather everything into a lower triangular matrix.
                    L_ = tf.gather(x_, tril_mask)
                    return [L_, tf.transpose(L_)]

                tmp = tf.scan(fn, L_flat, initializer=init)
                if isinstance(tmp, (list, tuple)):
                    # TensorFlow 0.10 now returns a tuple of tensors.
                    L, LT = tmp
                else:
                    # Old TensorFlow < 0.10 returns a shared tensor.
                    L = tmp[:, 0, :, :]
                    LT = tmp[:, 1, :, :]
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(
                    K._BACKEND))
            assert L is not None
            assert LT is not None
            P = K.batch_dot(L, LT)
        elif self.mode == 'diag':
            if K._BACKEND == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, P_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                                         x)
                    return x_

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                P, _ = theano.scan(fn=fn,
                                   sequences=L_flat,
                                   outputs_info=outputs_info)
            elif K._BACKEND == 'tensorflow':
                import tensorflow as tf

                # Create mask that can be used to gather elements from L_flat and put them
                # into a diagonal matrix.
                diag_mask = np.zeros((self.nb_actions, self.nb_actions),
                                     dtype='int32')
                diag_mask[np.diag_indices(self.nb_actions)] = range(
                    1, self.nb_actions + 1)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
                L_flat = tf.concat(1, [zeros, L_flat])

                # Finally, process each element of the batch.
                def fn(a, x):
                    x_ = tf.gather(x, diag_mask)
                    return x_

                P = tf.scan(fn,
                            L_flat,
                            initializer=K.zeros(
                                (self.nb_actions, self.nb_actions)))
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(
                    K._BACKEND))
        assert P is not None
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, dim=1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, dim=-1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A
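A plain-NumPy sketch of what the backend-specific code above computes for a single sample in 'full' mode (shapes are illustrative): scatter L_flat into a lower-triangular L, exponentiate the diagonal, form P = L L^T, and evaluate A = -0.5 (a - mu)^T P (a - mu).

import numpy as np

nb_actions = 3
n_elems = nb_actions * (nb_actions + 1) // 2
L_flat = np.random.randn(n_elems)
mu = np.random.randn(nb_actions)
a = np.random.randn(nb_actions)

L = np.zeros((nb_actions, nb_actions))
L[np.tril_indices(nb_actions)] = L_flat
L[np.diag_indices(nb_actions)] = np.exp(np.diag(L))  # positive diagonal
P = L @ L.T  # positive definite by construction
diff = a - mu
A = -0.5 * diff @ P @ diff  # scalar advantage term
print(A)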
Example #40
0
    def gradient_marginal_loglikelihood(self, observations, parameters,
            forward_message=None, backward_message=None, weights=None,
            tqdm=None):
        # Forward Pass
        forward_messages = self.forward_pass(observations, parameters,
                forward_message, include_init_message=True)
        # Backward Pass
        backward_messages = self.backward_pass(observations, parameters,
                backward_message, include_init_message=True)

        # Gradients
        grad = {var: np.zeros_like(value)
                for var, value in parameters.as_dict().items()}

        Pi, expanded_pi = parameters.pi, parameters.expanded_pi
        D = parameters.D
        LRinv, Rinv, R = parameters.LRinv, parameters.Rinv, parameters.R

        pbar = enumerate(zip(forward_messages[:-1], backward_messages[1:]))
        if tqdm is not None:
            pbar = tqdm(pbar)
            pbar.set_description("gradient loglike")
        for t, (forward_t, backward_t) in pbar:
            # r_t is Pr(z_{t-1} | y_{< t})
            # s_t is Pr(z_t | y_{< t})
            # q_t is Pr(y_{> t} | z_t)
            r_t = forward_t['prob_vector']
            s_t = np.dot(r_t, Pi)
            q_t = backward_t['likelihood_vector']

            weight_t = 1.0 if weights is None else weights[t]

            # Calculate P_t = Pr(y_t | z_t)
            y_cur = observations[t]
            P_t, _ = self._likelihoods(
                    y_cur=y_cur,
                    parameters=parameters
                )

            # Marginal + Pairwise Marginal
            joint_post = np.diag(r_t).dot(Pi).dot(np.diag(P_t*q_t))
            joint_post = joint_post/np.sum(joint_post)
            marg_post = np.sum(joint_post, axis=0)

            # Grad for pi
            if parameters.pi_type == "logit":
                # Gradient of logit_pi
                grad['logit_pi'] += weight_t * (joint_post - \
                        np.diag(np.sum(joint_post, axis=1)).dot(Pi))
            elif parameters.pi_type == "expanded":
                grad['expanded_pi'] += weight_t * np.array([
                    (expanded_pi[k]**-1)*(
                        joint_post[k] - np.sum(joint_post[k])*Pi[k])
                    for k in range(self.num_states)
                    ])
            else:
                raise RuntimeError()

            # grad for mu and LRinv
            y_prev = y_cur[1:].flatten()
            for k, D_k, LRinv_k, Rinv_k, R_k in zip(
                    range(self.num_states), D, LRinv, Rinv, R):
                diff_k = y_cur[0] - np.dot(D_k, y_prev)
                grad['D'][k] += weight_t * (
                        np.outer(Rinv_k.dot(diff_k), y_prev) * marg_post[k])
                grad_LRinv_k = weight_t * (
                        (R_k - np.outer(diff_k, diff_k)).dot(LRinv_k)
                        ) * marg_post[k]
                grad['LRinv_vec'][k] += grad_LRinv_k[np.tril_indices(self.m)]

        return grad
Example #41
0
    def test_ft_aoao(self):
        #coords = pdft.gen_grid.gen_uniform_grids(cell)
        #aoR = pdft.numint.eval_ao(cell, coords)
        #ngrids, nao = aoR.shape
        #ref = numpy.asarray([tools.fft(aoR[:,i].conj()*aoR[:,j], cell.mesh)
        #                     for i in range(nao) for j in range(nao)])
        #ref = ref.reshape(nao,nao,-1).transpose(2,0,1) * (cell.vol/ngrids)
        #dat = ft_ao.ft_aopair(cell, cell.Gv, aosym='s1hermi')
        #self.assertAlmostEqual(numpy.linalg.norm(ref[:,0,0]-dat[:,0,0])    , 0, 5)
        #self.assertAlmostEqual(numpy.linalg.norm(ref[:,1,1]-dat[:,1,1])    , 0.02315483195832373, 4)
        #self.assertAlmostEqual(numpy.linalg.norm(ref[:,2:,2:]-dat[:,2:,2:]), 0, 9)
        #self.assertAlmostEqual(numpy.linalg.norm(ref[:,0,2:]-dat[:,0,2:])  , 0, 9)
        #self.assertAlmostEqual(numpy.linalg.norm(ref[:,2:,0]-dat[:,2:,0])  , 0, 9)
        #idx = numpy.tril_indices(nao)
        #ref = dat[:,idx[0],idx[1]]
        #dat = ft_ao.ft_aopair(cell, cell.Gv, aosym='s2')
        #self.assertAlmostEqual(abs(dat-ref).sum(), 0, 9)

        coords = pdft.gen_grid.gen_uniform_grids(cell1)
        Gv, Gvbase, kws = cell1.get_Gv_weights(cell1.mesh)
        b = cell1.reciprocal_vectors()
        gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
        dat = ft_ao.ft_aopair(cell1,
                              cell1.Gv,
                              aosym='s1',
                              b=b,
                              gxyz=gxyz,
                              Gvbase=Gvbase)
        self.assertAlmostEqual(lib.fp(dat),
                               1.5666516306798806 + 1.953555017583245j, 9)
        dat = ft_ao.ft_aopair(cell1,
                              cell1.Gv,
                              aosym='s2',
                              b=b,
                              gxyz=gxyz,
                              Gvbase=Gvbase)
        self.assertAlmostEqual(lib.fp(dat),
                               -0.85276967757297917 + 1.0378751267506394j, 9)
        dat = ft_ao.ft_aopair(cell1,
                              cell1.Gv,
                              aosym='s1hermi',
                              b=b,
                              gxyz=gxyz,
                              Gvbase=Gvbase)
        self.assertAlmostEqual(lib.fp(dat),
                               1.5666516306798806 + 1.953555017583245j, 9)
        aoR = pdft.numint.eval_ao(cell1, coords)
        ngrids, nao = aoR.shape
        aoaoR = numpy.einsum('pi,pj->ijp', aoR, aoR)
        ref = tools.fft(aoaoR.reshape(nao * nao, -1), cell1.mesh)
        ref = ref.reshape(nao, nao, -1).transpose(2, 0,
                                                  1) * (cell1.vol / ngrids)
        self.assertAlmostEqual(numpy.linalg.norm(ref[:, 0, 0] - dat[:, 0, 0]),
                               0, 7)
        self.assertAlmostEqual(numpy.linalg.norm(ref[:, 1, 1] - dat[:, 1, 1]),
                               0, 7)
        self.assertAlmostEqual(
            numpy.linalg.norm(ref[:, 2:, 2:] - dat[:, 2:, 2:]), 0, 7)
        self.assertAlmostEqual(
            numpy.linalg.norm(ref[:, 0, 2:] - dat[:, 0, 2:]), 0, 7)
        self.assertAlmostEqual(
            numpy.linalg.norm(ref[:, 2:, 0] - dat[:, 2:, 0]), 0, 7)
        idx = numpy.tril_indices(nao)
        ref = dat[:, idx[0], idx[1]]
        dat = ft_ao.ft_aopair(cell1, cell1.Gv, aosym='s2')
        self.assertAlmostEqual(abs(dat - ref).sum(), 0, 9)
Example #42
0
    def __init__(self, m=None, P=None, U=None, S=None, Pm=None, L=None):
        """Initialize a gaussian pdf given a valid combination of its
        parameters. Valid combinations are: m-P, m-U, m-S, Pm-P, Pm-U, Pm-S.

        :param m: mean
        :param P: precision
        :param U: upper triangular precision factor such that U'U = P
        :param S: covariance
        :param C: upper or lower triangular covariance factor, in any case
            S = C'C
        :param Pm: precision times mean such that P*m = Pm
        :param L: lower triangular covariance factor given as a 1D array such
            that LL' = S
        """
        if m is not None:
            m = np.asarray(m)
            self.m = m
            self.ndim = m.size

            if P is not None:
                P = np.asarray(P)
                L = np.linalg.cholesky(P)
                self.P = P
                self.C = np.linalg.inv(L)
                self.S = np.dot(self.C.T, self.C)
                self.Pm = np.dot(P, m)
                self.logdetP = 2.0 * np.sum(np.log(np.diagonal(L)))

            elif U is not None:
                U = np.asarray(U)
                self.P = np.dot(U.T, U)
                self.C = np.linalg.inv(U.T)
                self.S = np.dot(self.C.T, self.C)
                self.Pm = np.dot(self.P, m)
                self.logdetP = 2.0 * np.sum(np.log(np.diagonal(U)))

            elif L is not None:
                L = np.asarray(L)
                Lm = np.zeros((self.ndim, self.ndim))
                idx_l = np.tril_indices(self.ndim, -1)
                idx_d = np.diag_indices(self.ndim)

                Lm[idx_l] = L[self.ndim:]
                Lm[idx_d] = L[0:self.ndim]
                self.C = Lm.T
                self.S = np.dot(self.C.T, self.C)
                self.P = np.linalg.inv(self.S)
                self.Pm = np.dot(self.P, m)
                self.logdetP = -2.0 * np.sum(np.log(np.diagonal(self.C)))

            elif S is not None:
                S = np.asarray(S)
                self.P = np.linalg.inv(S)
                self.C = np.linalg.cholesky(S).T
                self.S = S
                self.Pm = np.dot(self.P, m)
                self.logdetP = -2.0 * np.sum(np.log(np.diagonal(self.C)))

            else:
                raise ValueError("Precision information missing.")

        elif Pm is not None:
            Pm = np.asarray(Pm)
            self.Pm = Pm
            self.ndim = Pm.size

            if P is not None:
                P = np.asarray(P)
                L = np.linalg.cholesky(P)
                self.P = P
                self.C = np.linalg.inv(L)
                self.S = np.dot(self.C.T, self.C)
                self.m = np.linalg.solve(P, Pm)
                self.logdetP = 2.0 * np.sum(np.log(np.diagonal(L)))

            elif U is not None:
                U = np.asarray(U)
                self.P = np.dot(U.T, U)
                self.C = np.linalg.inv(U.T)
                self.S = np.dot(self.C.T, self.C)
                self.m = np.linalg.solve(self.P, Pm)
                self.logdetP = 2.0 * np.sum(np.log(np.diagonal(U)))

            elif S is not None:
                S = np.asarray(S)
                self.P = np.linalg.inv(S)
                self.C = np.linalg.cholesky(S).T
                self.S = S
                self.m = np.dot(S, Pm)
                self.logdetP = -2.0 * np.sum(np.log(np.diagonal(self.C)))

            else:
                raise ValueError("Precision information missing.")

        else:
            raise ValueError("Mean information missing.")
Example #43
0
def s2ij_s1(symmetry, eri, norb):
    idx = numpy.tril_indices(norb)
    eri1 = numpy.empty((norb, norb, norb, norb))
    eri1[idx] = eri.reshape(-1, norb, norb)
    eri1[idx[1], idx[0]] = eri.reshape(-1, norb, norb)
    return eri1
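A quick round-trip check of the unpacking above on random data (norb is hypothetical; eri is packed so that its leading pair index runs over the lower triangle of the first two orbital indices):

import numpy as np

norb = 3
npair = norb * (norb + 1) // 2
eri = np.random.rand(npair, norb, norb)

idx = np.tril_indices(norb)
eri1 = np.empty((norb, norb, norb, norb))
eri1[idx] = eri
eri1[idx[1], idx[0]] = eri
# the first two indices are now symmetric
assert np.allclose(eri1, eri1.transpose(1, 0, 2, 3))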
Example #44
0
    def get_topologies(self, symbols, saturate=False):
        ''' Return the possible topologies of a given chemical species.

        Parameters
        ----------
        symbols : str
            Atomic symbols to construct the topologies from.
        saturate : bool
            Saturate the molecule with hydrogen based on the
            default.radicals set.

        Returns
        -------
        molecules : list (N,)
            Gratoms objects with unique connectivity matrix attached.
            No 3D positions will be provided for these structures.

        '''
        num, cnt = Molecule.get_atomic_numbers(symbols, True)
        # print(num, cnt)
        mcnt = cnt[num != 1]
        mnum = num[num != 1]

        if cnt[num == 1]:
            hcnt = cnt[num == 1][0]
        else:
            hcnt = 0

        elements = np.repeat(mnum, mcnt)
        max_degree = defaults.get('radicals')[elements]
        n = mcnt.sum()

        hmax = int(max_degree.sum() - (n - 1) * 2)
        if hcnt > hmax:
            hcnt = hmax

        if saturate:
            hcnt = hmax
        if n == 1:
            atoms = Gratoms(elements, cell=[1, 1, 1])
            hatoms = Molecule.hydrogenate(atoms, np.array([hcnt]))
            return [hatoms]
        elif n == 0:
            hatoms = Gratoms('H{}'.format(hcnt))
            if hcnt == 2:
                hatoms.graph.add_edge(0, 1, bonds=1)
            return [hatoms]

        ln = np.arange(n).sum()
        il = np.tril_indices(n, -1)

        backbones, molecules = [], []
        combos = combinations(np.arange(ln), n - 1)
        for c in combos:
            # Construct the connectivity matrix
            ltm = np.zeros(ln)
            ltm[np.atleast_2d(c)] = 1

            connectivity = np.zeros((n, n))
            connectivity[il] = ltm
            connectivity = np.maximum(connectivity, connectivity.T)

            degree = connectivity.sum(axis=0)

            # Not fully connected (subgraph)
            if np.any(degree == 0) or not \
                    is_connected(from_numpy_matrix(connectivity)):
                continue

            # Overbonded atoms.
            remaining_bonds = (max_degree - degree).astype(int)
            if np.any(remaining_bonds < 0):
                continue

            atoms = Gratoms(numbers=elements,
                            edges=connectivity,
                            cell=[1, 1, 1])

            isomorph = False
            for G0 in backbones:
                if atoms.is_isomorph(G0):
                    isomorph = True
                    break

            if not isomorph:
                backbones += [atoms]

                # The backbone is saturated, do not enumerate
                if hcnt == hmax:
                    hatoms = Molecule.hydrogenate(atoms, remaining_bonds)
                    molecules += [hatoms]
                    continue

                # Enumerate hydrogens across backbone
                for bins in self.bin_hydrogen(hcnt, n):
                    if not np.all(bins <= remaining_bonds):
                        continue

                    hatoms = Molecule.hydrogenate(atoms, bins)

                    isomorph = False
                    for G0 in molecules:
                        if hatoms.is_isomorph(G0):
                            isomorph = True
                            break

                    if not isomorph:
                        molecules += [hatoms]

        return molecules
Example #45
0
def norm(A_lamb):
    return np.linalg.norm(A_lamb, 'fro')

###parameters
muTot = 200
mu_arr = np.logspace(-10, 5.0, muTot)
norm_arr_nonint = np.zeros(muTot)
norm_arr_int = np.zeros(muTot)
hz = 5.00  # for the interacting Hamiltonian

###
t_start_nonint = time.time()
###nonint
H = Ham_nonint(L)
wij, num_lamb_mat = output_gauge_potent(H, L)
###finding minimum and maximum wij
index_lower = np.tril_indices(2**L, -1)
wij_arr = wij[index_lower]
wij_min_nonint = min(wij_arr)
wij_max_nonint = max(wij_arr)
print(wij_min_nonint, wij_max_nonint)
###running the loop
for i in range(muTot):
    mu = mu_arr[i]
    A_lamb = gauge_potent_mu(wij, num_lamb_mat, mu)
    norm_arr_nonint[i] = norm(A_lamb)

t_end_nonint = time.time()
t_nonint_code = (t_end_nonint - t_start_nonint) / 60

#######
Example #46
0
    def test_outcore(self):
        ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        cderi0 = df.incore.cholesky_eri(mol)
        df.outcore.cholesky_eri(mol, ftmp.name)
        with h5py.File(ftmp.name, 'r') as feri:
            self.assertTrue(numpy.allclose(feri['j3c'], cderi0))

        df.outcore.cholesky_eri(mol, ftmp.name, ioblk_size=.05)
        with h5py.File(ftmp.name, 'r') as feri:
            self.assertTrue(numpy.allclose(feri['j3c'], cderi0))

        nao = mol.nao_nr()
        naux = cderi0.shape[0]
        df.outcore.general(mol, (numpy.eye(nao), ) * 2,
                           ftmp.name,
                           max_memory=.05,
                           ioblk_size=.02)
        with h5py.File(ftmp.name, 'r') as feri:
            self.assertTrue(numpy.allclose(feri['eri_mo'], cderi0))

        ####
        buf = numpy.zeros((naux, nao, nao))
        idx = numpy.tril_indices(nao)
        buf[:, idx[0], idx[1]] = cderi0
        buf[:, idx[1], idx[0]] = cderi0
        cderi0 = buf
        df.outcore.cholesky_eri(mol, ftmp.name, aosym='s1', ioblk_size=.05)
        with h5py.File(ftmp.name, 'r') as feri:
            self.assertTrue(
                numpy.allclose(feri['j3c'], cderi0.reshape(naux, -1)))

        ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        numpy.random.seed(1)
        co = numpy.random.random((nao, 4))
        cv = numpy.random.random((nao, 25))
        cderi0 = numpy.einsum('kpq,pi,qj->kij', cderi0, co, cv)
        df.outcore.general(mol, (co, cv), ftmp.name, ioblk_size=.05)
        with h5py.File(ftmp.name, 'r') as feri:
            self.assertTrue(
                numpy.allclose(feri['eri_mo'], cderi0.reshape(naux, -1)))

        cderi0 = df.incore.aux_e2(mol,
                                  auxmol,
                                  intor='int3c2e_ip1_sph',
                                  aosym='s1',
                                  comp=3).reshape(3, nao**2, -1)
        j2c = df.incore.fill_2c2e(mol, auxmol)
        low = scipy.linalg.cholesky(j2c, lower=True)
        cderi0 = [
            scipy.linalg.solve_triangular(low, j3c.T, lower=True)
            for j3c in cderi0
        ]
        nao = mol.nao_nr()
        df.outcore.general(mol, (numpy.eye(nao), ) * 2,
                           ftmp.name,
                           int3c='int3c2e_ip1_sph',
                           aosym='s1',
                           int2c='int2c2e_sph',
                           comp=3,
                           max_memory=.05,
                           ioblk_size=.02)
        with h5py.File(ftmp.name, 'r') as feri:
            self.assertTrue(numpy.allclose(feri['eri_mo'], cderi0))
Example #47
0
def test_silhouette_paper_example():
    # Explicitly check per-sample results against Rousseeuw (1987)
    # Data from Table 1
    lower = [
        5.58,
        7.00, 6.50,
        7.08, 7.00, 3.83,
        4.83, 5.08, 8.17, 5.83,
        2.17, 5.75, 6.67, 6.92, 4.92,
        6.42, 5.00, 5.58, 6.00, 4.67, 6.42,
        3.42, 5.50, 6.42, 6.42, 5.00, 3.92, 6.17,
        2.50, 4.92, 6.25, 7.33, 4.50, 2.25, 6.33, 2.75,
        6.08, 6.67, 4.25, 2.67, 6.00, 6.17, 6.17, 6.92, 6.17,
        5.25, 6.83, 4.50, 3.75, 5.75, 5.42, 6.08, 5.83, 6.67, 3.67,
        4.75, 3.00, 6.08, 6.67, 5.00, 5.58, 4.83, 6.17, 5.67, 6.50, 6.92,
    ]
    D = np.zeros((12, 12))
    D[np.tril_indices(12, -1)] = lower
    D += D.T

    names = ["BEL", "BRA", "CHI", "CUB", "EGY", "FRA",
             "IND", "ISR", "USA", "USS", "YUG", "ZAI"]

    # Data from Figure 2
    labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
    expected1 = {
        "USA": 0.43,
        "BEL": 0.39,
        "FRA": 0.35,
        "ISR": 0.30,
        "BRA": 0.22,
        "EGY": 0.20,
        "ZAI": 0.19,
        "CUB": 0.40,
        "USS": 0.34,
        "CHI": 0.33,
        "YUG": 0.26,
        "IND": -0.04,
    }
    score1 = 0.28

    # Data from Figure 3
    labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
    expected2 = {
        "USA": 0.47,
        "FRA": 0.44,
        "BEL": 0.42,
        "ISR": 0.37,
        "EGY": 0.02,
        "ZAI": 0.28,
        "BRA": 0.25,
        "IND": 0.17,
        "CUB": 0.48,
        "USS": 0.44,
        "YUG": 0.31,
        "CHI": 0.31,
    }
    score2 = 0.33

    for labels, expected, score in [
        (labels1, expected1, score1),
        (labels2, expected2, score2),
    ]:
        expected = [expected[name] for name in names]
        # we check to 2dp because that's what's in the paper
        assert silhouette_samples(
            D, np.array(labels), metric="precomputed"
        ) == pytest.approx(expected, abs=1e-2)
        assert silhouette_score(
            D, np.array(labels), metric="precomputed"
        ) == pytest.approx(score, abs=1e-2)
Example #48
0
def fcidump(wfn, fname='INTDUMP', oe_ints=None):
    """Save integrals to file in FCIDUMP format as defined in Comp. Phys. Commun. 54 75 (1989)
    Additional one-electron integrals, including orbital energies, can also be saved.
    This latter format can be used with the HANDE QMC code but is not standard.

    :returns: None

    :raises: ValidationError when SCF wavefunction is not RHF

    :type wfn: :py:class:`~psi4.core.Wavefunction`
    :param wfn: set of molecule, basis, orbitals from which to generate cube files
    :param fname: name of the integrals file, defaults to INTDUMP
    :param oe_ints: list of additional one-electron integrals to save to file.
    So far only EIGENVALUES is a valid option.

    :examples:

    >>> # [1] Save one- and two-electron integrals to standard FCIDUMP format
    >>> E, wfn = energy('scf', return_wfn=True)
    >>> fcidump(wfn)

    >>> # [2] Save orbital energies, one- and two-electron integrals.
    >>> E, wfn = energy('scf', return_wfn=True)
    >>> fcidump(wfn, oe_ints=['EIGENVALUES'])

    """
    # Get some options
    reference = core.get_option('SCF', 'REFERENCE')
    ints_tolerance = core.get_global_option('INTS_TOLERANCE')
    # Some sanity checks
    if reference not in ['RHF', 'UHF']:
        raise ValidationError(
            'FCIDUMP not implemented for {} references\n'.format(reference))
    if oe_ints is None:
        oe_ints = []

    molecule = wfn.molecule()
    docc = wfn.doccpi()
    frzcpi = wfn.frzcpi()
    frzvpi = wfn.frzvpi()
    active_docc = docc - frzcpi
    active_socc = wfn.soccpi()
    active_mopi = wfn.nmopi() - frzcpi - frzvpi

    nbf = active_mopi.sum() if wfn.same_a_b_orbs() else 2 * active_mopi.sum()
    nirrep = wfn.nirrep()
    nelectron = 2 * active_docc.sum() + active_socc.sum()
    irrep_map = _irrep_map(wfn)

    wfn_irrep = 0
    for h, n_socc in enumerate(active_socc):
        if n_socc % 2 == 1:
            wfn_irrep ^= h

    core.print_out('Writing integrals in FCIDUMP format to ' + fname + '\n')
    # Generate FCIDUMP header
    header = '&FCI\n'
    header += 'NORB={:d},\n'.format(nbf)
    header += 'NELEC={:d},\n'.format(nelectron)
    header += 'MS2={:d},\n'.format(wfn.nalpha() - wfn.nbeta())
    header += 'UHF=.{}.,\n'.format(not wfn.same_a_b_orbs()).upper()
    orbsym = ''
    for h in range(active_mopi.n()):
        for n in range(frzcpi[h], frzcpi[h] + active_mopi[h]):
            orbsym += '{:d},'.format(irrep_map[h])
            if not wfn.same_a_b_orbs():
                orbsym += '{:d},'.format(irrep_map[h])
    header += 'ORBSYM={}\n'.format(orbsym)
    header += 'ISYM={:d},\n'.format(irrep_map[wfn_irrep])
    header += '&END\n'
    with open(fname, 'w') as intdump:
        intdump.write(header)

    # Get an IntegralTransform object
    check_iwl_file_from_scf_type(core.get_global_option('SCF_TYPE'), wfn)
    spaces = [core.MOSpace.all()]
    trans_type = core.IntegralTransform.TransformationType.Restricted
    if not wfn.same_a_b_orbs():
        trans_type = core.IntegralTransform.TransformationType.Unrestricted
    ints = core.IntegralTransform(wfn, spaces, trans_type)
    ints.transform_tei(core.MOSpace.all(), core.MOSpace.all(),
                       core.MOSpace.all(), core.MOSpace.all())
    core.print_out('Integral transformation complete!\n')

    DPD_info = {
        'instance_id': ints.get_dpd_id(),
        'alpha_MO': ints.DPD_ID('[A>=A]+'),
        'beta_MO': 0
    }
    if not wfn.same_a_b_orbs():
        DPD_info['beta_MO'] = ints.DPD_ID("[a>=a]+")
    # Write TEI to fname in FCIDUMP format
    core.fcidump_tei_helper(nirrep, wfn.same_a_b_orbs(), DPD_info,
                            ints_tolerance, fname)

    # Read-in OEI and write them to fname in FCIDUMP format
    # Indexing functions to translate from zero-based (C and Python) to
    # one-based (Fortran)
    mo_idx = lambda x: x + 1
    alpha_mo_idx = lambda x: 2 * x + 1
    beta_mo_idx = lambda x: 2 * (x + 1)

    with open(fname, 'a') as intdump:
        core.print_out('Writing frozen core operator in FCIDUMP format to ' +
                       fname + '\n')
        if reference == 'RHF':
            PSIF_MO_FZC = 'MO-basis Frozen-Core Operator'
            moH = core.Matrix(PSIF_MO_FZC, wfn.nmopi(), wfn.nmopi())
            moH.load(core.IO.shared_object(), psif.PSIF_OEI)
            mo_slice = core.Slice(frzcpi, active_mopi)
            MO_FZC = moH.get_block(mo_slice, mo_slice)
            offset = 0
            for h, block in enumerate(MO_FZC.nph):
                il = np.tril_indices(block.shape[0])
                for index, x in np.ndenumerate(block[il]):
                    row = mo_idx(il[0][index] + offset)
                    col = mo_idx(il[1][index] + offset)
                    if (abs(x) > ints_tolerance):
                        intdump.write(
                            '{:29.20E} {:4d} {:4d} {:4d} {:4d}\n'.format(
                                x, row, col, 0, 0))
                offset += block.shape[0]
            # Additional one-electron integrals as requested in oe_ints
            # Orbital energies
            core.print_out('Writing orbital energies in FCIDUMP format to ' +
                           fname + '\n')
            if 'EIGENVALUES' in oe_ints:
                eigs_dump = write_eigenvalues(
                    wfn.epsilon_a().get_block(mo_slice).to_array(), mo_idx)
                intdump.write(eigs_dump)
        else:
            PSIF_MO_A_FZC = 'MO-basis Alpha Frozen-Core Oper'
            moH_A = core.Matrix(PSIF_MO_A_FZC, wfn.nmopi(), wfn.nmopi())
            moH_A.load(core.IO.shared_object(), psif.PSIF_OEI)
            mo_slice = core.Slice(frzcpi, active_mopi)
            MO_FZC_A = moH_A.get_block(mo_slice, mo_slice)
            offset = 0
            for h, block in enumerate(MO_FZC_A.nph):
                il = np.tril_indices(block.shape[0])
                for index, x in np.ndenumerate(block[il]):
                    row = alpha_mo_idx(il[0][index] + offset)
                    col = alpha_mo_idx(il[1][index] + offset)
                    if (abs(x) > ints_tolerance):
                        intdump.write(
                            '{:29.20E} {:4d} {:4d} {:4d} {:4d}\n'.format(
                                x, row, col, 0, 0))
                offset += block.shape[0]
            PSIF_MO_B_FZC = 'MO-basis Beta Frozen-Core Oper'
            moH_B = core.Matrix(PSIF_MO_B_FZC, wfn.nmopi(), wfn.nmopi())
            moH_B.load(core.IO.shared_object(), psif.PSIF_OEI)
            mo_slice = core.Slice(frzcpi, active_mopi)
            MO_FZC_B = moH_B.get_block(mo_slice, mo_slice)
            offset = 0
            for h, block in enumerate(MO_FZC_B.nph):
                il = np.tril_indices(block.shape[0])
                for index, x in np.ndenumerate(block[il]):
                    row = beta_mo_idx(il[0][index] + offset)
                    col = beta_mo_idx(il[1][index] + offset)
                    if (abs(x) > ints_tolerance):
                        intdump.write(
                            '{:29.20E} {:4d} {:4d} {:4d} {:4d}\n'.format(
                                x, row, col, 0, 0))
                offset += block.shape[0]
            # Additional one-electron integrals as requested in oe_ints
            # Orbital energies
            core.print_out('Writing orbital energies in FCIDUMP format to ' +
                           fname + '\n')
            if 'EIGENVALUES' in oe_ints:
                alpha_eigs_dump = write_eigenvalues(
                    wfn.epsilon_a().get_block(mo_slice).to_array(),
                    alpha_mo_idx)
                beta_eigs_dump = write_eigenvalues(
                    wfn.epsilon_b().get_block(mo_slice).to_array(),
                    beta_mo_idx)
                intdump.write(alpha_eigs_dump + beta_eigs_dump)
        # Dipole integrals
        #core.print_out('Writing dipole moment OEI in FCIDUMP format to ' + fname + '\n')
        # Traceless quadrupole integrals
        #core.print_out('Writing traceless quadrupole moment OEI in FCIDUMP format to ' + fname + '\n')
        # Frozen core + nuclear repulsion energy
        core.print_out(
            'Writing frozen core + nuclear repulsion energy in FCIDUMP format to '
            + fname + '\n')
        e_fzc = ints.get_frozen_core_energy()
        e_nuc = molecule.nuclear_repulsion_energy(
            wfn.get_dipole_field_strength())
        intdump.write('{: 29.20E} {:4d} {:4d} {:4d} {:4d}\n'.format(
            e_fzc + e_nuc, 0, 0, 0, 0))
    core.print_out(
        'Done generating {} with integrals in FCIDUMP format.\n'.format(fname))
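Each OEI loop above follows the same recipe: walk the lower triangle of a symmetric block with np.tril_indices, convert to one-based (Fortran) indices, and write value/row/col lines. A stripped-down sketch of that recipe for a single symmetric matrix (the file name and threshold here are illustrative, not psi4 API):

import numpy as np

h = np.random.rand(4, 4)
h = 0.5 * (h + h.T)  # symmetric one-electron matrix
ints_tolerance = 1e-14
mo_idx = lambda x: x + 1  # zero-based -> one-based

with open('toy_oei.dump', 'w') as intdump:
    il = np.tril_indices(h.shape[0])
    for index, x in np.ndenumerate(h[il]):
        row = mo_idx(il[0][index])
        col = mo_idx(il[1][index])
        if abs(x) > ints_tolerance:
            intdump.write('{:29.20E} {:4d} {:4d} {:4d} {:4d}\n'.format(
                x, row, col, 0, 0))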
Example #49
0
def psthcorr(rec,
             nids=None,
             ssnids=None,
             ssseps=None,
             natexps=False,
             strange=None,
             plot=True):
    if nids is None:
        nids = sorted(rec.n)  # use active neurons
    if ssnids is None:
        ssnids = nids  # use nids as the superset
    nn = len(nids)
    nnss = len(ssnids)
    # note that using norm=True or norm='ntrials' doesn't seem to make a difference to the
    # results, probably doesn't matter for calculating corrs:
    midbins, psths, spikets = rec.psth(nids=nids,
                                       natexps=natexps,
                                       strange=strange,
                                       plot=False,
                                       binw=0.02,
                                       tres=0.005,
                                       norm=True)
    rho = np.corrcoef(psths)  # defaults to bias=1
    rho[np.diag_indices(
        nn)] = np.nan  # nan the diagonal, which imshow plots as white
    ssrho = np.zeros((nnss, nnss))  # superset rho matrix
    ssrho.fill(np.nan)  # init with nans
    # load up values into appropriate spots in superset rho matrix:
    for i in range(nn):
        for j in range(nn):
            ssi, ssj = ssnids.searchsorted([nids[i], nids[j]])
            ssrho[ssi, ssj] = rho[i, j]

    if not plot:
        return ssrho

    # plot superset rho matrix:
    figure(figsize=FIGSIZE)
    imshow(ssrho, vmin=-1, vmax=1, cmap='jet')  # cmap='gray' is too bland
    ssnidticks = np.arange(0, nnss, 10)
    xticks(ssnidticks)
    yticks(ssnidticks)
    if SHOWCOLORBAR:
        colorbar()
    basetitle = rec.absname
    if strange is not None:
        strange_sec = tuple(np.array(strange) /
                            1e6)  # convert to sec for display
        basetitle += '_strange=(%.f, %.f)' % strange_sec
    gcfm().window.setWindowTitle(basetitle + '_rho_mat')
    tight_layout(pad=0.3)

    # plot rho histogram:
    lti = np.tril_indices(
        nnss, -1)  # lower triangle (below diagonal) indices of ssrho
    ssrhol = ssrho[lti]
    notnanis = np.logical_not(np.isnan(ssrhol))  # indices of non-nan values
    fssrhol = ssrhol[notnanis]  # ssrhol filtered out for nans
    fssrholmean = fssrhol.mean()
    t, p = ttest_1samp(fssrhol, 0)  # 2-sided ttest relative to 0
    print('mean=%g, t=%g, p=%g' % (fssrholmean, t, p))
    if p < ALPHA0:
        pstring = '$p<%g$' % ceilsigfig(p)
    else:
        pstring = '$p>%g$' % floorsigfig(p)
    figure(figsize=FIGSIZE)
    rhobins = np.arange(RHOMIN, RHOMAX + 0.0333,
                        0.0333)  # left edges + rightmost edge
    n = hist(fssrhol, bins=rhobins, color='k')[0]
    axvline(x=fssrholmean, c='r',
            ls='--')  # draw vertical red line at mean fssrhol
    axvline(x=0, c='e', ls='--')  # draw vertical grey line at x=0
    xlim(xmin=RHOMIN, xmax=RHOMAX)
    ylim(ymax=n.max())  # effectively normalizes the histogram
    rhoticks = np.arange(-0.2, 1 + 0.2, 0.2)  # excluding the final 1
    xticks(rhoticks)
    yticks([n.max()])  # turn off y ticks to save space
    #yticks([0, n.max()])
    text(0.98,
         0.98,
         '$\\mu$=%.2g\n%s' % (fssrholmean, pstring),
         color='k',
         transform=gca().transAxes,
         horizontalalignment='right',
         verticalalignment='top')
    gcfm().window.setWindowTitle(basetitle + '_rho_hist')
    tight_layout(pad=0.3)

    # plot rho vs separation:
    fssseps = ssseps[notnanis]  # ssseps filtered out for nans
    figure(figsize=FIGSIZE)
    # scatter plot:
    pl.plot(fssseps, fssrhol, 'k.')
    # bin seps and plot mean rho in each bin:
    sepbins = np.arange(0, fssseps.max() + SEPBINW, SEPBINW)  # left edges
    sepmeans, rhomeans, rhostds = scatterbin(fssseps, fssrhol, sepbins)
    #pl.plot(sepmeans, rhomeans, 'r.-', ms=10, lw=2)
    errorbar(sepmeans,
             rhomeans,
             yerr=rhostds,
             fmt='r.-',
             ms=10,
             lw=2,
             zorder=9999)
    xlim(xmin=0, xmax=SEPMAX)
    ylim(ymin=RHOMIN, ymax=RHOMAX)
    septicks = np.arange(0, fssseps.max() + 100, 500)
    xticks(septicks)
    yticks(rhoticks)
    gcfm().window.setWindowTitle(basetitle + '_rho_sep')
    tight_layout(pad=0.3)
    return ssrho
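scatterbin is a helper that this excerpt doesn't define; a minimal sketch of it, assuming the same semantics as the binning logic inlined in Example #52 below (empty bins would yield NaN means there too):

import numpy as np

def scatterbin(seps, rhos, sepbins):
    # Hypothetical sketch of the scatterbin helper used above: bin rho
    # values by separation and return per-bin mean sep, mean rho, rho std.
    # Mirrors the binning that Example #52 (psthcorrdiff) inlines.
    sortis = np.argsort(seps)
    seps, rhos = seps[sortis], rhos[sortis]
    sepis = seps.searchsorted(sepbins)
    sepmeans, rhomeans, rhostds = [], [], []
    for sepi0, sepi1 in zip(sepis[:-1], sepis[1:]):  # iterate over sepbins
        sepmeans.append(seps[sepi0:sepi1].mean())
        rhoslice = rhos[sepi0:sepi1]
        rhomeans.append(rhoslice.mean())
        rhostds.append(rhoslice.std())
    return sepmeans, rhomeans, rhostds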
 def as_array(self):
     """Return standard numpy array equivalent"""
     a = np.zeros((self.size, self.size))
     a[np.tril_indices(self.size)] = self._elements
     a[np.triu_indices(self.size)] = a.T[np.triu_indices(self.size)]
     return a
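As a standalone check of the unpack-and-mirror pattern in as_array above, a small sketch with arbitrary values:

import numpy as np

n = 3
tril = np.tril_indices(n)
elements = np.arange(1.0, n * (n + 1) // 2 + 1)  # packed lower triangle, row-major
a = np.zeros((n, n))
a[tril] = elements
a[np.triu_indices(n)] = a.T[np.triu_indices(n)]  # mirror into the upper triangle
assert np.allclose(a, a.T)  # symmetric by construction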
Example #51
0
 def __init__(self):
     alleles = _AMINO_ACIDS
     num_alleles = len(alleles)
     root_distribution = [
         0.074,
         0.052,
         0.045,
         0.054,
         0.025,
         0.034,
         0.054,
         0.074,
         0.026,
         0.068,
         0.099,
         0.058,
         0.025,
         0.047,
         0.039,
         0.057,
         0.051,
         0.013,
         0.032,
         0.073,
     ]
     relative_rates = [
         0.735790389698,
         0.485391055466,
         1.297446705134,
         0.543161820899,
         0.500964408555,
         3.180100048216,
         1.459995310470,
         0.227826574209,
         0.397358949897,
         0.240836614802,
         1.199705704602,
         3.020833610064,
         1.839216146992,
         1.190945703396,
         0.329801504630,
         1.170949042800,
         1.360574190420,
         1.240488508640,
         3.761625208368,
         0.140748891814,
         5.528919177928,
         1.955883574960,
         0.418763308518,
         1.355872344485,
         0.798473248968,
         0.418203192284,
         0.609846305383,
         0.423579992176,
         0.716241444998,
         1.456141166336,
         2.414501434208,
         0.778142664022,
         0.354058109831,
         2.435341131140,
         1.626891056982,
         0.539859124954,
         0.605899003687,
         0.232036445142,
         0.283017326278,
         0.418555732462,
         0.774894022794,
         0.236202451204,
         0.186848046932,
         0.189296292376,
         0.252718447885,
         0.800016530518,
         0.622711669692,
         0.211888159615,
         0.218131577594,
         0.831842640142,
         0.580737093181,
         0.372625175087,
         0.217721159236,
         0.348072209797,
         3.890963773304,
         1.295201266783,
         5.411115141489,
         1.593137043457,
         1.032447924952,
         0.285078800906,
         3.945277674515,
         2.802427151679,
         0.752042440303,
         1.022507035889,
         0.406193586642,
         0.445570274261,
         1.253758266664,
         0.983692987457,
         0.648441278787,
         0.222621897958,
         0.767688823480,
         2.494896077113,
         0.555415397470,
         0.459436173579,
         0.984311525359,
         3.364797763104,
         6.030559379572,
         1.073061184332,
         0.492964679748,
         0.371644693209,
         0.354861249223,
         0.281730694207,
         0.441337471187,
         0.144356959750,
         0.291409084165,
         0.368166464453,
         0.714533703928,
         1.517359325954,
         2.064839703237,
         0.266924750511,
         1.773855168830,
         1.173275900924,
         0.448133661718,
         0.494887043702,
         0.730628272998,
         0.356008498769,
         0.858570575674,
         0.926563934846,
         0.504086599527,
         0.527007339151,
         0.388355409206,
         0.374555687471,
         1.047383450722,
         0.454123625103,
         0.233597909629,
         4.325092687057,
         1.122783104210,
         2.904101656456,
         1.582754142065,
         1.197188415094,
         1.934870924596,
         1.769893238937,
         1.509326253224,
         1.117029762910,
         0.357544412460,
         0.352969184527,
         1.752165917819,
         0.918723415746,
         0.540027644824,
         1.169129577716,
         1.729178019485,
         0.914665954563,
         1.898173634533,
         0.934187509431,
         1.119831358516,
         1.277480294596,
         1.071097236007,
         0.641436011405,
         0.585407090225,
         1.179091197260,
         0.915259857694,
         1.303875200799,
         1.488548053722,
         0.488206118793,
         1.005451683149,
         5.151556292270,
         0.465839367725,
         0.426382310122,
         0.191482046247,
         0.145345046279,
         0.527664418872,
         0.758653808642,
         0.407635648938,
         0.508358924638,
         0.301248600780,
         0.341985787540,
         0.691474634600,
         0.332243040634,
         0.888101098152,
         2.074324893497,
         0.252214830027,
         0.387925622098,
         0.513128126891,
         0.718206697586,
         0.720517441216,
         0.538222519037,
         0.261422208965,
         0.470237733696,
         0.958989742850,
         0.596719300346,
         0.308055737035,
         4.218953969389,
         0.674617093228,
         0.811245856323,
         0.717993486900,
         0.951682162246,
         6.747260430801,
         0.369405319355,
         0.796751520761,
         0.801010243199,
         4.054419006558,
         2.187774522005,
         0.438388343772,
         0.312858797993,
         0.258129289418,
         1.116352478606,
         0.530785790125,
         0.524253846338,
         0.253340790190,
         0.201555971750,
         8.311839405458,
         2.231405688913,
         0.498138475304,
         2.575850755315,
         0.838119610178,
         0.496908410676,
         0.561925457442,
         2.253074051176,
         0.266508731426,
         1.000000000000,
     ]
     transition_matrix = np.zeros((num_alleles, num_alleles))
     tril = np.tril_indices(num_alleles, k=-1)
     # unpack the flat rate list into the strictly lower triangle
     transition_matrix[tril] = relative_rates
     # mirror into the upper triangle to symmetrize the exchangeabilities
     transition_matrix += np.tril(transition_matrix).T
     transition_matrix *= root_distribution  # scale by equilibrium frequencies
     row_sums = transition_matrix.sum(axis=1)
     transition_matrix = transition_matrix / row_sums.max()  # largest row sum -> 1
     row_sums = transition_matrix.sum(axis=1, dtype="float64")
     np.fill_diagonal(transition_matrix, 1.0 - row_sums)  # rows now sum to 1
     super().__init__(alleles, root_distribution, transition_matrix)
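A minimal sanity check of this construction, with random rates and frequencies standing in for the hard-coded tables (the unpack/symmetrize/normalize pattern is the same):

import numpy as np

num_alleles = 4
rng = np.random.default_rng(0)
relative_rates = rng.random(num_alleles * (num_alleles - 1) // 2)
root_distribution = rng.dirichlet(np.ones(num_alleles))

m = np.zeros((num_alleles, num_alleles))
m[np.tril_indices(num_alleles, k=-1)] = relative_rates
m += np.tril(m).T                   # symmetrize the exchangeabilities
m *= root_distribution              # scale by equilibrium frequencies
m /= m.sum(axis=1).max()            # largest row sum becomes 1
np.fill_diagonal(m, 1.0 - m.sum(axis=1))
assert np.allclose(m.sum(axis=1), 1.0)  # valid transition matrix rows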
Example #52
0
def psthcorrdiff(rhos, seps, basetitle):
    """Plot difference of a pair of rho matrices (rhos[0] - rhos[1]). seps is the
    corresponding distance matrix"""
    assert len(rhos) == 2

    # calc rho difference matrix:
    rhod = rhos[0] - rhos[1]
    assert rhod.shape[0] == rhod.shape[1]  # square
    nn = rhod.shape[0]

    # plot rho diff matrix:
    figure(figsize=FIGSIZE)
    imshow(rhod, vmin=-1, vmax=1, cmap='jet')  # cmap='gray' is too bland
    ssnidticks = np.arange(0, nn, 10)
    xticks(ssnidticks)
    yticks(ssnidticks)
    if SHOWCOLORBAR:
        colorbar()
    gcfm().window.setWindowTitle(basetitle + '_rhod_mat')
    tight_layout(pad=0.3)

    # plot rho difference histogram:
    lti = np.tril_indices(nn, -1)  # lower triangle (below diagonal) indices
    rhol = rhod[lti]
    notnanis = np.logical_not(np.isnan(rhol))  # indices of non-nan values
    frhol = rhol[notnanis]  # rhol filtered out for nans
    frholmean = frhol.mean()
    t, p = ttest_1samp(frhol, 0)  # 2-sided ttest relative to 0
    print('mean=%g, t=%g, p=%g' % (frholmean, t, p))
    if p < ALPHA0:
        pstring = '$p<%g$' % ceilsigfig(p)
    else:
        pstring = '$p>%g$' % floorsigfig(p)
    figure(figsize=FIGSIZE)
    rhobins = np.arange(RHODIFFMIN, RHODIFFMAX + 0.0333,
                        0.0333)  # left edges + rightmost edge
    n = hist(frhol, bins=rhobins, color='k')[0]
    axvline(x=frholmean, c='r',
            ls='--')  # draw vertical red line at mean frhol
    axvline(x=0, c='grey', ls='--')  # draw vertical grey line at x=0
    xlim(xmin=RHODIFFMIN, xmax=RHODIFFMAX)
    ylim(ymax=n.max())  # scale the y axis to the peak bin count
    rhoticks = np.arange(-0.6, 0.6 + 0.2, 0.2)
    xticks(rhoticks)
    yticks([n.max()])  # single y tick at the peak count, to save space
    #yticks([0, n.max()])
    text(0.98,
         0.98,
         '$\\mu$=%.2g\n%s' % (frholmean, pstring),
         color='k',
         transform=gca().transAxes,
         horizontalalignment='right',
         verticalalignment='top')
    gcfm().window.setWindowTitle(basetitle + '_rhod_hist')
    tight_layout(pad=0.3)

    # plot rho difference vs separation:
    fseps = seps[notnanis]  # seps filtered out for nans
    figure(figsize=FIGSIZE)
    # scatter plot:
    pl.plot(fseps, frhol, 'k.')
    # bin seps and plot mean rho in each bin:
    sortis = np.argsort(fseps)
    seps = fseps[sortis]
    rhos = frhol[sortis]
    sepbins = np.arange(0, seps.max() + SEPBINW, SEPBINW)  # left edges
    sepis = seps.searchsorted(sepbins)
    sepmeans, rhomeans, rhostds = [], [], []
    for sepi0, sepi1 in zip(sepis[:-1], sepis[1:]):  # iterate over sepbins
        sepmeans.append(
            seps[sepi0:sepi1].mean())  # mean sep of all points in this sepbin
        rhoslice = rhos[sepi0:sepi1]  # rhos in this sepbin
        rhomeans.append(
            rhoslice.mean())  # mean rho of all points in this sepbin
        rhostds.append(rhoslice.std())  # std of rho in this sepbin
    #pl.plot(sepmeans, rhomeans, 'r.-', ms=10, lw=2)
    errorbar(sepmeans,
             rhomeans,
             yerr=rhostds,
             fmt='r.-',
             ms=10,
             lw=2,
             zorder=9999)
    xlim(xmin=0, xmax=SEPMAX)
    ylim(ymin=RHODIFFMIN, ymax=RHODIFFMAX)
    septicks = np.arange(0, seps.max() + 100, 500)
    xticks(septicks)
    yticks(rhoticks)
    gcfm().window.setWindowTitle(basetitle + '_rhod_sep')
    tight_layout(pad=0.3)
Example #53
0
def tril_indices(*args, **kwargs):
    return tuple(map(torch.from_numpy, _np.tril_indices(*args, **kwargs)))
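A short usage sketch of this wrapper, assuming numpy is imported as _np and torch is available, as the one-liner implies:

import numpy as _np
import torch

row, col = tril_indices(4, -1)        # a pair of torch LongTensors
x = torch.arange(16.).reshape(4, 4)
lower = x[row, col]                   # strictly-below-diagonal entries
assert lower.shape[0] == 4 * 3 // 2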
Example #54
0
def to_fcivec(cisdvec, norb, nelec, frozen=None):
    '''Convert CISD coefficients to FCI coefficients'''
    if isinstance(nelec, (int, numpy.number)):
        nelecb = nelec // 2
        neleca = nelec - nelecb
    else:
        neleca, nelecb = nelec
        assert (neleca == nelecb)

    frozen_mask = numpy.zeros(norb, dtype=bool)
    if frozen is None:
        nfroz = 0
    elif isinstance(frozen, (int, numpy.integer)):
        nfroz = frozen
        frozen_mask[:frozen] = True
    else:
        nfroz = len(frozen)
        frozen_mask[frozen] = True

    nocc = numpy.count_nonzero(~frozen_mask[:neleca])
    nmo = norb - nfroz
    nvir = nmo - nocc
    c0, c1, c2 = cisdvec_to_amplitudes(cisdvec, nmo, nocc)
    t1addr, t1sign = tn_addrs_signs(nmo, nocc, 1)

    na = cistring.num_strings(nmo, nocc)
    fcivec = numpy.zeros((na, na))
    fcivec[0, 0] = c0
    fcivec[0, t1addr] = fcivec[t1addr, 0] = c1.ravel() * t1sign
    c2ab = c2.transpose(0, 2, 1, 3).reshape(nocc * nvir, -1)
    c2ab = numpy.einsum('i,j,ij->ij', t1sign, t1sign, c2ab)
    fcivec[t1addr[:, None], t1addr] = c2ab

    if nocc > 1 and nvir > 1:
        c2aa = c2 - c2.transpose(1, 0, 2, 3)
        ooidx = numpy.tril_indices(nocc, -1)
        vvidx = numpy.tril_indices(nvir, -1)
        c2aa = c2aa[ooidx][:, vvidx[0], vvidx[1]]
        t2addr, t2sign = tn_addrs_signs(nmo, nocc, 2)
        fcivec[0, t2addr] = fcivec[t2addr, 0] = c2aa.ravel() * t2sign

    if nfroz == 0:
        return fcivec

    assert (norb < 63)

    strs = cistring.gen_strings4orblist(range(norb), neleca)
    na = len(strs)
    count = numpy.zeros(na, dtype=int)
    parity = numpy.zeros(na, dtype=bool)
    core_mask = numpy.ones(na, dtype=bool)
    # During the loop, count tracks the number of occupied orbitals that
    # lie lower (i.e. have a smaller orbital ID) than the present orbital i.
    # Moving all the frozen orbitals to the beginning of the orbital list
    # (before the occupied orbitals) makes the parity odd (= True, with
    # negative sign) or even (= False, with positive sign).
    for i in range(norb):
        if frozen_mask[i]:
            if i < neleca:
                # frozen occupied orbital should be occupied
                core_mask &= (strs & (1 << i)) != 0
                parity ^= (count & 1) == 1
            else:
                # frozen virtual orbital should not be occupied.
                # parity is not needed since it's unoccupied
                core_mask &= (strs & (1 << i)) == 0
        else:
            count += (strs & (1 << i)) != 0
    sub_strs = strs[core_mask & (count == nocc)]
    addrs = cistring.strs2addr(norb, neleca, sub_strs)
    fcivec1 = numpy.zeros((na, na))
    fcivec1[addrs[:, None], addrs] = fcivec
    fcivec1[parity, :] *= -1
    fcivec1[:, parity] *= -1
    return fcivec1
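The c2aa packing above keeps only the strictly-lower (i > j, a > b) entries of the antisymmetrized doubles; the same index pattern in isolation, on random data:

import numpy as np

nocc, nvir = 3, 4
c2 = np.random.default_rng(1).random((nocc, nocc, nvir, nvir))
c2aa = c2 - c2.transpose(1, 0, 2, 3)  # antisymmetrize the occupied pair
ooidx = np.tril_indices(nocc, -1)
vvidx = np.tril_indices(nvir, -1)
packed = c2aa[ooidx][:, vvidx[0], vvidx[1]]
assert packed.shape == (nocc * (nocc - 1) // 2, nvir * (nvir - 1) // 2)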
Example #55
0
def trans_e1_outcore(mol,
                     mo,
                     ncore,
                     ncas,
                     erifile,
                     max_memory=None,
                     level=1,
                     verbose=logger.WARN):
    time0 = (time.process_time(), time.perf_counter())  # time.clock was removed in Python 3.8
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    log.debug1('trans_e1_outcore level %d  max_memory %d', level, max_memory)
    nao, nmo = mo.shape
    nao_pair = nao * (nao + 1) // 2
    nocc = ncore + ncas

    _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    faapp_buf = h5py.File(_tmpfile1.name, 'a')  # explicit mode; h5py >= 3 defaults to read-only
    feri = h5py.File(erifile, 'w')

    mo_c = numpy.asarray(mo, order='C')
    mo = numpy.asarray(mo, order='F')
    pashape = (0, nmo, ncore, nocc)
    papa_buf = numpy.zeros((nao, ncas, nmo * ncas))
    j_pc = numpy.zeros((nmo, ncore))
    k_pc = numpy.zeros((nmo, ncore))

    mem_words = int(max(2000, max_memory - papa_buf.nbytes / 1e6) * 1e6 / 8)
    aobuflen = mem_words // (nao_pair + nocc * nmo) + 1
    ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
    shranges = outcore.guess_shell_ranges(mol, True, aobuflen, None, ao_loc)
    intor = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nstep = len(shranges)
    paapp = 0
    maxbuflen = max([x[2] for x in shranges])
    log.debug('mem_words %.8g MB, maxbuflen = %d', mem_words * 8 / 1e6,
              maxbuflen)
    bufs1 = numpy.empty((maxbuflen, nao_pair))
    bufs2 = numpy.empty((maxbuflen, nmo * ncas))
    if level == 1:
        bufs3 = numpy.empty((maxbuflen, nao * ncore))
        log.debug('mem cache %.8g MB',
                  (bufs1.nbytes + bufs2.nbytes + bufs3.nbytes) / 1e6)
    else:
        log.debug('mem cache %.8g MB', (bufs1.nbytes + bufs2.nbytes) / 1e6)
    ti0 = log.timer('Initializing trans_e1_outcore', *time0)

    # fmmm, ftrans, fdrv for level 1
    fmmm = libmcscf.AO2MOmmm_ket_nr_s2
    ftrans = libmcscf.AO2MOtranse1_nr_s4
    fdrv = libmcscf.AO2MOnr_e2_drv
    for istep, sh_range in enumerate(shranges):
        log.debug('[%d/%d], AO [%d:%d], len(buf) = %d', istep + 1, nstep,
                  *sh_range)
        buf = bufs1[:sh_range[2]]
        _ao2mo.nr_e1fill(intor, sh_range, mol._atm, mol._bas, mol._env, 's4',
                         1, ao2mopt, buf)
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('AO integrals buffer', *ti0)
        bufpa = bufs2[:sh_range[2]]
        _ao2mo.nr_e1(buf, mo, pashape, 's4', 's1', out=bufpa)
        # jc_pp, kc_pp
        if level == 1:  # ppaa, papa and vhf, jcp, kcp
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('buffer-pa', *ti1)
            buf1 = bufs3[:sh_range[2]]
            fdrv(ftrans, fmmm, buf1.ctypes.data_as(ctypes.c_void_p),
                 buf.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(sh_range[2]),
                 ctypes.c_int(nao), (ctypes.c_int * 4)(0, nao, 0, ncore),
                 ctypes.POINTER(ctypes.c_void_p)(), ctypes.c_int(0))
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i, j = lib.index_tril_to_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i + 1]
                j1 = ao_loc[j + 1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    dij = di * (di + 1) // 2
                    buf = numpy.empty((di, di, nao * ncore))
                    idx = numpy.tril_indices(di)
                    buf[idx] = buf1[p0:p0 + dij]
                    buf[idx[1], idx[0]] = buf1[p0:p0 + dij]
                    buf = buf.reshape(di, di, nao, ncore)
                    mo1 = mo_c[i0:i1]
                    tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:, :ncore])
                    tmp = lib.dot(mo1.T, tmp.reshape(di, -1))
                    j_pc += numpy.einsum('vp,pvc->pc', mo1,
                                         tmp.reshape(nmo, di, ncore))
                    tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(di, ncore, nmo)
                    k_pc += numpy.einsum('vp,vcp->pc', mo1, tmp)
                else:
                    dij = di * dj
                    buf = buf1[p0:p0 + dij].reshape(di, dj, nao, ncore)
                    mo1 = mo_c[i0:i1]
                    mo2 = mo_c[j0:j1]
                    tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:, :ncore])
                    tmp = lib.dot(mo1.T, tmp.reshape(di, -1))
                    j_pc += numpy.einsum('vp,pvc->pc', mo2,
                                         tmp.reshape(nmo, dj, ncore)) * 2
                    tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(dj, ncore, nmo)
                    k_pc += numpy.einsum('vp,vcp->pc', mo2, tmp)
                    tmp = numpy.einsum('uvpc,vc->ucp', buf, mo2[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(di, ncore, nmo)
                    k_pc += numpy.einsum('up,ucp->pc', mo1, tmp)
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('j_cp and k_cp', *ti1)

        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('half transformation of the buffer', *ti1)

        # ppaa, papa
        faapp_buf[str(istep)] = \
            bufpa.reshape(sh_range[2], nmo, ncas)[:, ncore:nocc].reshape(-1, ncas**2).T
        p0 = 0
        for ij in range(sh_range[0], sh_range[1]):
            i, j = lib.index_tril_to_pair(ij)
            i0 = ao_loc[i]
            j0 = ao_loc[j]
            i1 = ao_loc[i + 1]
            j1 = ao_loc[j + 1]
            di = i1 - i0
            dj = j1 - j0
            if i == j:
                dij = di * (di + 1) // 2
                buf1 = numpy.empty((di, di, nmo * ncas))
                idx = numpy.tril_indices(di)
                buf1[idx] = bufpa[p0:p0 + dij]
                buf1[idx[1], idx[0]] = bufpa[p0:p0 + dij]
            else:
                dij = di * dj
                buf1 = bufpa[p0:p0 + dij].reshape(di, dj, -1)
                mo1 = mo[j0:j1, ncore:nocc].copy()
                for k in range(di):  # k, not i: avoid shadowing the shell index
                    lib.dot(mo1.T, buf1[k], 1, papa_buf[i0 + k], 1)
            mo1 = mo[i0:i1, ncore:nocc].copy()
            buf1 = lib.dot(mo1.T, buf1.reshape(di, -1))
            papa_buf[j0:j1] += buf1.reshape(ncas, dj, -1).transpose(1, 0, 2)
            p0 += dij
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('ppaa and papa buffer', *ti1)

        ti0 = log.timer('gen AO/transform MO [%d/%d]' % (istep + 1, nstep),
                        *ti0)
    buf = buf1 = bufpa = None
    bufs1 = bufs2 = bufs3 = None
    time1 = log.timer('mc_ao2mo pass 1', *time0)

    log.debug1('Half transformation done. Current memory %d',
               lib.current_memory()[0])

    nblk = int(
        max(8,
            min(nmo,
                (max_memory * 1e6 / 8 - papa_buf.size) / (ncas**2 * nmo))))
    log.debug1('nblk for papa = %d', nblk)
    dset = feri.create_dataset('papa', (nmo, ncas, nmo, ncas), 'f8')
    for i0, i1 in prange(0, nmo, nblk):
        tmp = lib.dot(mo[:, i0:i1].T, papa_buf.reshape(nao, -1))
        dset[i0:i1] = tmp.reshape(i1 - i0, ncas, nmo, ncas)
    papa_buf = tmp = None
    time1 = log.timer('papa pass 2', *time1)

    tmp = numpy.empty((ncas**2, nao_pair))
    p0 = 0
    for istep, sh_range in enumerate(shranges):
        tmp[:, p0:p0 + sh_range[2]] = faapp_buf[str(istep)]
        p0 += sh_range[2]
    nblk = int(
        max(8, min(nmo,
                   (max_memory * 1e6 / 8 - tmp.size) / (ncas**2 * nmo) - 1)))
    log.debug1('nblk for ppaa = %d', nblk)
    dset = feri.create_dataset('ppaa', (nmo, nmo, ncas, ncas), 'f8')
    for i0, i1 in prange(0, nmo, nblk):
        tmp1 = _ao2mo.nr_e2(tmp,
                            mo, (i0, i1, 0, nmo),
                            's4',
                            's1',
                            ao_loc=ao_loc)
        tmp1 = tmp1.reshape(ncas, ncas, i1 - i0, nmo)
        for j in range(i1 - i0):
            dset[i0 + j] = tmp1[:, :, j].transpose(2, 0, 1)
    tmp = tmp1 = None
    time1 = log.timer('ppaa pass 2', *time1)

    faapp_buf.close()
    feri.close()
    _tmpfile1 = None
    time0 = log.timer('mc_ao2mo', *time0)
    return j_pc, k_pc
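The buf[idx] / buf[idx[1], idx[0]] steps above expand an s4-packed (i >= j) buffer back into a full square block; the same pattern in isolation:

import numpy as np

di = 3
packed = np.arange(di * (di + 1) // 2, dtype=float)  # packed lower triangle
idx = np.tril_indices(di)
full = np.empty((di, di))
full[idx] = packed                  # fill lower triangle (incl. diagonal)
full[idx[1], idx[0]] = packed       # mirror into the upper triangle
assert np.allclose(full, full.T)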
Example #56
0
                cbar=False,
                square=True)
    plt.title('{} Canonical Angle'.format(['First', 'Second', 'Third'][ii]))
    plt.xticks(unique_idx, whisker_unique, rotation=45)
    plt.yticks(unique_idx, whisker_unique, rotation=0)
plt.tight_layout()
plt.savefig(os.path.join(p_load,
                         'canonical_angles_between_neuron_weights.pdf'))

# ===========================================
# plot a histogram of the canonical angles between the weights
# ===========================================
wd = figsize[0] / 1.5
ht = wd
f = plt.figure(figsize=(wd, ht))
x, y = np.tril_indices(canonical_angles.shape[0], -1)
first = canonical_angles[x, y, 0]
second = canonical_angles[x, y, 1]
third = canonical_angles[x, y, 2]

sns.distplot(first, kde=False)
sns.distplot(second, kde=False)
sns.distplot(third, kde=False)
sns.despine()
plt.title('Values of angles between\nsubspaces covered by all neurons')
plt.ylabel('# of pairwise comparisons')
plt.xlabel('$cos(\\theta)$')
plt.legend(
    ['{} Canonical Angle'.format(x) for x in ['First', 'Second', 'Third']])
plt.tight_layout()
plt.savefig(
Example #57
0
    def initialize(self, model):
        """
        Called on the first call to update

        `ilabels` is a list of n_i x n_i matrices containing integer
        labels that correspond to specific correlation parameters.
        Two elements of ilabels[i] with the same label share identical
        variance components.

        `designx` is a matrix, with each row containing dummy
        variables indicating which variance components are associated
        with the corresponding element of QY.
        """

        super(Nested, self).initialize(model)

        if self.model.weights is not None:
            warnings.warn(
                "weights not implemented for nested cov_struct, "
                "using unweighted covariance estimate", NotImplementedWarning)

        # A bit of processing of the nest data
        id_matrix = np.asarray(self.model.dep_data)
        if id_matrix.ndim == 1:
            id_matrix = id_matrix[:, None]
        self.id_matrix = id_matrix

        endog = self.model.endog_li
        designx, ilabels = [], []

        # The number of layers of nesting
        n_nest = self.id_matrix.shape[1]

        for i in range(self.model.num_group):
            ngrp = len(endog[i])
            glab = self.model.group_labels[i]
            rix = self.model.group_indices[glab]

            # Determine the number of common variance components
            # shared by each pair of observations.
            ix1, ix2 = np.tril_indices(ngrp, -1)
            ncm = (self.id_matrix[rix[ix1], :] == self.id_matrix[rix[ix2], :]
                   ).sum(1)

            # This is used to construct the working correlation
            # matrix.
            ilabel = np.zeros((ngrp, ngrp), dtype=np.int32)
            ilabel[(ix1, ix2)] = ncm + 1
            ilabel[(ix2, ix1)] = ncm + 1
            ilabels.append(ilabel)

            # This is used to estimate the variance components.
            dsx = np.zeros((len(ix1), n_nest + 1), dtype=np.float64)
            dsx[:, 0] = 1
            for k in np.unique(ncm):
                ii = np.flatnonzero(ncm == k)
                dsx[ii, 1:k + 1] = 1
            designx.append(dsx)

        self.designx = np.concatenate(designx, axis=0)
        self.ilabels = ilabels

        svd = np.linalg.svd(self.designx, 0)
        self.designx_u = svd[0]
        self.designx_s = svd[1]
        self.designx_v = svd[2].T
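The ncm computation above counts, for each below-diagonal pair of observations in a group, how many nesting columns agree; a toy illustration:

import numpy as np

id_matrix = np.array([[0, 0],
                      [0, 1],
                      [1, 1]])       # 3 observations, 2 nesting levels
ix1, ix2 = np.tril_indices(len(id_matrix), -1)
ncm = (id_matrix[ix1, :] == id_matrix[ix2, :]).sum(1)
# pairs (1,0), (2,0), (2,1) share 1, 0 and 1 nesting labels respectively
assert ncm.tolist() == [1, 0, 1]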
Example #58
0
def quadratic_terms(x):
    n = len(x)
    x = x[np.newaxis]  # turn x from 1D to a 2D array
    temp = x * x.T - (1 - 1 / np.sqrt(2)) * np.diag(pow(x, 2)[0])

    return temp[np.tril_indices(n)]
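For a length-n input this returns the n*(n+1)/2 unique quadratic monomials, with each squared term scaled to x_i**2 / sqrt(2) by the diagonal correction; a quick check:

import numpy as np

x = np.array([1.0, 2.0, 3.0])
q = quadratic_terms(x)
# lower triangle of outer(x, x), diagonal entries divided by sqrt(2):
expected = np.array([1 / np.sqrt(2), 2.0, 4 / np.sqrt(2),
                     3.0, 6.0, 9 / np.sqrt(2)])
assert np.allclose(q, expected)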
Example #59
0
File: io.py Project: ai-med/almgig
    def _get_transformed_inputs(self,
                                features: np.ndarray,
                                edges: np.ndarray,
                                invalid_prob: float = 1e-9) -> GraphData:
        assert edges.shape[0] == edges.shape[1], '{} != {}'.format(
            edges.shape[0], edges.shape[1])
        assert edges.shape[0] == self._nodes, '{} != {}'.format(
            edges.shape[0], self._nodes)
        assert features.shape[0] == self._nodes, '{} != {}'.format(
            features.shape[0], self._nodes)
        assert edges.max() <= self._edge_types, '{} > {}'.format(
            edges.max(), self._edge_types)
        assert edges.min() == 0, '{} != 0'.format(edges.min())

        nn = self._nodes

        # lower triangle
        idx_lower = np.tril_indices(nn)
        row, col = idx_lower
        assert row.shape[0] == self._n_adj_flat

        # degree[i]: degree of nodes before adding i-th edge
        degree = []
        for i, (x, y) in enumerate(zip(row, col)):
            weight = edges[x, y]
            if weight == 0:
                continue
            if len(degree) == 0:
                nc = np.zeros(nn)
            else:
                nc = degree[-1].copy()
            nc[x] += weight
            nc[y] += weight
            degree.append(nc)

        # weight_indicators[i]: binary mask of allowed edge types before
        # adding i-th edge
        vv = self._valence[features.argmax(axis=-1)]
        indicator = np.ones((nn, self._edge_types), dtype=np.float32)
        for i, left in enumerate(vv.astype(int)):
            indicator[i, left:] = 0
        weight_indicators = [indicator]

        for deg in degree[:-1]:  # we don't need valence _after_ adding the last edge
            indicator = weight_indicators[-1].copy()

            d = (vv - deg).astype(int)
            for i, left in enumerate(d):
                indicator[i, left:] = 0
            weight_indicators.append(indicator)

        num_edges = len(weight_indicators)
        assert np.count_nonzero(edges[idx_lower]) == num_edges
        # shape = (num_edges, num_nodes, num_edge_types)
        weight_indicators = np.asarray(weight_indicators, dtype=np.float32)

        # zero-padding if num_edges < max_edges
        null_edges = self._max_edges - num_edges
        if null_edges > 0:
            null_ind = np.zeros((null_edges, edges.shape[0], self._edge_types),
                                dtype=np.float32)
            weight_indicators = np.vstack((weight_indicators, null_ind))  # row_stack is a deprecated alias

        # mask zero values
        weight_indicators = np.log((1.0 - weight_indicators) * invalid_prob +
                                   weight_indicators)

        adj_one_hot = np.zeros((self._edge_types, nn, nn), dtype=np.int32)
        # lower triangle of one-hot encoded edges matrix
        weight_bin = np.zeros([self._n_adj_flat, self._edge_types],
                              dtype=np.int32)
        # indices of true edges (wrt flat lower triangular matrix)
        coord = np.zeros(self._max_edges, dtype=np.int32)
        # edge list of true edges (with row, col index)
        edge_list = np.zeros((self._max_edges, 2), dtype=np.int32)
        cur_edge = 0
        for k, (i, j) in enumerate(zip(row, col)):
            if edges[i, j] > 0:
                ti = edges[i, j] - 1

                adj_one_hot[ti, i, j] = 1
                adj_one_hot[ti, j, i] = 1

                weight_bin[k, ti] = 1
                coord[cur_edge] = k
                edge_list[cur_edge, :] = i, j
                cur_edge += 1
        assert cur_edge == num_edges

        adj = np.zeros(edges.shape, dtype=np.int32)
        adj[edges.nonzero()] = 1
        adj_lower = adj[idx_lower][:, np.newaxis]

        return GraphData(adj_one_hot, adj_lower, weight_bin, features,
                         weight_indicators, coord, edge_list, num_edges)
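The _n_adj_flat bookkeeping above relies on the flat lower triangle (diagonal included) having nn*(nn+1)/2 entries; in isolation:

import numpy as np

nn = 5
row, col = np.tril_indices(nn)      # diagonal included, matching idx_lower
assert row.shape[0] == nn * (nn + 1) // 2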
Example #60
0
 def __init__(self):
     alleles = _AMINO_ACIDS
     num_alleles = len(alleles)
     root_distribution = [
         0.087127,
         0.040904,
         0.040432,
         0.046872,
         0.033474,
         0.038255,
         0.049530,
         0.088612,
         0.033619,
         0.036886,
         0.085357,
         0.080481,
         0.014753,
         0.039772,
         0.050680,
         0.069577,
         0.058542,
         0.010494,
         0.029916,
         0.064717,
     ]
     relative_rates = [
         0.267828,
         0.984474,
         0.327059,
         1.199805,
         0.000000,
         8.931515,
         0.360016,
         0.232374,
         0.000000,
         0.000000,
         0.887753,
         2.439939,
         1.028509,
         1.348551,
         0.000000,
         1.961167,
         0.000000,
         1.493409,
         11.388659,
         0.000000,
         7.086022,
         2.386111,
         0.087791,
         1.385352,
         1.240981,
         0.107278,
         0.281581,
         0.811907,
         0.228116,
         2.383148,
         5.290024,
         0.868241,
         0.282729,
         6.011613,
         0.439469,
         0.106802,
         0.653416,
         0.632629,
         0.768024,
         0.239248,
         0.438074,
         0.180393,
         0.609526,
         0.000000,
         0.076981,
         0.406431,
         0.154924,
         0.341113,
         0.000000,
         0.000000,
         0.730772,
         0.112880,
         0.071514,
         0.443504,
         2.556685,
         0.258635,
         4.610124,
         3.148371,
         0.716913,
         0.000000,
         1.519078,
         0.830078,
         0.267683,
         0.270475,
         0.460857,
         0.180629,
         0.717840,
         0.896321,
         0.000000,
         0.000000,
         0.000000,
         1.127499,
         0.304803,
         0.170372,
         0.000000,
         3.332732,
         5.230115,
         2.411739,
         0.183641,
         0.136906,
         0.138503,
         0.000000,
         0.000000,
         0.000000,
         0.000000,
         0.153478,
         0.475927,
         1.951951,
         1.565160,
         0.000000,
         0.921860,
         2.485920,
         1.028313,
         0.419244,
         0.133940,
         0.187550,
         1.526188,
         0.507003,
         0.347153,
         0.933709,
         0.119152,
         0.316258,
         0.335419,
         0.170205,
         0.110506,
         4.051870,
         1.531590,
         4.885892,
         0.956097,
         1.598356,
         0.561828,
         0.793999,
         2.322243,
         0.353643,
         0.247955,
         0.171432,
         0.954557,
         0.619951,
         0.459901,
         2.427202,
         3.680365,
         0.265745,
         2.271697,
         0.660930,
         0.162366,
         0.525651,
         0.340156,
         0.306662,
         0.226333,
         1.900739,
         0.331090,
         1.350599,
         1.031534,
         0.136655,
         0.782857,
         5.436674,
         0.000000,
         2.001375,
         0.224968,
         0.000000,
         0.000000,
         0.000000,
         0.000000,
         0.000000,
         0.270564,
         0.000000,
         0.461776,
         0.000000,
         0.000000,
         0.762354,
         0.000000,
         0.740819,
         0.000000,
         0.244139,
         0.078012,
         0.946940,
         0.000000,
         0.953164,
         0.000000,
         0.214717,
         0.000000,
         1.265400,
         0.374834,
         0.286572,
         0.132142,
         0.000000,
         6.952629,
         0.000000,
         0.336289,
         0.417839,
         0.608070,
         2.059564,
         0.240368,
         0.158067,
         0.178316,
         0.484678,
         0.346983,
         0.367250,
         0.538165,
         0.438715,
         8.810038,
         1.745156,
         0.103850,
         2.565955,
         0.123606,
         0.485026,
         0.303836,
         1.561997,
         0.000000,
         0.279379,
     ]
     transition_matrix = np.zeros((num_alleles, num_alleles))
     tril = np.tril_indices(num_alleles, k=-1)
     # same construction as Example #51: unpack, symmetrize, scale, normalize
     transition_matrix[tril] = relative_rates
     transition_matrix += np.tril(transition_matrix).T
     transition_matrix *= root_distribution
     row_sums = transition_matrix.sum(axis=1)
     transition_matrix = transition_matrix / row_sums.max()
     row_sums = transition_matrix.sum(axis=1, dtype="float64")
     np.fill_diagonal(transition_matrix, 1.0 - row_sums)
     super().__init__(alleles, root_distribution, transition_matrix)