Example #1
	def __init__(self,n_components,comp=None,centers=None,covars=None):
		self.n_components = n_components
		self.comp = sp.copy(comp)
		self.centers = sp.copy(centers)
		self.covars = sp.copy(covars)
		if centers is not None:
			self.n_dim = centers.shape[1]
Example #2
 def _read_sky_logfile(self):
     #TODO : expand to read errors, msgs etc
     # read in the whole sky log file, shouldn't be big
     f = open(self.skylogfile)
     lines = f.readlines()
     f.close()
     dust = [line.split()[1:] for line in lines if line.startswith('dtau_dust')]
     line = [line.split()[1:] for line in lines if line.startswith('dtau_line')]
     dust = _sp.array(dust, dtype='float')
     line = _sp.array(line, dtype='float')
     transitions = _sp.unique(dust[:,0])
     shells = _sp.unique(dust[:,1])
     dtau_dust = dict()
     dtau_line = dict()
     dtau_tot = dict()
     for t in transitions:
         d = []
         l = []
         for s in shells:
             d.append( _sp.mean([i[2] for i in dust if ((i[0]==t) * (i[1]==s))]) )
             l.append( _sp.mean([i[2] for i in line if ((i[0]==t) * (i[1]==s))]) )
         dtau_dust[t] = _sp.copy(d)
         dtau_line[t] = _sp.copy(l)
         dtau_tot[t] = _sp.array(d) + _sp.array(l)
     # create object to store in main class
     class Tau(object):pass
     Tau.dtau_dust = dtau_dust
     Tau.dtau_line = dtau_line
     Tau.dtau_tot = dtau_tot
     Tau.transitions = transitions
     Tau.shells = shells
     self.Tau = Tau
Example #3
File: optimize.py Project: mmssouza/coevol
 def __init__(self,fitness_func,npop = 20,w = 0.5,c1 = 2.01,c2 = 2.02,debug = False):
  seed()
  self.debug = debug
  self.c1 = c1
  self.c2 = c2
  self.w = w
  self.ns = int(npop) 
  self.fitness_func = fitness_func  
  # generate initial population
  if os.path.isfile("dump_pso.pkl"):
   dump_fd = open("dump_pso.pkl",'r')
   self.pop = cPickle.load(dump_fd)
   self.fit = cPickle.load(dump_fd)
   self.v = cPickle.load(dump_fd)
   self.bfg = cPickle.load(dump_fd)
   self.bfg_fitness = cPickle.load(dump_fd)
   self.bfp = cPickle.load(dump_fd)
   self.bfp_fitness  = cPickle.load(dump_fd)
  else:
   self.pop = scipy.array([self.gera_individuo() for i in scipy.arange(self.ns)])
   self.fit = scipy.zeros(self.ns)
   # evaluate fitness of the whole population
   for i in scipy.arange(self.ns):
    self.fit[i],self.pop[i] = self.avalia_aptidao(self.pop[i])  
   # initialize velocities
   self.v = scipy.zeros((self.ns,Dim))
   # store each particle's best position
   self.bfp = scipy.copy(self.pop)
   self.bfp_fitness = scipy.copy(self.fit)
   # store the global best position
   self.bfg = self.pop[self.bfp_fitness.argmin()].copy()
   self.bfg_fitness = self.bfp_fitness.min().copy()
 def __init__(self, field, system_dir, nprocs=4, **kwargs):
     #
     super().__init__()
     #
     # field attributes that are copied over
     field.create_point_data()
     self.nx = field.nx
     self.nz = field.nz
     self.data_vector = field.data_vector
     self.data_map = field.data_map
     self.point_data = field.point_data
     self._field = field.clone()
     self._mask = sp.ones(self.data_map.shape, dtype=bool)
     #
     self.offset_map = sp.zeros(self.data_map.shape)
     self.offset_points = sp.zeros(self.point_data.shape)
     if kwargs.get('offset_field', None):
         kwargs['offset_field'].create_point_data()
         self.offset_map = sp.copy(kwargs['offset_field'].data_map)
         self.offset_points = sp.copy(kwargs['offset_field'].point_data)
     #
     self.system_dir = system_dir
     self.nprocs = nprocs
     self.avg_fact = kwargs.get('avg_fact', 1.0)
     self.mesh_params = kwargs.get('mesh_params', {})
     self.merge_groups = []
Example #5
	def initialize(self,data,random=False):
		self.data = data
		self.n_dim = data.shape[1]
		if random:
			mins = sp.zeros(self.n_dim)
			maxes = sp.zeros(self.n_dim)
			sds = sp.zeros(self.n_dim)
			centers = sp.zeros((self.n_components,self.n_dim))
			for i in xrange(self.n_dim):
				mins[i] = min(self.data[:,i])
				maxes[i] = max(self.data[:,i])
				sds[i] = sp.std(self.data[:,i])
				centers[:,i] = sp.random.uniform(mins[i],maxes[i],self.n_components)
			self.comp = sp.ones(self.n_components)/float(self.n_components) + sp.random.uniform(-1./self.n_components,1./self.n_components,self.n_components)
			self.comp /= sp.sum(self.comp)
			covars = sp.array([sp.diag(sds**2) for i in xrange(self.n_components)])
			self.centers = centers
			self.covars = covars
		else:
			clust = cluster.KMeans(self.n_components)
			clust.fit(self.data)
			self.centers = sp.copy(clust.cluster_centers_)
			labels = sp.copy(clust.labels_)
			self.covars = sp.zeros((self.n_components,self.n_dim,self.n_dim))
			self.comp = sp.zeros(self.n_components)
			for i in xrange(self.n_components):
				inds = labels == i
				temp = self.data[inds,:]
				self.covars[i,:,:] = sp.dot(temp.T,temp)
				self.comp[i] = sum(inds)/float(self.data.shape[0])
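As a usage note for the KMeans branch above, the same initialization can be reproduced with scikit-learn directly; the following standalone sketch uses toy data and illustrative names (it assumes sklearn is the source of the `cluster` module used above):

import numpy as np
from sklearn.cluster import KMeans

data = np.random.randn(200, 2)                 # toy data set
n_components = 3
clust = KMeans(n_components).fit(data)
centers = np.copy(clust.cluster_centers_)
labels = np.copy(clust.labels_)
covars = np.zeros((n_components, data.shape[1], data.shape[1]))
comp = np.zeros(n_components)
for i in range(n_components):
    temp = data[labels == i, :]
    covars[i] = np.dot(temp.T, temp)           # per-cluster scatter matrix, as in the method above
    comp[i] = temp.shape[0] / float(data.shape[0])
print(comp.sum())                              # mixture weights sum to 1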
def NumpyTensorInitializerForVacancy(gridShape, filename, vacancyfile=None):
    """
    Initialize a 10 component plasticity state by reading from a numpy "tofile" type file or two files.
    """
    dict = {('x','x') : (0,0), ('x','y') : (0,1), ('x','z') : (0,2),\
            ('y','x') : (1,0), ('y','y') : (1,1), ('y','z') : (1,2),\
            ('z','x') : (2,0), ('z','y') : (2,1), ('z','z') : (2,2)}
    data = fromfile(filename)
    if vacancyfile is None:
        data = data.reshape([10] + list(gridShape))
    else:
        data = data.reshape([3,3] + list(gridShape))
        dataV = fromfile(vacancyfile)
        dataV = dataV.reshape(list(gridShape))
    state = VacancyState.VacancyState(gridShape)
    field = state.GetOrderParameterField() 
    if vacancyfile is None:
        i = 0
        for component in field.components:
            field[component] = copy(data[i]) 
            i += 1
    else:
        for component in field.components:
            if component[0] not in ['x','y','z']:
                field[component] = copy(dataV) 
            else:
                field[component] = copy(data[dict[component]]) 
    return state
Example #7
 def copy_data(self, obj):
     r"""
     Copies the data properties of this field onto another object.
     """
     obj.nx = self.nx
     obj.nz = self.nz
     obj.data_map = sp.copy(self.data_map)
     obj.data_vector = sp.copy(self.data_vector)
     obj.point_data = sp.copy(self.point_data)
Example #8
 def clone(self):
     r"""
     Creates a fully qualified DataField object from the existing one.
     """
     # instantiating class and adding attributes
     clone = DataField(None)
     #
     self.copy_data(clone)
     clone._raw_data = sp.copy(self._raw_data)
     clone._cell_interfaces = sp.copy(self._cell_interfaces)
     #
     return clone
Example #9
def wigner(psi,xvec,yvec,g=sqrt(2)):
    """Wigner function for a state vector or density matrix 
    at points xvec+i*yvec.
    
    Parameters
    ----------
    psi : qobj
        A state vector or density matrix.
    
    xvec : array_like
        x-coordinates at which to calculate the Wigner function.
    
    yvec : array_like
        y-coordinates at which to calculate the Wigner function.
        
    g : float
        Scaling factor for a = 0.5*g*(x+iy), default g=sqrt(2).
    
    Returns
    --------
    W : array
        Values representing the Wigner function calculated over the specified range [xvec,yvec].
    
    
    """
    if psi.type=='ket' or psi.type=='oper':
        M=prod(psi.shape[0])
    elif psi.type=='bra':
        M=prod(psi.shape[1])
    else:
        raise TypeError('Input state is not a valid operator.')
    X,Y = meshgrid(xvec, yvec)
    amat = 0.5*g*(X + 1.0j*Y)
    wmat=zeros(shape(amat))
    Wlist=array([zeros(shape(amat),dtype=complex) for k in range(M)])
    Wlist[0]=exp(-2.0*abs(amat)**2)/pi
    if psi.type=='ket' or psi.type=='bra':
        psi=ket2dm(psi)
    wmat=real(psi[0,0])*real(Wlist[0])
    for n in range(1,M):
        Wlist[n]=(2.0*amat*Wlist[n-1])/sqrt(n)
        wmat+= 2.0*real(psi[0,n]*Wlist[n])
    for m in range(M-1):
        temp=copy(Wlist[m+1])
        Wlist[m+1]=(2.0*conj(amat)*temp-sqrt(m+1)*Wlist[m])/sqrt(m+1)
        for n in range(m+1,M-1):
            temp2=(2.0*amat*Wlist[n]-sqrt(m+1)*temp)/sqrt(n+1)
            temp=copy(Wlist[n+1])
            Wlist[n+1]=temp2
        wmat+=real(psi[m+1,m+1]*Wlist[m+1])
        for k in range(m+2,M):
            wmat+=2.0*real(psi[m+1,k]*Wlist[k])
    return 0.5*wmat*g**2
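A minimal usage sketch, assuming QuTiP is installed and that this module's own imports (meshgrid, zeros, exp, pi, ket2dm, etc. from numpy/qutip) are in scope:

import numpy as np
from qutip import coherent

xvec = np.linspace(-5, 5, 200)
yvec = np.linspace(-5, 5, 200)
psi = coherent(20, 1.0)          # coherent state |alpha=1> in a 20-level Fock space
W = wigner(psi, xvec, yvec)      # function defined above
print(W.shape, W.max())          # with g=sqrt(2) the peak sits near (x, y) = (sqrt(2), 0)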
def run():
    data = sp.copy(housing_data)
    x = data[:, [0, 1]]
    y = data[:, [2]]
    m = sp.shape(y)[0]
    
    # Normalize the x values
    (x, mu, sigma) = graddesc.featureNormalize(x)
    
    # Add intercept term to x
    x = sp.concatenate((sp.ones((m, 1)), x), axis=1)
    
    # Init Theta and run Gradient Descent
    num_iters = 400
    
    # Choose some alpha value
    alphas = [0.01, 0.03, 0.1, 0.3, 1.0]
    
    for alpha in alphas:
        theta = sp.zeros((3, 1))
        (theta, J_history) = graddesc.gradientDescent(x, y, theta, alpha, num_iters)
        # Plot the value of J by number of iterations
        plt.plot(range(1, J_history.size+1), J_history, '-b')
        plt.title('Alpha = %f' % (alpha))
        plt.xlabel('Number of iterations')
        plt.ylabel('J')
        plt.xlim([0, 50])
        plt.show(block=True)
    
        # Estimate the price of a 1650 sq-ft, 3 br house
        price = 0
        house = sp.array([[1.0, 1650.0, 3.0]])
        # Normalize the features
        house[0, 1:] = (house[0, 1:] - mu) / sigma
        price = house.dot(theta)
        print('The estimated price with alpha', alpha, 'is', price[0, 0])
    
    # Reload the data
    data = sp.copy(housing_data)
    
    x = data[:, [0, 1]]
    y = data[:, [2]]
    
    # Add intercept term to x
    x = sp.concatenate((sp.ones((m, 1)), x), axis=1)
    
    # Calculate the normal equation
    theta = graddesc.normalEqn(x, y)
    print('Theta computed from the normal equations:')
    print(theta)
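For reference, the normal-equation step delegated to graddesc.normalEqn above amounts to theta = (X^T X)^(-1) X^T y; a standalone sketch with illustrative data (not the housing set):

import numpy as np

X = np.array([[1.0, 2104.0, 3.0],
              [1.0, 1600.0, 3.0],
              [1.0, 2400.0, 3.0],
              [1.0, 1416.0, 2.0]])
y = np.array([[399900.0], [329900.0], [369000.0], [232000.0]])
theta = np.linalg.pinv(X.T @ X) @ X.T @ y   # pinv guards against a singular X^T X
print(theta)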
 def __init__(self, data, point_data):
     #
     # setting up the region
     super().__init__(None)
     if data.shape != point_data.shape[0:2]:
         msg = 'data and point_data have different dimensions: {} != {}'
         raise ValueError(msg.format(data.shape, point_data.shape[:2]))
     #
     self.nz, self.nx = data.shape
     self.data_map = sp.copy(data)
     self.data_vector = sp.ravel(data)
     self.point_data = sp.copy(point_data)
     #
     self._raw_data = sp.copy(data)
     self._define_cell_interfaces()
Example #12
    def _site_percolation(self, pmask):
        r"""
        This private method is called by 'find_clusters2'
        """
        # Find throats that produce site percolation
        conns = sp.copy(self['throat.conns'])
        conns[:, 0] = pmask[conns[:, 0]]
        conns[:, 1] = pmask[conns[:, 1]]
        # Only if both pores are True is the throat set to True
        tmask = sp.all(conns, axis=1)

        # Perform the clustering using scipy.csgraph
        csr = self.create_adjacency_matrix(data=tmask,
                                           sprsfmt='csr',
                                           dropzeros=True)
        clusters = sprs.csgraph.connected_components(csgraph=csr,
                                                     directed=False)[1]

        # Adjust cluster numbers such that non-invaded pores are labelled -1
        # Note: The following line also takes care of assigning cluster numbers
        # to single isolated invaded pores
        p_clusters = (clusters + 1)*(pmask) - 1
        # Label invaded throats with their neighboring pore's label
        t_clusters = clusters[self['throat.conns']]
        ind = (t_clusters[:, 0] == t_clusters[:, 1])
        t_clusters = t_clusters[:, 0]
        # Label non-invaded throats with -1
        t_clusters[~ind] = -1

        return (p_clusters, t_clusters)
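The clustering step above is a thin wrapper around scipy.sparse.csgraph; a self-contained sketch of that call on a tiny adjacency matrix (unrelated to any OpenPNM network):

import numpy as np
from scipy.sparse import coo_matrix
from scipy.sparse.csgraph import connected_components

# 5 sites with bonds 0-1 and 3-4 kept, so two bonded clusters plus the isolated site 2
row = np.array([0, 3])
col = np.array([1, 4])
adj = coo_matrix((np.ones(2), (row, col)), shape=(5, 5)).tocsr()
n_clusters, labels = connected_components(csgraph=adj, directed=False)
print(n_clusters, labels)        # -> 3 [0 0 1 2 2]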
Example #13
def update_rule(Asp,states0,parameters,scale=0.0):
	thresh,personal,a,b,c,scale0=parameters #ignore scale ( = 0 )
	states=sp.copy(states0)
	#states is a list of states for all N individuals
	
	nei_sum=Asp*states
	degrees=Asp*sp.ones(len(states))
	
	##get average of all neighbours, i.e. s
	nei_av=[]
	for i in range(0,len(nei_sum)):
		if degrees[i]>0: nei_av.append(nei_sum[i]/degrees[i])
		else: nei_av.append(0.0)
	
	totav=sum(states)/len(states) #this is m
	
	for n in range(0,len(states)): #len means length, i.e. number of individuals
		
		utility=a[n]*personal[n]+b[n]*nei_av[n]+c[n]*totav
		if states[n] < 1.0: #if state == 0
			if utility <= thresh[n]: 
				states[n]=0.0#scale*utility ##i.e. zero if scale=0
			else:
				states[n]=1.0	
	return states
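A toy usage sketch for update_rule, assuming the module's `import scipy as sp`; the network, thresholds and utility weights below are illustrative:

import numpy as np
from scipy import sparse

N = 4
# ring network 0-1-2-3-0
Asp = sparse.csr_matrix(np.array([[0, 1, 0, 1],
                                  [1, 0, 1, 0],
                                  [0, 1, 0, 1],
                                  [1, 0, 1, 0]], dtype=float))
states0 = np.array([1.0, 0.0, 0.0, 0.0])
thresh = np.full(N, 0.4)
personal = np.zeros(N)
a, b, c = np.zeros(N), np.ones(N), np.zeros(N)    # utility driven purely by the neighbour average
print(update_rule(Asp, states0, (thresh, personal, a, b, c, 0.0)))
# nodes 1 and 3 see a neighbour average of 0.5 > 0.4, so they switch on: [1. 1. 0. 1.]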
Example #14
File: sqrtm3.py Project: sn1p3r46/Tiro
def sqrtm3(X):
    M = sp.copy(X)
    m, fb, fe = block_structure(M)
    n = M.shape[0]
    for i in range(0,m):
        M[fb[i]:fe[i],fb[i]:fe[i]] = twobytworoot(M[fb[i]:fe[i],fb[i]:fe[i]])
        #print M

    for j in range(1,m):
        for i in range(0,m-j):
            #print M[fb[i]:fe[i],fb[JJ]:fe[JJ]]
            JJ = i+j
            Tnoto = M[fb[i]:fe[i],fb[JJ]:fe[JJ]] # remove the copy later
            #print "Tnot: "
            #print Tnoto
            for k in range(i+1,JJ):
                Tnoto -= (M[fb[i]:fe[i],fb[k]:fe[k]]).dot(M[fb[k]:fe[k],fb[JJ]:fe[JJ]])
                #print M[fb[i]:fe[i],fb[k]:fe[k]]
                #print M[fb[k]:fe[k],fb[JJ]:fe[JJ]]

            if((M[fb[i]:fe[i],fb[JJ]:fe[JJ]]).shape==(1,1)):
                #print "forma 1"
                #print M[fb[i]:fe[i],fb[JJ]:fe[JJ]]           #  Uij
                #print M[fb[i]:fe[i],fb[i]:fe[i]]               #  Uii
                #print M[fb[JJ]:fe[JJ],fb[JJ]:fe[JJ]]       #  Ujj
                M[fb[i]:fe[i],fb[JJ]:fe[JJ]] = Tnoto/(M[fb[i]:fe[i],fb[i]:fe[i]] + M[fb[JJ]:fe[JJ],fb[JJ]:fe[JJ]])

            else:
                Uii = M[fb[i]:fe[i],fb[i]:fe[i]]
                Ujj = M[fb[JJ]:fe[JJ],fb[JJ]:fe[JJ]]
                shapeUii = Uii.shape[0]
                shapeUjj = Ujj.shape[0]
                """
                print "------------"
                print Tnoto
                print Tnoto.shape
                print sp.kron(sp.eye(shapeUjj),Uii)
                print sp.kron(Ujj.T,sp.eye(shapeUii))
                print Tnoto
                """
                #M[fb[i]:fe[i],fb[JJ]:fe[JJ]] = sp.linalg.solve_sylvester(Uii, Ujj, Tnoto)

                """
                x, scale, info = dtrsyl(Uii, Ujj, Tnoto

                if (scale==1.0):
                     = x

                else:
                    M[fb[i]:fe[i],fb[JJ]:fe[JJ]] = x*scale
                    print "scale!=0"
                """
                Tnoto = Tnoto.reshape((shapeUii*shapeUjj),1,order="F")
                M[fb[i]:fe[i],fb[JJ]:fe[JJ]] = \
                linalg.solve(sp.kron(sp.eye(shapeUjj),Uii) +
                sp.kron(Ujj.T,sp.eye(shapeUii)),
                Tnoto).reshape(shapeUii,shapeUjj,order="F")


    return M
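For a quick sanity check (block_structure and twobytworoot come from the same project and are not shown here), the blockwise result can be compared against SciPy's reference square root; a sketch assuming the input is a real quasi-upper-triangular Schur factor:

import numpy as np
from scipy import linalg

A = np.random.randn(6, 6)
T, Z = linalg.schur(A, output='real')     # quasi-upper-triangular input of the kind sqrtm3 expects
# S = sqrtm3(T)                           # requires block_structure/twobytworoot from the project
S_ref = linalg.sqrtm(T)                   # SciPy reference square root
print(np.allclose(S_ref @ S_ref, T))      # True up to round-off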
Example #15
  def normalizeLength(self, noteOns, factor):
    #shibu = 60. / self.wavetempo * (self.binarized_data[0].size / self.duration)
    shibu = (self.fs/10.) / (self.wavetempo/60.)
    fixToResolution = noteOns/shibu*480.
    fixToResolution[:, 2] = noteOns[:, 2]
    # MIDI_Res (resolution) = 480
    MIDI_Res = 480.
    minnotel = 1./4.*MIDI_Res
    # rate (tolerance)
    rate = 0.5

    # sort rows in descending order of NoteNo
    fixToResolution = self.rowsort(fixToResolution)
    self.oldFixToResolution = sp.copy(fixToResolution)

    # list for lilypond notation symbols
    book = [[] for i in range(fixToResolution.shape[0])]

    for n in range(fixToResolution.shape[0]):
      x_cor = fixToResolution[n, 0] + minnotel*rate - 1

      #x_cor = fixToResolution[n, 0] + minnotel - 1
      x_cor = (sp.floor(x_cor/minnotel))*minnotel
      if(x_cor == 0):
        x_cor = 1
      fixToResolution[n, 0] = x_cor
      fixToResolution[n, 3], book[n] = self.normalizeNoteLength(fixToResolution[n, 3] + factor)
      book[n] = self.convertNoteNo(fixToResolution[n, 2]) + book[n]
      fixToResolution[n, 1] = fixToResolution[n, 3] + fixToResolution[n, 0] - 1
    
    self.book = book
    return fixToResolution
Example #16
    def errorApproximation(self, ratio, dim=20):

        self.buildMatrix()

        sumNonzeros = (self.vxm !=0).sum()
        numTest = int(ratio*sumNonzeros)

        elementList = []

        nonZeroTuple = sp.nonzero(self.vxm)

        for x in range(int(numTest)):
            rInt = sp.random.randint(0,nonZeroTuple[0].size)
            randrow = nonZeroTuple[0][rInt]
            randcolumn = nonZeroTuple[1][rInt]

            valElementIndex = [randrow,randcolumn]
            elementList.append(valElementIndex)

        self.modvxm = sp.copy(self.vxm)

        for x in elementList:
            self.modvxm[x[0],x[1]] = 0

        self.modvxm = self.fillAverages(vxm=self.modvxm)
        self.newmodvxm = self.predict(dim,vxm=self.modvxm)

        sqDiff = 0
        for x in elementList:
            sqDiff += sp.square(self.newmodvxm[x[0],x[1]] - self.vxm[x[0],x[1]])
        self.rmse = sp.sqrt(sqDiff/len(elementList))
Example #17
File: data.py Project: PMBio/pygp_kronsum
    def getX(self,standardized=True,maf=None):
        """
        return SNPs; standardize them if necessary
        """
        X = SP.copy(self.X)

        # test for missing values
        isnan = SP.isnan(X)
        for i in isnan.sum(0).nonzero()[0]:
            # set to mean 
            X[isnan[:,i],i] = X[~isnan[:,i],i].mean()
                
        if maf is not None:
            LG.debug('filter SNPs')
            LG.debug('... number of SNPs(before filtering): %d'%X.shape[1])
            idx_snps = SP.logical_and(X[self.idx_samples].mean(0)>0.1,X[self.idx_samples].mean(0)<0.9)
            LG.debug('... number of SNPs(after filtering) : %d'%idx_snps.sum())
        else:
            idx_snps = SP.ones(self.n_f,dtype=bool)
        
        if standardized:
            LG.debug('standardize SNPs')
            X = X[self.idx_samples][:,idx_snps]
            X-= X.mean(0)
            X /= X.std(0,dtype=NP.float32)
            X /= SP.sqrt(X.shape[1])
            return X
      
        return X[self.idx_samples][:,idx_snps]
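Stripped of the class plumbing, the branch above mean-imputes missing genotypes and then standardizes columns; a toy standalone sketch of those steps:

import numpy as np

X = np.array([[0.0, 1.0, 2.0],
              [1.0, np.nan, 0.0],
              [2.0, 1.0, 1.0],
              [0.0, 0.0, np.nan]])
isnan = np.isnan(X)
for i in isnan.sum(0).nonzero()[0]:
    X[isnan[:, i], i] = X[~isnan[:, i], i].mean()   # impute column means
X -= X.mean(0)                                      # standardize SNPs
X /= X.std(0)
X /= np.sqrt(X.shape[1])
print(X.round(3))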
Example #18
 def execute(self):
     self.power_mat, self.thermal_expectation = self.full_calculation()
     n_chan = self.power_mat.shape[1]
     n_freq = self.power_mat.shape[0]
     # Calculate the mean channel correlations at low frequencies.
     low_f_mat = sp.mean(self.power_mat[1:4 * n_chan + 1,:,:], 0).real
     # Factorize it into principal components.
     e, v = linalg.eigh(low_f_mat)
     self.low_f_mode_values = e
     # Make sure the eigenvalues are sorted.
     if sp.any(sp.diff(e) < 0):
         raise RuntimeError("Eigenvalues not sorted.")
     self.low_f_modes = v
     # Now subtract out the noisiest channel modes and see what is left.
     n_modes_subtract = 10
     mode_subtracted_power_mat = sp.copy(self.power_mat.real)
     mode_subtracted_auto_power = sp.empty((n_modes_subtract, n_freq))
     for ii in range(n_modes_subtract):
         mode = v[:,-ii]
         amp = sp.sum(mode[:,None] * mode_subtracted_power_mat, 1)
         amp = sp.sum(amp * mode, 1)
         to_subtract = amp[:,None,None] * mode[:,None] * mode
         mode_subtracted_power_mat -= to_subtract
         auto_power = mode_subtracted_power_mat.view()
         auto_power.shape = (n_freq, n_chan**2)
         auto_power = auto_power[:,::n_chan + 1]
         mode_subtracted_auto_power[ii,:] = sp.mean(auto_power, -1)
     self.subtracted_auto_power = mode_subtracted_auto_power
Example #19
def GP_train(x, y, cov_par, cov_func = None, cov_typ ='SE', \
             cov_fixed = None, prior = None, \
             MF = None, MF_par = None, MF_args = None, \
             MF_fixed = None):
    '''    
    Max likelihood optimization of GP hyper-parameters. Calls
    GP_negloglik. Takes care of merging / splitting the fixed /
    variable and cov / MF parameters
    '''
    if MF is not None:
        merged_par = scipy.append(cov_par, MF_par)
        n_MF_par = len(MF_par)
        fixed = scipy.append(scipy.zeros(len(cov_par), 'bool'), \
                             scipy.zeros(n_MF_par, 'bool'))
        if cov_fixed is not None: fixed[0:-n_MF_par] = cov_fixed
        if MF_fixed is not None: fixed[-n_MF_par:] = MF_fixed
        if MF_args is None: MF_args = x[:]
    else:
        merged_par = cov_par[:]
        n_MF_par = 0
        fixed = scipy.zeros(len(cov_par), 'bool')
        if cov_fixed is not None: fixed[:] = cov_fixed
    var_par_in = merged_par[fixed == False]
    fixed_par = merged_par[fixed == True]
    args = (x, y, cov_func, cov_typ, MF, n_MF_par, MF_args, fixed, \
            fixed_par, prior)
    var_par_out = \
        sop.fmin(GP_negloglik, var_par_in, args, disp = 0)
    par_out = scipy.copy(merged_par)
    par_out[fixed == False] = var_par_out
    par_out[fixed == True] = fixed_par
    if MF is not None:
        return par_out[:-n_MF_par], par_out[-n_MF_par:]
    else:
        return par_out
def run():
    theta = sp.zeros((3, 1))
    data = sp.copy(admission_data)
    X = data[:, [0, 1]]
    y = data[:, [2]]
    m = sp.shape(y)[0]

    # Add intercept term to x
    X = sp.concatenate((sp.ones((m, 1)), X), axis=1)

    """
    Part 1: Plotting
    """

    print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.')
    logres.plotData(data)
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    plt.legend(['Admitted', 'Not admitted'])
    plt.show()

    print('Program paused. Press enter to continue.')
    raw_input()

    """
    Part 2: Compute Cost and Gradient
    """

    (m, n) = X.shape

    initial_theta = sp.zeros((n, 1))

    (cost, grad) = logres.costFunction(initial_theta, X, y)

    print('Cost at initial theta (zeros): ', cost)
    print('Gradient at initial theta (zeros): ', grad)

    print('Program paused. Press enter to continue.')
    raw_input()

    """
    Part 3: Optimizing using fminunc
    """

    (theta, cost) = logres.find_minimum_theta(theta, X, y)

    print('Cost at theta found by fmin: ', cost)
    print('Theta: ', theta)

    logres.plotDecisionBoundary(data, X, theta)

    plt.show()

    """
    Part 4: Predict and Accuracies
    """

    prob = logres.sigmoid(sp.asmatrix([1, 45, 85]).dot(theta))
    print('For a student with scores 45 and 85, we predict an admission probability of ', prob[0, 0])
    print('Program paused. Press enter to continue.')
Example #21
def interp2d(qx, qy, qz):
    Vandermonde = sp.zeros((4,4))
    Vandermonde[:,0] = 1
    Vandermonde[:,1] = qx
    Vandermonde[:,2] = qy
    Vandermonde[:,3] = qx*qy
    Vinv = sp.linalg.inv(Vandermonde)
    
    print 'Vandermonde\n', Vandermonde
    print
    print 'Vandermonde inverse official \n', Vinv    
    Vinv = inverse(Vandermonde, 4)
    print 'Vandermonde inverse Gauss \n', Vinv
    V22 = sp.copy(Vinv.T)
    print 'Identity check'
    print sp.dot(Vinv,Vandermonde)
    print 'Transpose official'
    print V22
    for i in range(3):
        for j in range(i+1,4):
            d = Vinv[i,j]
            Vinv[i,j]= Vinv[j,i]
            Vinv[j,i]= d
    print 'Index transpose\n', Vinv
    print 'Check transpose\n', Vinv-V22
    
    
    def SU2(x,y):
        RHS = sp.array([1,x,y,x*y])
        b = sp.dot(Vinv,RHS)
        return sp.dot(b,qz.T)
    SU2 = sp.vectorize(SU2)
    return SU2
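A self-contained sketch of the same bilinear-interpolation idea on the unit square (debug printing omitted so it runs as-is under Python 3):

import numpy as np

qx = np.array([0.0, 1.0, 0.0, 1.0])          # corner x-coordinates
qy = np.array([0.0, 0.0, 1.0, 1.0])          # corner y-coordinates
qz = np.array([1.0, 2.0, 3.0, 4.0])          # values at the four corners

V = np.column_stack([np.ones(4), qx, qy, qx * qy])   # same Vandermonde system as above
coef = np.linalg.solve(V, qz)                        # coefficients of 1, x, y, x*y

def bilinear(x, y):
    return coef @ np.array([1.0, x, y, x * y])

print(bilinear(0.5, 0.5))                    # -> 2.5, the average of the corner values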
Example #22
File: parlib.py Project: pokornyv/SPEpy
def KramersKronigFFT(ImX_A):
	'''	Hilbert transform used to calculate real part of a function from its imaginary part
	uses piecewise cubic interpolated integral kernel of the Hilbert transform
	use only if len(ImX_A)=2**m-1, uses fft from scipy.fftpack  '''
	X_A = sp.copy(ImX_A)
	N = int(len(X_A))
	## be careful with the data type, otherwise it fails for large N
	if N > 3e6: A = sp.arange(3,N+1,dtype='float64')
	else:       A = sp.arange(3,N+1)  
	X1 = 4.0*sp.log(1.5)
	X2 = 10.0*sp.log(4.0/3.0)-6.0*sp.log(1.5)
	## filling the kernel
	if N > 3e6: Kernel_A = sp.zeros(N-2,dtype='float64')
	else:       Kernel_A = sp.zeros(N-2)
	Kernel_A = (1-A**2)*((A-2)*sp.arctanh(1.0/(1-2*A))+(A+2)*sp.arctanh(1.0/(1+2*A)))\
	+((A**3-6*A**2+11*A-6)*sp.arctanh(1.0/(3-2*A))+(A+3)*(A**2+3*A+2)*sp.arctanh(1.0/(2*A+3)))/3.0
	Kernel_A = sp.concatenate([-sp.flipud(Kernel_A),sp.array([-X2,-X1,0.0,X1,X2]),Kernel_A])/sp.pi
	## zero-padding the functions for fft
	ImXExt_A = sp.concatenate([X_A[int((N-1)/2):],sp.zeros(N+2),X_A[:int((N-1)/2)]])
	KernelExt_A = sp.concatenate([Kernel_A[N:],sp.zeros(1),Kernel_A[:N]])
	## performing the fft
	ftReXExt_A = -fft(ImXExt_A)*fft(KernelExt_A)
	ReXExt_A = sp.real(ifft(ftReXExt_A))
	ReX_A = sp.concatenate([ReXExt_A[int((3*N+3)/2+1):],ReXExt_A[:int((N-1)/2+1)]])
	return ReX_A
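A minimal usage sketch, assuming this module's imports are in scope (scipy as sp, fft/ifft from scipy.fftpack); the Lorentzian below has the analytic real part w/(w**2+1), up to the sign convention of this routine:

import numpy as np

N = 2**12 - 1                        # the routine expects len(ImX_A) = 2**m - 1
w = np.linspace(-20.0, 20.0, N)
ImX = -1.0 / (w**2 + 1.0)            # imaginary part of X(w) = 1/(w + i)
ReX = KramersKronigFFT(ImX)
print(np.abs(ReX).max())             # compare with max |w/(w**2+1)| = 0.5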
Example #23
	def sample(self,filename, burnin=100, sample_rate=10, n_samples=10, stopwords=None):
		self.buildCorpus(filename,stopwords)
		self.initialize()
		self.total_nzw = sp.zeros((self.n_topics,self.n_words))
		self.total_nmz = sp.zeros((self.n_docs,self.n_topics))
		self.logprobs = sp.zeros(burnin + sample_rate*n_samples)
		for i in xrange(burnin):
			self._sweep()
			self.logprobs[i] = self._loglikelihood()
			print "Iteration: {}".format(i) + "\tLog-prob: {}".format(self.logprobs[i])
		for i in xrange(n_samples*sample_rate):
			self._sweep()
			self.logprobs[i+burnin] = self._loglikelihood()
			print "Iteration: {}".format(i+burnin) + "\tLog-prob: {}".format(self.logprobs[i+burnin])
			if not i%sample_rate:
				self.total_nzw += sp.copy(self.nzw)
				self.total_nmz += sp.copy(self.nmz)
Example #24
def hardThreshold(coeffs,thresh):
	new_coeffs = []
	for j in coeffs:
		new_coeffs.append(sp.copy(j))
	for j in xrange(1,len(new_coeffs)):
		for i in new_coeffs[j]:
			i *= sp.absolute(i) > thresh
	return new_coeffs
Example #25
    def fillAverages(self,vxm):

        vxmc = sp.copy(vxm)
        for i in range(vxmc.shape[0]):
            row = vxmc[i,:]
            row[row==0] = sp.mean(row[row!=0])
            vxmc[i,:] = row
        return vxmc
Example #26
def get_quantiles(scores, num_dots=1000):
    """
    Uses scipy
    """
    scores = sp.copy(sp.array(scores))
    scores.sort()
    indices = [int(len(scores) * i / (num_dots + 2)) for i in range(1, num_dots + 1)]
    return scores[indices]
Example #27
def get_log_quantiles(scores, num_dots=1000, max_val=5):
    """
    Uses scipy
    """
    scores = sp.copy(sp.array(scores))
    scores.sort()
    indices = sp.array(10 ** ((-sp.arange(1, num_dots + 1, dtype='single') / (num_dots + 1)) * max_val) \
                * len(scores), dtype='int')
    return -sp.log10(scores[indices])
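A quick usage sketch for the two quantile helpers above (assuming the module's `import scipy as sp`); under the null, uniform p-values land close to the diagonal of a QQ plot:

import numpy as np

pvals = np.random.uniform(size=100000)
print(get_quantiles(pvals, num_dots=5))                 # roughly [1/7, 2/7, 3/7, 4/7, 5/7]
print(get_log_quantiles(pvals, num_dots=5, max_val=3))  # roughly [0.5, 1.0, 1.5, 2.0, 2.5]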
 def write_symmetry_plane(self, path='.', create_dirs=True, overwrite=False):
     r"""
     Exports the +Y half of the mesh flattening out everything below 0 on
     the Y axis
     """
     # TODO: consider replacing the bottom face type with symmetryPlane
     #
     # storing original vertices
     old_verts = sp.copy(self._vertices)
     self._vertices[sp.where(self._vertices[:, 1] <= 0.0), 1] = 0.0
     #
     # outputting mesh
     self.write_foam_file(path=path,
                          create_dirs=create_dirs,
                          overwrite=overwrite)
     #
     # restoring original verts
     self._vertices = sp.copy(old_verts)
Example #29
def softThreshold(coeffs,thresh):
	new_coeffs = []
	for j in coeffs:
		new_coeffs.append(sp.copy(j))
	for j in xrange(1,len(new_coeffs)):
		for i in new_coeffs[j]:
			i[sp.absolute(i)<thresh] = 0
			i[sp.absolute(i)>=thresh] -= (sp.sign(i[sp.absolute(i)>=thresh]))*thresh
	return new_coeffs
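A toy usage sketch for the two thresholding helpers (hardThreshold from Example #24 and softThreshold here), assuming the Python 2 environment they were written for (both use xrange) and the module's scipy import; each detail level is a 2-D array so the in-place row updates apply:

import numpy as np

coeffs = [np.random.randn(16),           # approximation coefficients (index 0 is left untouched)
          np.random.randn(3, 16),        # detail coefficients, level 1
          np.random.randn(3, 32)]        # detail coefficients, level 2

hard = hardThreshold(coeffs, 0.5)
soft = softThreshold(coeffs, 0.5)
print(np.abs(hard[1]).min(), np.abs(soft[1]).max())   # small details zeroed / surviving ones shrunk by 0.5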
Example #30
File: chi2mixture.py Project: PMBio/limix
 def sf(self,lrt):
     """
     computes the survival function of a mixture of a chi-squared random variable of degree
     0 and a scaled chi-squared random variable of degree d
     """
     _lrt = SP.copy(lrt)
     _lrt[lrt<self.tol] = 0
     pv = self.mixture*STATS.chi2.sf(_lrt/self.scale,self.dof)
     return pv
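A standalone check of the same mixture survival function using scipy.stats directly; the parameter values below are hypothetical stand-ins for the fitted attributes self.mixture, self.scale, self.dof and self.tol:

import numpy as np
from scipy import stats

mixture, scale, dof, tol = 0.5, 1.0, 1.0, 1e-8   # assumed values
lrt = np.array([0.0, 0.5, 2.0, 5.0])
_lrt = np.copy(lrt)
_lrt[lrt < tol] = 0
pv = mixture * stats.chi2.sf(_lrt / scale, dof)
print(pv)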
Example #31
    def _build_coefficient_matrix(self):
        r'''
        This builds the sparse coefficient matrix for the linear solver.
        '''
        # Filling coefficient matrix
        tpore1 = self._net['throat.conns'][:, 0]
        tpore2 = self._net['throat.conns'][:, 1]

        #Identify Dirichlet pores
        try:
            temp = self.pores(self._phase.name + '_Dirichlet',
                              mode='difference')
        except:
            raise Exception(
                'The linear transport solver needs at least one Dirichlet boundary condition for the phase which is attached to '
                + self.name)
        loc1 = sp.in1d(tpore1, temp)
        loc2 = sp.in1d(tpore2, temp)
        modified_tpore1 = tpore1[loc1]
        modified_tpore2 = tpore2[loc1]
        row = modified_tpore1
        col = modified_tpore2

        #Expand the conductance to a vector if necessary
        g = self['throat.conductance']
        if sp.size(g) == 1:
            g = g * sp.ones(self.num_throats())
        data_main = g
        data = data_main[loc1]

        modified_tpore2 = tpore2[loc2]
        modified_tpore1 = tpore1[loc2]
        row = sp.append(row, modified_tpore2)
        col = sp.append(col, modified_tpore1)
        data = sp.append(data, data_main[loc2])
        A_dim = self.num_pores()

        #Check for Neumann_group BCs and add superpores if necessary
        try:
            self.pores(self._phase.name + '_Neumann_group')
            group_values = self.get_data(
                prop=self._phase.name + '_bcval_Neumann_group',
                pores=self.pores(self._phase.name + '_Neumann_group'))
            self._group_Neumann_vals = sp.unique(group_values)
            A_dim = A_dim + len(self._group_Neumann_vals)
            extera_neu = self._group_Neumann_vals
            self._g_super = 1e-60
            for item in sp.r_[0:len(extera_neu)]:
                neu_tpore2 = self.pores(self._phase.name + '_Neumann_group')
                neu_tpore2 = neu_tpore2[group_values == extera_neu[item]]
                row = sp.append(row, neu_tpore2)
                col = sp.append(col, len(neu_tpore2) * [A_dim - item - 1])
                data = sp.append(data, len(neu_tpore2) * [self._g_super])
                row = sp.append(row, len(neu_tpore2) * [A_dim - item - 1])
                col = sp.append(col, neu_tpore2)
                data = sp.append(data, len(neu_tpore2) * [self._g_super])
        except:
            pass

        # Adding positions for diagonal
        diag = sp.r_[0:A_dim]
        try:
            pores = self.pores(self._phase.name + '_Dirichlet')
            row = sp.append(row, diag[pores])
            col = sp.append(col, diag[pores])
            data = sp.append(data, sp.ones_like(diag[pores]))
            temp_data = sp.copy(data)
            temp_data[sp.in1d(row, diag[pores])] = 0
            non_Dir_diag = diag[~sp.in1d(diag, diag[pores])]
        except:
            temp_data = sp.copy(data)
            non_Dir_diag = diag
        S_temp = sp.zeros(A_dim)
        for i in sp.r_[0:len(row)]:
            S_temp[row[i]] = S_temp[row[i]] - temp_data[i]
        data = sp.append(data, S_temp[non_Dir_diag])
        row = sp.append(row, non_Dir_diag)
        col = sp.append(col, non_Dir_diag)
        #Convert the lists to the sparse matrix
        self._Coeff_dimension = A_dim
        a = sprs.coo.coo_matrix((data, (row, col)), (A_dim, A_dim))
        A = a.tocsr()
        return (A)
Example #32
def ncp_bcd(X, rank, random_state=None, init='rand', **options):
    """
    Fits nonnegative CP Decomposition using the Block Coordinate Descent (BCD)
    Method.

    Parameters
    ----------
    X : (I_1, ..., I_N) array_like
        A real array with nonnegative entries and ``X.ndim >= 3``.

    rank : integer
        The `rank` sets the number of components to be computed.

    random_state : integer, RandomState instance or None, optional (default ``None``)
        If integer, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used by np.random.

    init : str, or KTensor, optional (default ``'rand'``).
        Specifies initial guess for KTensor factor matrices.
        If ``'randn'``, Gaussian random numbers are used to initialize.
        If ``'rand'``, uniform random numbers are used to initialize.
        If KTensor instance, a copy is made to initialize the optimization.

    options : dict, specifying fitting options.

        tol : float, optional (default ``tol=1E-5``)
            Stopping tolerance for reconstruction error.

        max_iter : integer, optional (default ``max_iter = 500``)
            Maximum number of iterations to perform before exiting.

        min_iter : integer, optional (default ``min_iter = 1``)
            Minimum number of iterations to perform before exiting.

        max_time : integer, optional (default ``max_time = np.inf``)
            Maximum computational time before exiting.

        verbose : bool ``{'True', 'False'}``, optional (default ``verbose=True``)
            Display progress.


    Returns
    -------
    result : FitResult instance
        Object which holds the fitted results. It provides the factor matrices
        in form of a KTensor, ``result.factors``.


    Notes
    -----
    This implementation uses the Block Coordinate Descent Method.


    References
    ----------
    Xu, Yangyang, and Wotao Yin. "A block coordinate descent method for
    regularized multiconvex optimization with applications to
    nonnegative tensor factorization and completion."
    SIAM Journal on imaging sciences 6.3 (2013): 1758-1789.


    Examples
    --------

    """

    # Check inputs.
    optim_utils._check_cpd_inputs(X, rank)

    # Number of tensor modes; the norm of X used in the objective comes from the initializer below.
    N = X.ndim

    # Initialize problem.
    U, normX = optim_utils._get_initial_ktensor(init, X, rank, random_state)
    result = FitResult(U, 'NCP_BCD', **options)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Block coordinate descent
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Um = U.copy()  # Extrapolations of components
    extraw = 1  # Used for extrapolation weight update
    weights_U = np.ones(N)  # Extrapolation weights
    L = np.ones(N)  # Lipschitz constants
    obj_bcd = 0.5 * normX**2  # Initial objective value

    # Main optimization loop.
    while result.still_optimizing:
        obj_bcd_old = obj_bcd  # Old objective value
        U_old = U.copy()
        extraw_old = extraw

        for n in range(N):

            # Select all components, but U_n
            components = [U[j] for j in range(N) if j != n]

            # i) compute the N-1 gram matrices
            grams = sci.multiply.reduce([arr.T.dot(arr) for arr in components])

            # Update gradient Lipschitz constant
            L0 = L  # Lipschitz constants
            L[n] = linalg.norm(grams, 2)

            # ii)  Compute Khatri-Rao product
            kr = khatri_rao(components)
            p = unfold(X, n).dot(kr)

            # Compute Gradient.
            grad = Um[n].dot(grams) - p

            # Enforce nonnegativity (project onto nonnegative orthant).
            U[n] = sci.maximum(0.0, Um[n] - grad / L[n])

        # Compute objective function and update optimization result.
        # grams *= U[X.ndim - 1].T.dot(U[X.ndim - 1])
        # obj = np.sqrt(sci.sum(grams) - 2 * sci.sum(U[X.ndim - 1] * p) + normX**2) / normX
        obj = linalg.norm(X - U.full()) / normX
        result.update(obj)

        # Correction and extrapolation.
        grams *= U[N - 1].T.dot(U[N - 1])
        obj_bcd = 0.5 * (sci.sum(grams) - 2 * sci.sum(U[N - 1] * p) + normX**2)

        extraw = (1 + sci.sqrt(1 + 4 * extraw_old**2)) / 2.0

        if obj_bcd >= obj_bcd_old:
            # restore previous A to make the objective nonincreasing
            Um = sci.copy(U_old)

        else:
            # apply extrapolation
            w = (extraw_old - 1.0) / extraw  # Extrapolation weight
            for n in range(N):
                weights_U[n] = min(w, 1.0 * sci.sqrt(
                    L0[n] / L[n]))  # choose smaller weights for convergence
                Um[n] = U[n] + weights_U[n] * (U[n] - U_old[n]
                                               )  # extrapolation

    # Finalize and return the optimization result.
    return result.finalize()
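A minimal usage sketch; ncp_bcd depends on its surrounding tensortools-style module (optim_utils, FitResult, khatri_rao, unfold), so the call below is illustrative rather than a guaranteed import path:

import numpy as np

X = np.random.rand(20, 20, 20)                 # synthetic nonnegative third-order tensor
result = ncp_bcd(X, rank=3, verbose=False)     # verbose is one of the documented options
factors = result.factors                       # fitted KTensor, as described in the docstring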
Example #33
 def wrap(x):
     xq = sp.copy(x)
     xq.resize([1, d])
     a = G.infer_m_post(xq, [[sp.NaN]])
     return a[0, 0]
Example #34
    def __init__(self,
                 challenge_func,
                 ns=10,
                 npop1=20,
                 pr=0.3,
                 beta=0.85,
                 npop2=20,
                 w=0.7,
                 c1=1.5,
                 c2=1.5):
        # Population sizes
        seed()
        self.ns = ns
        self.npop1 = npop1
        self.npop2 = npop2
        # DE parameters
        self.beta = beta
        self.pr = pr
        # PSO parameters
        self.c1 = c1
        self.c2 = c2
        self.w = w
        # Function representing the challenge problem
        self.fc = challenge_func
        # Solutions to the challenge problem
        #self.pso = pso(fitness_func = challenge_func,npop = npop2,w = w,c1 = c1,c2 = c2)
        self.ans1 = scipy.zeros(self.npop1)
        self.ans2 = scipy.zeros(self.npop2)
        # Populations
        self.pop1 = []
        self.pop2 = []
        # Generate pop1 and pop2 and solve the challenge problem
        for i in scipy.arange(self.npop1):
            self.ans1[i], aux = self.resolve_desafio(self.gera_individuo())
            self.pop1.append(aux.copy())

        for i in scipy.arange(self.npop2):
            self.ans2[i], aux = self.resolve_desafio(self.gera_individuo())
            self.pop2.append(aux.copy())

        self.pop1 = scipy.array(self.pop1)
        self.pop2 = scipy.array(self.pop2)

        self.hall_of_fame1 = []
        for i in scipy.arange(15):
            self.hall_of_fame1.insert(
                0,
                scipy.hstack((self.ans1.min(), self.pop1[self.ans1.argmin()])))

        self.hall_of_fame2 = []
        for i in scipy.arange(15):
            #self.hall_of_fame2.insert(0,scipy.hstack((self.pso.fit[0],self.pso.pop[0])))
            self.hall_of_fame2.insert(
                0,
                scipy.hstack((self.ans2.min(), self.pop2[self.ans2.argmin()])))

        # Fitness functions of the populations
        self.fit1 = scipy.zeros(self.npop1)
        self.fit2 = scipy.zeros(self.npop2)

        for i in scipy.arange(self.npop2):
            self.fit2[i] = self.avalia_aptidao2(self.ans2[i])

        for i in scipy.arange(self.npop1):
            self.fit1[i] = self.avalia_aptidao1(self.ans1[i])

        # initialize the PSO velocities
        self.v = scipy.zeros(self.pop2.shape)
        # store each PSO particle's best fitness
        self.bfp = scipy.copy(self.pop2)
        self.bfp_fitness = scipy.copy(self.fit2)
        self.bfp_ans = scipy.copy(self.ans2)
        # store the global best PSO fitness
        self.bfg = self.pop2[self.bfp_fitness.argmax()].copy()
        self.bfg_fitness = self.bfp_fitness.max().copy()
        self.bfg_ans = self.bfp_ans[self.bfp_fitness.argmax()].copy()
Example #35
def dare_old(A, B, Q, R, S=None, E=None):
    # Make sure we can import required slycot routine
    try:
        from slycot import sb02md
    except ImportError:
        raise ControlSlycot("can't find slycot module 'sb02md'")

    try:
        from slycot import sb02mt
    except ImportError:
        raise ControlSlycot("can't find slycot module 'sb02mt'")

    # Make sure we can find the required slycot routine
    try:
        from slycot import sg02ad
    except ImportError:
        raise ControlSlycot("can't find slycot module 'sg02ad'")

    # Reshape 1-d arrays
    if len(shape(A)) == 1:
        A = A.reshape(1, A.size)

    if len(shape(B)) == 1:
        B = B.reshape(1, B.size)

    if len(shape(Q)) == 1:
        Q = Q.reshape(1, Q.size)

    if R is not None and len(shape(R)) == 1:
        R = R.reshape(1, R.size)

    if S is not None and len(shape(S)) == 1:
        S = S.reshape(1, S.size)

    if E is not None and len(shape(E)) == 1:
        E = E.reshape(1, E.size)

    # Determine main dimensions
    if size(A) == 1:
        n = 1
    else:
        n = size(A, 0)

    if size(B) == 1:
        m = 1
    else:
        m = size(B, 1)

    # Solve the standard algebraic Riccati equation
    if S is None and E is None:
        # Check input data for consistency
        if size(A) > 1 and shape(A)[0] != shape(A)[1]:
            raise ControlArgument("A must be a quadratic matrix.")

        if (size(Q) > 1 and shape(Q)[0] != shape(Q)[1]) or \
            (size(Q) > 1 and shape(Q)[0] != n) or \
            size(Q) == 1 and n > 1:
            raise ControlArgument("Q must be a quadratic matrix of the same \
                dimension as A.")

        if (size(B) > 1 and shape(B)[0] != n) or \
            size(B) == 1 and n > 1:
            raise ControlArgument("Incompatible dimensions of B matrix.")

        if not (asarray(Q) == asarray(Q).T).all():
            raise ControlArgument("Q must be a symmetric matrix.")

        if not (asarray(R) == asarray(R).T).all():
            raise ControlArgument("R must be a symmetric matrix.")

        # Create back-up of arrays needed for later computations
        A_ba = copy(A)
        R_ba = copy(R)
        B_ba = copy(B)

        # Solve the standard algebraic Riccati equation by calling Slycot
        # functions sb02mt and sb02md
        try:
            A_b, B_b, Q_b, R_b, L_b, ipiv, oufact, G = sb02mt(n, m, B, R)
        except ValueError as ve:
            if ve.info < 0:
                e = ValueError(ve.message)
                e.info = ve.info
            elif ve.info == m + 1:
                e = ValueError("The matrix R is numerically singular.")
                e.info = ve.info
            else:
                e = ValueError("The %i-th element of d in the UdU (LdL) \
                     factorization is zero." % ve.info)
                e.info = ve.info
            raise e

        try:
            X, rcond, w, S, U, A_inv = sb02md(n, A, G, Q, 'D')
        except ValueError as ve:
            if ve.info < 0 or ve.info > 5:
                e = ValueError(ve.message)
                e.info = ve.info
            elif ve.info == 1:
                e = ValueError("The matrix A is (numerically) singular in \
                    discrete-time case.")
                e.info = ve.info
            elif ve.info == 2:
                e = ValueError("The Hamiltonian or symplectic matrix H cannot \
                    be reduced to real Schur form.")
                e.info = ve.info
            elif ve.info == 3:
                e = ValueError("The real Schur form of the Hamiltonian or \
                     symplectic matrix H cannot be appropriately ordered.")
                e.info = ve.info
            elif ve.info == 4:
                e = ValueError("The Hamiltonian or symplectic matrix H has \
                     less than n stable eigenvalues.")
                e.info = ve.info
            elif ve.info == 5:
                e = ValueError("The N-th order system of linear algebraic \
                     equations is singular to working precision.")
                e.info = ve.info
            raise e

        # Calculate the gain matrix G
        if size(R_b) == 1:
            G = dot( 1/(dot(asarray(B_ba).T,dot(X,B_ba))+R_ba) , \
                dot(asarray(B_ba).T,dot(X,A_ba)) )
        else:
            G = dot( inv(dot(asarray(B_ba).T,dot(X,B_ba))+R_ba) , \
                dot(asarray(B_ba).T,dot(X,A_ba)) )

        # Return the solution X, the closed-loop eigenvalues L and
        # the gain matrix G
        return (X, w[:n], G)

    # Solve the generalized algebraic Riccati equation
    elif S is not None and E is not None:
        # Check input data for consistency
        if size(A) > 1 and shape(A)[0] != shape(A)[1]:
            raise ControlArgument("A must be a quadratic matrix.")

        if (size(Q) > 1 and shape(Q)[0] != shape(Q)[1]) or \
            (size(Q) > 1 and shape(Q)[0] != n) or \
            size(Q) == 1 and n > 1:
            raise ControlArgument("Q must be a quadratic matrix of the same \
                dimension as A.")

        if (size(B) > 1 and shape(B)[0] != n) or \
            size(B) == 1 and n > 1:
            raise ControlArgument("Incompatible dimensions of B matrix.")

        if (size(E) > 1 and shape(E)[0] != shape(E)[1]) or \
            (size(E) > 1 and shape(E)[0] != n) or \
            size(E) == 1 and n > 1:
            raise ControlArgument("E must be a quadratic matrix of the same \
                dimension as A.")

        if (size(R) > 1 and shape(R)[0] != shape(R)[1]) or \
            (size(R) > 1 and shape(R)[0] != m) or \
            size(R) == 1 and m > 1:
            raise ControlArgument("R must be a quadratic matrix of the same \
                dimension as the number of columns in the B matrix.")

        if (size(S) > 1 and shape(S)[0] != n) or \
            (size(S) > 1 and shape(S)[1] != m) or \
            size(S) == 1 and n > 1 or \
            size(S) == 1 and m > 1:
            raise ControlArgument("Incompatible dimensions of S matrix.")

        if not (asarray(Q) == asarray(Q).T).all():
            raise ControlArgument("Q must be a symmetric matrix.")

        if not (asarray(R) == asarray(R).T).all():
            raise ControlArgument("R must be a symmetric matrix.")

        # Create back-up of arrays needed for later computations
        A_b = copy(A)
        R_b = copy(R)
        B_b = copy(B)
        E_b = copy(E)
        S_b = copy(S)

        # Solve the generalized algebraic Riccati equation by calling the
        # Slycot function sg02ad
        try:
            rcondu,X,alfar,alfai,beta,S_o,T,U,iwarn = \
                    sg02ad('D','B','N','U','N','N','S','R',n,m,0,A,E,B,Q,R,S)
        except ValueError as ve:
            if ve.info < 0 or ve.info > 7:
                e = ValueError(ve.message)
                e.info = ve.info
            elif ve.info == 1:
                e = ValueError("The computed extended matrix pencil is \
                            singular, possibly due to rounding errors.")
                e.info = ve.info
            elif ve.info == 2:
                e = ValueError("The QZ algorithm failed.")
                e.info = ve.info
            elif ve.info == 3:
                e = ValueError("Reordering of the generalized eigenvalues \
                     failed.")
                e.info = ve.info
            elif ve.info == 4:
                e = ValueError("After reordering, roundoff changed values of \
                            some complex eigenvalues so that leading \
                            eigenvalues in the generalized Schur form no \
                            longer satisfy the stability condition; this \
                            could also be caused due to scaling.")
                e.info = ve.info
            elif ve.info == 5:
                e = ValueError("The computed dimension of the solution does \
                            not equal N.")
                e.info = ve.info
            elif ve.info == 6:
                e = ValueError("The spectrum is too close to the boundary of \
                            the stability domain.")
                e.info = ve.info
            elif ve.info == 7:
                e = ValueError("A singular matrix was encountered during the \
                            computation of the solution matrix X.")
                e.info = ve.info
            raise e

        L = zeros((n, 1))
        L.dtype = 'complex64'
        for i in range(n):
            L[i] = (alfar[i] + alfai[i] * 1j) / beta[i]

        # Calculate the gain matrix G
        if size(R_b) == 1:
            G = dot( 1/(dot(asarray(B_b).T,dot(X,B_b))+R_b) , \
                dot(asarray(B_b).T,dot(X,A_b)) + asarray(S_b).T)
        else:
            G = dot( inv(dot(asarray(B_b).T,dot(X,B_b))+R_b) , \
                dot(asarray(B_b).T,dot(X,A_b)) + asarray(S_b).T)

        # Return the solution X, the closed-loop eigenvalues L and
        # the gain matrix G
        return (X, L, G)

    # Invalid set of input parameters
    else:
        raise ControlArgument("Invalid set of input parameters.")
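For reference, the standard discrete-time case handled above can be cross-checked against SciPy without Slycot; a small sketch (not part of the original module):

import numpy as np
from scipy.linalg import solve_discrete_are, inv

A = np.array([[0.0, 1.0], [0.0, -1.0]])
B = np.array([[1.0, 0.0], [2.0, 1.0]])
Q = np.eye(2)
R = np.array([[2.0, 0.0], [0.0, 1.0]])

X = solve_discrete_are(A, B, Q, R)
G = inv(B.T @ X @ B + R) @ (B.T @ X @ A)   # gain matrix, matching the formula used above
print(X)
print(G)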
Example #36
def regions_to_network(im, dt=None, voxel_size=1):
    r"""
    Analyzes an image that has been partitioned into pore regions and extracts
    the pore and throat geometry as well as network connectivity.

    Parameters
    ----------
    im : ND-array
        An image of the pore space partitioned into individual pore regions.
        Note that this image must have zeros indicating the solid phase.

    dt : ND-array
        The distance transform of the pore space.  If not given it will be
        calculated, but it can save time to provide one if available.

    voxel_size : scalar
        The resolution of the image, expressed as the length of one side of a
        voxel, so the volume of a voxel would be **voxel_size**-cubed.  The
        default is 1, which is useful when overlaying the PNM on the original
        image since the scale of the image is always 1 unit length per voxel.

    Returns
    -------
    A dictionary containing all the pore and throat size data, as well as the
    network topological information.  The dictionary names use the OpenPNM
    convention (i.e. 'pore.coords', 'throat.conns') so it may be converted
    directly to an OpenPNM network object using the ``update`` command.

    """
    print('_' * 60)
    print('Extracting pore and throat information from image')
    from skimage.morphology import disk, ball
    struc_elem = disk if im.ndim == 2 else ball

    # if ~sp.any(im == 0):
    #     raise Exception('The received image has no solid phase (0\'s)')

    if dt is None:
        dt = spim.distance_transform_edt(im > 0)
        dt = spim.gaussian_filter(input=dt, sigma=0.5)

    # Get 'slices' into im for each pore region
    slices = spim.find_objects(im)

    # Initialize arrays
    Ps = sp.arange(1, sp.amax(im) + 1)
    Np = sp.size(Ps)
    p_coords = sp.zeros((Np, im.ndim), dtype=float)
    p_volume = sp.zeros((Np, ), dtype=float)
    p_dia_local = sp.zeros((Np, ), dtype=float)
    p_dia_global = sp.zeros((Np, ), dtype=float)
    p_label = sp.zeros((Np, ), dtype=int)
    p_area_surf = sp.zeros((Np, ), dtype=int)
    t_conns = []
    t_dia_inscribed = []
    t_area = []
    t_perimeter = []
    t_coords = []
    # dt_shape = sp.array(dt.shape)

    # Start extracting size information for pores and throats
    for i in tqdm(Ps):
        pore = i - 1
        if slices[pore] is None:
            continue
        s = extend_slice(slices[pore], im.shape)
        sub_im = im[s]
        sub_dt = dt[s]
        pore_im = sub_im == i
        padded_mask = sp.pad(pore_im, pad_width=1, mode='constant')
        pore_dt = spim.distance_transform_edt(padded_mask)
        s_offset = sp.array([i.start for i in s])
        p_label[pore] = i
        # p_coords[pore, :] = spim.center_of_mass(pore_im) + s_offset
        peaks = sp.vstack(sp.where(pore_dt == pore_dt.max())).T
        p_coords[pore, :] = peaks[0] + s_offset
        p_volume[pore] = sp.sum(pore_im)
        p_dia_local[pore] = 2 * sp.amax(pore_dt)
        p_dia_global[pore] = 2 * sp.amax(sub_dt)
        p_area_surf[pore] = sp.sum(pore_dt == 1)
        im_w_throats = spim.binary_dilation(input=pore_im,
                                            structure=struc_elem(1))
        im_w_throats = im_w_throats * sub_im
        Pn = sp.unique(im_w_throats)[1:] - 1
        for j in Pn:
            if j > pore:
                t_conns.append([pore, j])
                vx = sp.where(im_w_throats == (j + 1))
                t_dia_inscribed.append(2 * sp.amax(sub_dt[vx]))
                t_perimeter.append(sp.sum(sub_dt[vx] < 2))
                t_area.append(sp.size(vx[0]))
                t_inds = tuple([i + j for i, j in zip(vx, s_offset)])
                temp = sp.where(dt[t_inds] == sp.amax(dt[t_inds]))[0][0]
                if im.ndim == 2:
                    t_coords.append(tuple((t_inds[0][temp], t_inds[1][temp])))
                else:
                    t_coords.append(
                        tuple((t_inds[0][temp], t_inds[1][temp],
                               t_inds[2][temp])))
    # Clean up values
    Nt = len(t_dia_inscribed)  # Get number of throats
    if im.ndim == 2:  # If 2D, add 0's in 3rd dimension
        p_coords = sp.vstack((p_coords.T, sp.zeros((Np, )))).T
        t_coords = sp.vstack((sp.array(t_coords).T, sp.zeros((Nt, )))).T

    net = {}
    net['pore.all'] = sp.ones((Np, ), dtype=bool)
    net['throat.all'] = sp.ones((Nt, ), dtype=bool)
    net['pore.coords'] = sp.copy(p_coords) * voxel_size
    net['pore.centroid'] = sp.copy(p_coords) * voxel_size
    net['throat.centroid'] = sp.array(t_coords) * voxel_size
    net['throat.conns'] = sp.array(t_conns)
    net['pore.label'] = sp.array(p_label)
    net['pore.volume'] = sp.copy(p_volume) * (voxel_size**3)
    net['throat.volume'] = sp.zeros((Nt, ), dtype=float)
    net['pore.diameter'] = sp.copy(p_dia_local) * voxel_size
    net['pore.inscribed_diameter'] = sp.copy(p_dia_local) * voxel_size
    net['pore.equivalent_diameter'] = 2 * (
        (3 / 4 * net['pore.volume'] / sp.pi)**(1 / 3))
    net['pore.extended_diameter'] = sp.copy(p_dia_global) * voxel_size
    net['pore.surface_area'] = sp.copy(p_area_surf) * (voxel_size)**2
    net['throat.diameter'] = sp.array(t_dia_inscribed) * voxel_size
    net['throat.inscribed_diameter'] = sp.array(t_dia_inscribed) * voxel_size
    net['throat.area'] = sp.array(t_area) * (voxel_size**2)
    net['throat.perimeter'] = sp.array(t_perimeter) * voxel_size
    net['throat.equivalent_diameter'] = (sp.array(t_area) *
                                         (voxel_size**2))**0.5
    P12 = net['throat.conns']
    PT1 = sp.sqrt(
        sp.sum(((p_coords[P12[:, 0]] - t_coords) * voxel_size)**2, axis=1))
    PT2 = sp.sqrt(
        sp.sum(((p_coords[P12[:, 1]] - t_coords) * voxel_size)**2, axis=1))
    net['throat.total_length'] = PT1 + PT2
    PT1 = PT1 - p_dia_local[P12[:, 0]] / 2 * voxel_size
    PT2 = PT2 - p_dia_local[P12[:, 1]] / 2 * voxel_size
    net['throat.length'] = PT1 + PT2
    dist = (p_coords[P12[:, 0]] - p_coords[P12[:, 1]]) * voxel_size
    net['throat.direct_length'] = sp.sqrt(sp.sum(dist**2, axis=1))
    # Make a dummy openpnm network to get the conduit lengths
    pn = op.network.GenericNetwork()
    pn.update(net)
    pn.add_model(propname='throat.endpoints',
                 model=op_gm.throat_endpoints.spherical_pores,
                 pore_diameter='pore.inscribed_diameter',
                 throat_diameter='throat.inscribed_diameter')
    pn.add_model(propname='throat.conduit_lengths',
                 model=op_gm.throat_length.conduit_lengths)
    pn.add_model(propname='pore.area', model=op_gm.pore_area.sphere)
    net['throat.endpoints.head'] = pn['throat.endpoints.head']
    net['throat.endpoints.tail'] = pn['throat.endpoints.tail']
    net['throat.conduit_lengths.pore1'] = pn['throat.conduit_lengths.pore1']
    net['throat.conduit_lengths.pore2'] = pn['throat.conduit_lengths.pore2']
    net['throat.conduit_lengths.throat'] = pn['throat.conduit_lengths.throat']
    net['pore.area'] = pn['pore.area']
    prj = pn.project
    prj.clear()
    wrk = op.Workspace()
    wrk.close_project(prj)

    return net
Example #37
def annopred_genomewide(data_file=None,
                        ld_radius=None,
                        ld_dict=None,
                        out_file_prefix=None,
                        ps=None,
                        n=None,
                        h2=None,
                        num_iter=None,
                        zero_jump_prob=0.05,
                        burn_in=5,
                        PRF=None):
    """
    Calculate LDpred for a genome
    """
    prf_chr = PRF['chrom']
    prf_sids = PRF['sids']
    prf_pi = PRF['pi']
    prf_sigi2 = PRF['sigi2']

    df = h5py.File(data_file, 'r')
    has_phenotypes = False
    if 'y' in df.keys():
        print 'Validation phenotypes found.'
        y = df['y'][...]  # Phenotype
        num_individs = len(y)
        risk_scores_pval_derived = sp.zeros(num_individs)
        risk_scores_pval_derived_inf = sp.zeros(num_individs)
        has_phenotypes = True

    ld_scores_dict = ld_dict['ld_scores_dict']
    chrom_ld_dict = ld_dict['chrom_ld_dict']
    chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats']

    print 'LD radius used: %d' % ld_radius
    results_dict = {}
    num_snps = 0
    sum_beta2s = 0
    cord_data_g = df['cord_data']

    for chrom_str in chromosomes_list:
        if chrom_str in cord_data_g.keys():
            g = cord_data_g[chrom_str]
            betas = g['betas'][...]
            n_snps = len(betas)
            num_snps += n_snps
            sum_beta2s += sp.sum(betas**2)

    L = ld_scores_dict['avg_gw_ld_score']
    chi_square_lambda = sp.mean(n * sum_beta2s / float(num_snps))
    #    print 'Genome-wide lambda inflation:', chi_square_lambda,
    print 'Genome-wide mean LD score:', L
    gw_h2_ld_score_est = max(0.0001, (max(1, chi_square_lambda) - 1) /
                             (n * (L / num_snps)))
    print 'Estimated genome-wide heritability:', gw_h2_ld_score_est

    assert chi_square_lambda > 1, 'Check the summary statistic file'
    if h2 is None:
        h2 = gw_h2_ld_score_est
    print 'Heritability used to rescale the per-SNP prior variances:', h2
    h2_new = sp.sum(prf_sigi2)
    sig_12 = 1.0 / n
    pr_sig = {}
    pr_p = {}
    annopred_inf_chrom_dict = {}
    print 'Calculating initial values for MCMC using infinitesimal model'
    for chrom_str in chromosomes_list:
        if chrom_str in cord_data_g.keys():
            print 'Calculating posterior betas for Chromosome %s' % (
                (chrom_str.split('_'))[1])
            g = cord_data_g[chrom_str]

            #Filter monomorphic SNPs
            snp_stds = g['snp_stds_ref'][...]
            snp_stds = snp_stds.flatten()
            ok_snps_filter = snp_stds > 0
            pval_derived_betas = g['betas'][...]
            pval_derived_betas = pval_derived_betas[ok_snps_filter]
            sids = g['sids'][...]
            sids = sids[ok_snps_filter]
            chri = int(chrom_str.split('_')[1])
            prf_sids_chri = prf_sids[prf_chr == chri]
            prf_pi_chri = prf_pi[prf_chr == chri]
            prf_sigi2_chri = prf_sigi2[prf_chr == chri]
            if len(prf_sids_chri) == len(sids):
                if sum(prf_sids_chri == sids) == len(prf_sids_chri):
                    pr_p[chrom_str] = sp.copy(prf_pi_chri)
                    pr_sig[chrom_str] = sp.copy(prf_sigi2_chri)
                else:
                    print 'Order of SNPs does not match, sorting prior files'
                    pr_p[chrom_str] = sp.zeros(len(sids))
                    pr_sig[chrom_str] = sp.zeros(len(sids))
                    for i, sid in enumerate(sids):
                        pr_p[chrom_str][i] = prf_pi_chri[prf_sids_chri == sid]
                        pr_sig[chrom_str][i] = prf_sigi2_chri[prf_sids_chri ==
                                                              sid]
            else:
                print 'More SNPs found in prior file, extracting SNPs from prior files'
                pr_p[chrom_str] = sp.zeros(len(sids))
                pr_sig[chrom_str] = sp.zeros(len(sids))
                for i, sid in enumerate(sids):
                    pr_p[chrom_str][i] = prf_pi_chri[prf_sids_chri == sid]
                    pr_sig[chrom_str][i] = prf_sigi2_chri[prf_sids_chri == sid]
            pr_sig[chrom_str] = h2 * pr_sig[chrom_str] / h2_new
            if h2 is not None:
                h2_chrom = sp.sum(pr_sig[chrom_str])
            else:
                h2_chrom = gw_h2_ld_score_est * (n_snps / float(num_snps))
            start_betas = annopred_inf(
                pval_derived_betas,
                pr_sigi=pr_sig[chrom_str],
                reference_ld_mats=chrom_ref_ld_mats[chrom_str],
                n=n,
                ld_window_size=2 * ld_radius)
            annopred_inf_chrom_dict[chrom_str] = start_betas

    for p in ps:
        print 'Starting AnnoPred with ', p
        p_str = p
        results_dict[p_str] = {}

        if out_file_prefix:
            #Preparing output files
            raw_effect_sizes = []
            annopred_effect_sizes = []
            annopred_inf_effect_sizes = []
            out_sids = []
            chromosomes = []
            out_positions = []
            out_nts = []

        out = []
        out_inf = []
        out.append('The input prior p is ' + str(prf_pi[0]) + '\n')
        out.append('Estimated Genome-wide heritability: ' +
                   str(gw_h2_ld_score_est) + '\n')
        out.append('Posterior variance for each snp: ' + str(sig_12) + '\n')
        print 'Estimated Genome-wide heritability from Priors:', h2
        print 'Posterior variance for each snp:', sig_12
        for chrom_str in chromosomes_list:
            if chrom_str in cord_data_g.keys():
                g = cord_data_g[chrom_str]
                if has_phenotypes:
                    if 'raw_snps_val' in g.keys():
                        raw_snps = g['raw_snps_val'][...]
                    else:
                        raw_snps = g['raw_snps_ref'][...]

                #Filter monomorphic SNPs
                snp_stds = g['snp_stds_ref'][...]
                snp_stds = snp_stds.flatten()
                ok_snps_filter = snp_stds > 0
                snp_stds = snp_stds[ok_snps_filter]
                pval_derived_betas = g['betas'][...]
                pval_derived_betas = pval_derived_betas[ok_snps_filter]
                positions = g['positions'][...]
                positions = positions[ok_snps_filter]
                sids = g['sids'][...]
                sids = sids[ok_snps_filter]
                log_odds = g['log_odds'][...]
                log_odds = log_odds[ok_snps_filter]
                nts = g['nts'][...]
                nts = nts[ok_snps_filter]

                prf_pi_chri_sorted = pr_p[chrom_str]
                prf_sigi2_chri_sorted = pr_sig[chrom_str]

                if out_file_prefix:
                    chromosomes.extend([chrom_str] * len(pval_derived_betas))
                    out_positions.extend(positions)
                    out_sids.extend(sids)
                    raw_effect_sizes.extend(log_odds)
                    out_nts.extend(nts)

                n_snps = len(pval_derived_betas)

                if h2 is not None:
                    h2_chrom = sp.sum(prf_sigi2_chri_sorted)
                    #h2_chrom = h2 * (n_snps / float(num_snps))
                else:
                    h2_chrom = gw_h2_ld_score_est * (n_snps / float(num_snps))
                #print 'Prior parameters: p=%0.3f, n=%d, m=%d, h2_chrom=%0.4f' % (p, n, n_snps, h2_chrom)
                res_dict = non_infinitesimal_mcmc(
                    pval_derived_betas,
                    Pi=prf_pi_chri_sorted,
                    Sigi2=prf_sigi2_chri_sorted,
                    sig_12=sig_12,
                    h2=h2_chrom,
                    n=n,
                    ld_radius=ld_radius,
                    num_iter=num_iter,
                    burn_in=burn_in,
                    ld_dict=chrom_ld_dict[chrom_str],
                    start_betas=annopred_inf_chrom_dict[chrom_str],
                    zero_jump_prob=zero_jump_prob)
                updated_betas = res_dict['betas']
                updated_inf_betas = res_dict['inf_betas']
                sum_sqr_effects = sp.sum(updated_betas**2)
                if sum_sqr_effects > gw_h2_ld_score_est:
                    print 'Sum of squared updated effects estimates seems too large:', sum_sqr_effects
                    print 'This suggests that the Gibbs sampler did not converge.'

                print 'Calculating scores for Chromosome %s' % (
                    (chrom_str.split('_'))[1])
                updated_betas = updated_betas / (snp_stds.flatten())
                updated_inf_betas = updated_inf_betas / (snp_stds.flatten())
                annopred_effect_sizes.extend(updated_betas)
                annopred_inf_effect_sizes.extend(updated_inf_betas)
                if has_phenotypes:
                    prs = sp.dot(updated_betas, raw_snps)
                    prs_inf = sp.dot(updated_inf_betas, raw_snps)
                    risk_scores_pval_derived += prs
                    risk_scores_pval_derived_inf += prs_inf
                    corr = sp.corrcoef(y, prs)[0, 1]
                    r2 = corr**2
                    corr_inf = sp.corrcoef(y, prs_inf)[0, 1]
                    r2_inf = corr_inf**2
                    #                    print 'The R2 prediction accuracy of PRS using %s was: %0.4f' %(chrom_str, r2)
                    #                    print 'The R2 prediction accuracy of PRS using %s was: %0.4f' %(chrom_str, r2_inf)
                    out.append('The R2 prediction accuracy of PRS using ' +
                               chrom_str + ' was ' + str(r2) + '\n')
                    out_inf.append('The R2 prediction accuracy of PRS using ' +
                                   chrom_str + ' was ' + str(r2_inf) + '\n')


#        print 'There were %d (SNP) effects' % num_snps
        if has_phenotypes:
            num_indivs = len(y)
            results_dict[p_str]['y'] = y
            results_dict[p_str]['risk_scores_pd'] = risk_scores_pval_derived
            #            print 'Prediction accuracy was assessed using %d individuals.'%(num_indivs)
            out.append('Prediction accuracy was assessed using ' +
                       str(num_indivs) + ' individuals\n')

            corr = sp.corrcoef(y, risk_scores_pval_derived)[0, 1]
            r2 = corr**2
            results_dict[p_str]['r2_pd'] = r2
            #            print 'The  R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (r2, ((1-r2)**2)/num_indivs)
            out.append(
                'The  R2 prediction accuracy (observed scale) for the whole genome was: '
                + str(r2) + ' (' + str(((1 - r2)**2) / num_indivs) + ')\n')

            corr_inf = sp.corrcoef(y, risk_scores_pval_derived_inf)[0, 1]
            r2_inf = corr_inf**2
            results_dict[p_str]['r2_pd_inf'] = r2_inf
            #            print 'The  R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (r2_inf, ((1-r2_inf)**2)/num_indivs)
            out_inf.append(
                'The  R2 prediction accuracy (observed scale) for the whole genome was: '
                + str(r2_inf) + ' (' + str(((1 - r2_inf)**2) / num_indivs) +
                ')\n')

            if corr < 0:
                risk_scores_pval_derived = -1 * risk_scores_pval_derived
            auc = pred_accuracy(y, risk_scores_pval_derived)
            print 'AnnoPred AUC/COR for the whole genome was: %0.4f' % auc
            out.append('AUC/COR for the whole genome was: ' + str(auc) + '\n')

            if corr_inf < 0:
                risk_scores_pval_derived_inf = -1 * risk_scores_pval_derived_inf
            auc_inf = pred_accuracy(y, risk_scores_pval_derived_inf)
            print 'AnnoPred-inf AUC/COR for the whole genome was: %0.4f' % auc_inf
            out_inf.append('AUC/COR for the whole genome was: ' +
                           str(auc_inf) + '\n')

            sp.savetxt('%s_y_' % (out_file_prefix) + str(p) + '.txt', y)
            sp.savetxt('%s_prs_' % (out_file_prefix) + str(p) + '.txt',
                       risk_scores_pval_derived)
            sp.savetxt('%s_prs-inf' % (out_file_prefix) + str(p) + '.txt',
                       risk_scores_pval_derived_inf)

            #Now calibration
            denominator = sp.dot(risk_scores_pval_derived.T,
                                 risk_scores_pval_derived)
            y_norm = (y - sp.mean(y)) / sp.std(y)
            numerator = sp.dot(risk_scores_pval_derived.T, y_norm)
            regression_slope = (numerator / denominator)  #[0][0]
            #            print 'The slope for predictions with P-value derived  effects is:',regression_slope
            out.append(
                'The slope for predictions with P-value derived  effects is: '
                + str(regression_slope) + '\n')
            results_dict[p_str]['slope_pd'] = regression_slope

            ff = open('%s_non_inf_auc_' % (out_file_prefix) + str(p) + '.txt',
                      "w")
            ff.writelines(out)
            ff.close()

            ff_inf = open('%s_inf_auc_' % (out_file_prefix) + str(p) + '.txt',
                          "w")
            ff_inf.writelines(out_inf)
            ff_inf.close()

        weights_out_file = '%s_non_inf_betas_' % (out_file_prefix) + str(
            p) + '.txt'
        with open(weights_out_file, 'w') as f:
            f.write(
                'chrom    pos    sid    nt1    nt2    raw_beta     AnnoPred_beta\n'
            )
            for chrom, pos, sid, nt, raw_beta, annopred_beta in it.izip(
                    chromosomes, out_positions, out_sids, out_nts,
                    raw_effect_sizes, annopred_effect_sizes):
                nt1, nt2 = nt[0], nt[1]
                f.write('%s    %d    %s    %s    %s    %0.4e    %0.4e\n' %
                        (chrom, pos, sid, nt1, nt2, raw_beta, annopred_beta))

        weights_out_file = '%s_inf_betas_' % (out_file_prefix) + str(
            p) + '.txt'
        with open(weights_out_file, 'w') as f:
            f.write(
                'chrom    pos    sid    nt1    nt2    raw_beta    AnnoPred_inf_beta \n'
            )
            for chrom, pos, sid, nt, raw_beta, annopred_inf_beta in it.izip(
                    chromosomes, out_positions, out_sids, out_nts,
                    raw_effect_sizes, annopred_inf_effect_sizes):
                nt1, nt2 = nt[0], nt[1]
                f.write(
                    '%s    %d    %s    %s    %s    %0.4e    %0.4e\n' %
                    (chrom, pos, sid, nt1, nt2, raw_beta, annopred_inf_beta))
Example #38
0
def Move(prevspace, direction, timestep):
    space = sci.copy(prevspace)
    check = sci.zeros(
        (space.shape[0], space.shape[1]))  #zero indicates not moved
    [rows, cols] = space.shape
    rem = timestep % 4
    # Alternate the sweep order with the time step; use elif so only one branch
    # applies, and materialize lists so jrange can be re-iterated for every i.
    if (rem == 0):
        irange = list(range(1, rows - 1))
        jrange = list(range(1, cols - 1))
    elif (rem == 1):
        irange = list(range(1, rows - 1))
        jrange = list(reversed(range(1, cols - 1)))
    elif (rem == 2):
        irange = list(reversed(range(1, rows - 1)))
        jrange = list(range(1, cols - 1))
    else:
        irange = list(reversed(range(1, rows - 1)))
        jrange = list(reversed(range(1, cols - 1)))
    for i in irange:
        for j in jrange:
            direct = direction[i, j]
            if ((space[i, j] == 1 or space[i, j] == 100) and check[i, j] == 0):
                count = 0
                shift = sci.random.choice([1, -1])
                while (space[i, j] != 0):
                    count += 1
                    if (direct == 0 and space[i - 1, j] == 0):
                        space[i - 1,
                              j], space[i, j] = space[i, j], space[i - 1, j]
                        check[i - 1, j] = 1
                    elif (direct == 1 and space[i - 1, j + 1] == 0):
                        space[i - 1,
                              j + 1], space[i, j] = space[i, j], space[i - 1,
                                                                       j + 1]
                        check[i - 1, j + 1] = 1
                    elif (direct == 2 and space[i, j + 1] == 0):
                        space[i, j + 1], space[i, j] = space[i,
                                                             j], space[i,
                                                                       j + 1]
                        check[i, j + 1] = 1
                    elif (direct == 3 and space[i + 1, j + 1] == 0):
                        space[i + 1,
                              j + 1], space[i, j] = space[i, j], space[i + 1,
                                                                       j + 1]
                        check[i + 1, j + 1] = 1
                    elif (direct == 4 and space[i + 1, j] == 0):
                        space[i + 1,
                              j], space[i, j] = space[i, j], space[i + 1, j]
                        check[i + 1, j] = 1
                    elif (direct == 5 and space[i + 1, j - 1] == 0):
                        space[i + 1,
                              j - 1], space[i, j] = space[i, j], space[i + 1,
                                                                       j - 1]
                        check[i + 1, j - 1] = 1
                    elif (direct == 6 and space[i, j - 1] == 0):
                        space[i, j - 1], space[i, j] = space[i,
                                                             j], space[i,
                                                                       j - 1]
                        check[i, j - 1] = 1
                    elif (direct == 7 and space[i - 1, j - 1] == 0):
                        space[i - 1,
                              j - 1], space[i, j] = space[i, j], space[i - 1,
                                                                       j - 1]
                        check[i - 1, j - 1] = 1
                    else:
                        direct += shift
                        if (direct > 7):
                            direct = 0
                        elif (direct < 0):
                            direct = 7

                    if (count > 7):
                        break

    return space
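
# A minimal usage sketch for Move (hypothetical grid and directions): 1 marks an
# occupied cell, and the direction codes 0-7 step clockwise starting from "up"
# (0 = up, 2 = right, 4 = down, 6 = left).  Only interior cells are updated.
import scipy as sci
space = sci.zeros((10, 10))
space[4, 4] = 1
space[6, 6] = 1
direction = sci.random.randint(0, 8, size=space.shape)   # one code per cell
space = Move(space, direction, timestep=0)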
Example #39
0
    def plot_rc(self,
                save=False,
                usgs=True,
                hand=True,
                xs=True,
                xsapprox=True,
                kind='power',
                dist=5000,
                raw=False,
                alpha=0.05,
                div=5,
                box=False):
        """Plot USGS, HAND and cross-section (XS) rating curves.
        'usgs' - plot interpolated USGS rating curve [T/F]
        'hand' - plot interpolated HAND rating curve [T/F]
        'xs' - plot XS rating curves [T/F]
        'xsapprox' - plot XS rating-curve approximation from n-value averages [T/F]
        'kind' - interpolation type passed to self.interp ('power', 'linear', 'cubic', ...)
        'dist' - interval length along the reach used to group cross-sections
        'raw' - plot only the raw HEC-RAS points instead of interpolated values [T/F]
        'alpha' - alpha for confidence intervals [float(0.0,1.0)]
        'div' - number of interpolation intervals per curve [int]
        'save' - path to save the figure; if False, show it interactively
        'box' - not used in this routine"""

        fig, ax = plt.subplots()

        if usgs:  # Plot interpolated USGS rating curve
            # Plot curves
            for q, h in zip(self.usgsq, self.usgsh):
                if kind == 'cubic':
                    print 'USGS interpolation plotted as power-law fit'
                    f = self.interp(x=q, y=h, kind='power')
                else:
                    f = self.interp(x=q, y=h, kind=kind)
                ax.plot(q, f(q), label='usgs', c='g', linewidth=5)

        if hand:  # Plot interpolated HAND rating curve
            # Plot curves
            f = self.interp(x=self.handq, y=self.handh, kind=kind)
            ax.plot(self.handq,
                    f(self.handq),
                    label='hand',
                    c='b',
                    linewidth=5)

        if xs:  # Plot all linearly-interpolated XS rating curves
            intervals = scipy.arange(dist, self.handlen + dist, dist)
            # print 'Intervals:',intervals

            cutoffub = [i / self.handlen * 100 for i in intervals]
            cutofflb = scipy.copy(cutoffub)
            cutofflb = scipy.insert(cutofflb, 0, 0)[:-1]
            cutoffs = zip(cutofflb, cutoffub)
            for l, u in cutoffs:
                idx = scipy.where(
                    scipy.logical_and(scipy.greater_equal(self.xs_profs, l),
                                      scipy.less(self.xs_profs, u)))[0]
                if u > 100: u = 100.00

                fig, ax = plt.subplots()  # get figure and axes for plotting
                fname = 'results/by5000/{0}/rc__comid_{0}_from_{1}_to_{2}.png'.format(
                    self.comid, ('%.2f' % l), ('%.2f' % u))

                for prof, disch, stage in zip(self.xs_profs[idx],
                                              self.xs_disch[idx],
                                              self.xs_stage[idx]):
                    # Get interpolation function
                    # print (('%.2f' % prof) + str(disch))
                    # print (('%.2f' % prof) + str(stage))

                    f = self.interp(x=disch, y=stage, kind=kind)

                    if raw == True:  # Plot raw data (ie. only HEC-RAS points)
                        # interp over discharge
                        ax.plot(disch, f(disch), c='grey', linewidth=2)

                        # interp over stage (switched axes) for testing
                        # f = self.interp(x=stage,y=disch,kind=kind)
                        # ax.plot(f(stage),stage,c='purple',linewidth=1)

                    if raw == False:  # Plot interpolated data (ie. 'div' many interpolated points)
                        interval = disch[-1] / div
                        qvals = scipy.arange(0, (disch[-1] + interval),
                                             interval)  # [1:]
                        ax.plot(qvals, f(qvals), c='grey', linewidth=2)

                # Add one label for all cross-section curves
                ax.plot([], [], label='HEC-RAS', c='grey', linewidth=2)
                # Plot graph
                fig.set_size_inches(20, 16, forward=True)
                plt.gca().set_xlim(left=0, right=self.max_disch)
                plt.gca().set_ylim(bottom=0, top=self.max_stage)
                ax.set_xticks(ax.get_xticks()[::2])
                ax.set_yticks(ax.get_yticks()[::2])
                title = 'COMID {0}, ({1},{2})'.format(self.comid, ('%.2f' % l),
                                                      ('%.2f' % u))
                ax.set_title(title, y=1.04, fontsize=56)
                plt.xlabel('Q (cfs)', fontsize=56)
                plt.ylabel('H (ft)', fontsize=56)
                ax.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
                plt.rc('font', size=56)
                plt.legend(loc='upper left', fontsize=40)
                plt.tick_params(axis='both', labelsize=56)
                plt.grid()

                # print '\n------------------------\n'
                if xsapprox:
                    # Add approximate rating curve from average n-values
                    qvals, hvals = self.get_xs_q(low=0, upto=83)
                    f = self.interp(x=qvals, y=hvals, kind=kind)
                    ax.plot(qvals,
                            f(qvals),
                            label='Resistance Function',
                            c='red',
                            linewidth=5)

                    # Add approximate rating curve for these indices
                    idxqvals, idxhvals = self.get_xs_q(low=idx[0],
                                                       upto=idx[-1])

                    if len(idxqvals) == 0:
                        print 'No data found for profiles {0} to {1}'.format(
                            ('%.2f' % l), ('%.2f' % u))
                        break

                    # f = self.interp(x=idxqvals,y=idxhvals,kind=kind)
                    # ax.plot(idxqvals,f(idxqvals),label='Resistance Function Local Average',c='orange',linewidth=5)

        # else: fig,ax = plt.subplots()

        # Plot graph
        fig.set_size_inches(20, 16, forward=True)
        plt.gca().set_xlim(left=0, right=self.usgsq[0][-1])
        plt.gca().set_ylim(bottom=0, top=self.usgsh[0][-1])
        ax.set_xticks(ax.get_xticks()[::2])
        ax.set_yticks(ax.get_yticks()[::2])
        title = 'COMID {0}'.format(self.comid)
        ax.set_title(title, y=1.04, fontsize=56)
        plt.xlabel('Q (cfs)', fontsize=56)
        plt.ylabel('H (ft)', fontsize=56)
        ax.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        plt.rc('font', size=56)
        plt.legend(loc='upper left', fontsize=40)
        plt.tick_params(axis='both', labelsize=56)
        plt.grid()

        if save:
            fig.savefig(save)
            plt.clf()

        if not save:
            # mng = plt.get_current_fig_manager()
            # mng.resize(*mng.window.maxsize())
            plt.show()
            plt.clf()
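
# The interp helper used above with kind='power' is not shown in this snippet; a
# minimal standalone sketch of a power-law rating-curve fit, H = a * Q**b, via
# log-log linear regression (hypothetical data points):
import scipy
def power_fit(q, h):
    # fit log(h) = b*log(q) + log(a)
    b, log_a = scipy.polyfit(scipy.log(q), scipy.log(h), 1)
    return lambda x: scipy.exp(log_a) * x**b
f = power_fit(scipy.array([10.0, 100.0, 1000.0]),
              scipy.array([1.2, 3.5, 9.8]))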
Example #40
0
def _intersections(x1, y1, x2, y2):
    """X0,Y0 = intersections(X1,Y1,X2,Y2)
    INTERSECTIONS Intersections of curves.
      Computes the (x,y) locations where two curves intersect.  The curves
      can be broken with NaNs or have vertical segments.
    
    Example:
      [X0,Y0] = intersections(X1,Y1,X2,Y2);
    
    where X1 and Y1 are equal-length vectors of at least two points and
    represent curve 1.  Similarly, X2 and Y2 represent curve 2.
    X0 and Y0 are column vectors containing the points at which the two
    curves intersect.

    The algorithm can return two additional vectors that indicate which
    segment pairs contain intersections and where they are:

      [X0,Y0,I,J] = intersections(X1,Y1,X2,Y2);
    
    For each element of the vector I, I(k) = (segment number of (X1,Y1)) +
    (how far along this segment the intersection is).  For example, if I(k) =
    45.25 then the intersection lies a quarter of the way between the line
    segment connecting (X1(45),Y1(45)) and (X1(46),Y1(46)).  Similarly for
    the vector J and the segments in (X2,Y2).

    Version: 1.10, 25 February 2008
    Converted to Python October 2010 by Jeffrey Bush [email protected]
    Author:  Douglas M. Schwarz
    Email:   dmschwarz=ieee*org, dmschwarz=urgrad*rochester*edu
    Real_email = regexprep(Email,{'=','*'},{'@','.'})

    Theory of operation:
      Given two line segments, L1 and L2,
    
      L1 endpoints:  (x1(1),y1(1)) and (x1(2),y1(2))
      L2 endpoints:  (x2(1),y2(1)) and (x2(2),y2(2))
    
    we can write four equations with four unknowns and then solve them.  The
    four unknowns are t1, t2, x0 and y0, where (x0,y0) is the intersection of
    L1 and L2, t1 is the distance from the starting point of L1 to the
    intersection relative to the length of L1 and t2 is the distance from the
    starting point of L2 to the intersection relative to the length of L2.
    
    So, the four equations are
    
       (x1(2) - x1(1))*t1 = x0 - x1(1)
       (x2(2) - x2(1))*t2 = x0 - x2(1)
       (y1(2) - y1(1))*t1 = y0 - y1(1)
       (y2(2) - y2(1))*t2 = y0 - y2(1)
    
    Rearranging and writing in matrix form,
    
      [x1(2)-x1(1)       0       -1   0;      [t1;      [-x1(1);
            0       x2(2)-x2(1)  -1   0;   *   t2;   =   -x2(1);
       y1(2)-y1(1)       0        0  -1;       x0;       -y1(1);
            0       y2(2)-y2(1)   0  -1]       y0]       -y2(1)]
    
    Let's call that A*T = B.  We can solve for T with T = A\B.
    
    Once we have our solution we just have to look at t1 and t2 to determine
    whether L1 and L2 intersect.  If 0 <= t1 < 1 and 0 <= t2 < 1 then the two
    line segments cross and we can include (x0,y0) in the output.
    
    In principle, we have to perform this computation on every pair of line
    segments in the input data.  This can be quite a large number of pairs so
    we will reduce it by doing a simple preliminary check to eliminate line
    segment pairs that could not possibly cross.  The check is to look at the
    smallest enclosing rectangles (with sides parallel to the axes) for each
    line segment pair and see if they overlap.  If they do then we have to
    compute t1 and t2 (via the A\B computation) to see if the line segments
    cross, but if they don't then the line segments cannot cross.  In a
    typical application, this technique will eliminate most of the potential
    line segment pairs.
    """

    # x1 and y1 must be vectors with same number of points (at least 2).
    if sp.sum(sp.size(x1) > 1) != 1 or sp.sum(
            sp.size(y1) > 1) != 1 or len(x1) != len(y1):
        raise ValueError(
            'X1 and Y1 must be equal-length vectors of at least 2 points.')
    # x2 and y2 must be vectors with same number of points (at least 2).
    if sp.sum(sp.size(x2) > 1) != 1 or sp.sum(
            sp.size(y2) > 1) != 1 or len(x2) != len(y2):
        raise ValueError(
            'X2 and Y2 must be equal-length vectors of at least 2 points.')

    # Compute number of line segments in each curve and some differences we'll
    # need later.
    n1 = len(x1) - 1
    n2 = len(x2) - 1
    xy1 = sp.column_stack((x1, y1))
    xy2 = sp.column_stack((x2, y2))
    dxy1 = sp.diff(xy1, axis=0)
    dxy2 = sp.diff(xy2, axis=0)

    # Determine the combinations of i and j where the rectangle enclosing the
    # i'th line segment of curve 1 overlaps with the rectangle enclosing the
    # j'th line segment of curve 2.
    i, j = sp.nonzero(
        sp.logical_and(
            sp.logical_and(
                sp.logical_and(
                    sp.tile(sp.minimum(x1[0:-1], x1[1:]),
                            (n2, 1)).T <= sp.tile(sp.maximum(x2[0:-1], x2[1:]),
                                                  (n1, 1)),
                    sp.tile(sp.maximum(x1[0:-1], x1[1:]),
                            (n2, 1)).T >= sp.tile(sp.minimum(x2[0:-1], x2[1:]),
                                                  (n1, 1))),
                sp.tile(sp.minimum(y1[0:-1], y1[1:]),
                        (n2, 1)).T <= sp.tile(sp.maximum(y2[0:-1], y2[1:]),
                                              (n1, 1))),
            sp.tile(sp.maximum(y1[0:-1], y1[1:]),
                    (n2, 1)).T >= sp.tile(sp.minimum(y2[0:-1], y2[1:]),
                                          (n1, 1))))
    i = sp.copy(i)  # make the arrays writable
    j = sp.copy(j)

    # Find segments pairs which have at least one vertex = NaN and remove them.
    # This line is a fast way of finding such segment pairs.  We take
    # advantage of the fact that NaNs propagate through calculations, in
    # particular subtraction (in the calculation of dxy1 and dxy2, which we
    # need anyway) and addition.
    remove = sp.isnan(sp.sum(dxy1[i, :] + dxy2[j, :], axis=1))
    i = i[~remove]
    j = j[~remove]

    # Initialize matrices.  We'll put the T's and B's in matrices and use them
    # one column at a time.  AA is a 3-D extension of A where we'll use one
    # plane at a time.
    n = len(i)
    T = sp.zeros((4, n))
    AA = sp.zeros((4, 4, n))
    AA[[0, 1], 2, :] = -1
    AA[[2, 3], 3, :] = -1
    AA[[0, 2], 0, :] = dxy1[i, :].T
    AA[[1, 3], 1, :] = dxy2[j, :].T
    B = -sp.array([x1[i], x2[j], y1[i], y2[j]])

    # Loop through possibilities.  Trap singularity warning and then use
    # lastwarn to see if that plane of AA is near singular.  Process any such
    # segment pairs to determine if they are colinear (overlap) or merely
    # parallel.  That test consists of checking to see if one of the endpoints
    # of the curve 2 segment lies on the curve 1 segment.  This is done by
    # checking the cross product
    #
    #   (x1(2),y1(2)) - (x1(1),y1(1)) x (x2(2),y2(2)) - (x1(1),y1(1)).
    #
    # If this is close to zero then the segments overlap.
    for k in sp.arange(n):
        L, U = lin.lu(AA[:, :, k], True)
        T[:, k] = lin.solve(U, lin.solve(L, B[:, k]))

    # Find where t1 and t2 are between 0 and 1 and return the corresponding
    # x0 and y0 values.
    in_range = sp.logical_and(
        sp.logical_and(sp.logical_and(T[0, :] >= 0, T[1, :] >= 0),
                       T[0, :] < 1), T[1, :] < 1)
    x0 = T[2, in_range].T
    y0 = T[3, in_range].T

    return x0, y0
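
# A minimal usage sketch (hypothetical segments; assumes the module-level
# imports used inside the function are available): two straight lines crossing
# at (0.5, 0.5).
import scipy as sp
x1, y1 = sp.array([0.0, 1.0]), sp.array([0.0, 1.0])
x2, y2 = sp.array([0.0, 1.0]), sp.array([1.0, 0.0])
x0, y0 = _intersections(x1, y1, x2, y2)   # expect x0 ~ [0.5], y0 ~ [0.5]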
Example #41
0
 def test_calculates_time(self):
     self.Data.calc_time()
     self.assertTrue(hasattr(self.Data, 'time'))
     t_copy = sp.copy(self.Data.time)
     t_copy.sort()
     self.assertTrue(sp.allclose(t_copy, self.Data.time))
Example #42
0
# Author: Pablo Gullith
# Libraries
from scipy import floor, linspace, array, zeros, copy, loadtxt
from scipy.fftpack import rfft, irfft, dct, idct
from pylab import plot, show, xlabel, ylabel

dow2 = loadtxt('dow2.txt', float)
plot(dow2)
show()


dow2_fourier = rfft(dow2)
N = len(dow2_fourier)
Primeiros_2_porcento = zeros(N, float)
Primeiros_2_porcento[0 : int(N / 50)] = copy(dow2_fourier[0 : int(N / 50)])
Suavizada_dow2 = irfft(Primeiros_2_porcento)


dow2_cos = dct(dow2)
n = len(dow2_cos)
Primeiros_cos_2_porcento = zeros(n, float)
Primeiros_cos_2_porcento[0 : int(n / 50)] = copy(dow2_cos[0 : int(n / 50)])
Suavizada_cos_dow2 = idct(Primeiros_cos_2_porcento) / (2*n) 

plot(dow2, 'k')
plot(Suavizada_dow2, 'g')
show()
plot(dow2, 'k')
plot(Suavizada_cos_dow2, 'r')
show()
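
# A quick check of the DCT normalization assumed above: with scipy.fftpack's
# default (norm=None), idct(dct(x)) returns 2*N*x, which is why the DCT-smoothed
# series is divided by 2*n.
from scipy.fftpack import dct, idct
from scipy import arange, allclose
x = arange(8, dtype=float)
assert allclose(idct(dct(x)) / (2 * len(x)), x)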
Example #43
0
def trim_saddle_points(peaks, dt, max_iters=10):
    r"""
    Removes peaks that were mistakenly identified because they lied on a
    saddle or ridge in the distance transform that was not actually a true
    local peak.

    Parameters
    ----------
    peaks : ND-array
        A boolean image containing True values to mark peaks in the distance
        transform (``dt``)

    dt : ND-array
        The distance transform of the pore space for which the true peaks are
        sought.

    max_iters : int
        The maximum number of iterations to run while eroding the saddle
        points.  The default is 10, which is usually not reached; however,
        a warning is issued if the loop ends prior to removing all saddle
        points.

    Returns
    -------
    image : ND-array
        An image with fewer peaks than the input image

    References
    ----------
    [1] Gostick, J. "A versatile and efficient network extraction algorithm
    using marker-based watershed segmentation".  Physical Review E. (2017)

    """
    peaks = sp.copy(peaks)
    if dt.ndim == 2:
        from skimage.morphology import square as cube
    else:
        from skimage.morphology import cube
    labels, N = spim.label(peaks)
    slices = spim.find_objects(labels)
    for i in range(N):
        s = extend_slice(s=slices[i], shape=peaks.shape, pad=10)
        peaks_i = labels[s] == i+1
        dt_i = dt[s]
        im_i = dt_i > 0
        iters = 0
        peaks_dil = sp.copy(peaks_i)
        while iters < max_iters:
            iters += 1
            peaks_dil = spim.binary_dilation(input=peaks_dil,
                                             structure=cube(3))
            peaks_max = peaks_dil*sp.amax(dt_i*peaks_dil)
            peaks_extended = (peaks_max == dt_i)*im_i
            if sp.all(peaks_extended == peaks_i):
                break  # Found a true peak
            elif sp.sum(peaks_extended*peaks_i) == 0:
                peaks_i = False
                break  # Found a saddle point
        peaks[s] = peaks_i
        if iters >= max_iters:
            print('Maximum number of iterations reached, consider '
                  + 'running again with a larger value of max_iters')
    return peaks
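
# A minimal usage sketch (hypothetical image; assumes the module-level imports
# and the extend_slice helper used inside trim_saddle_points are available):
# detect candidate peaks of the distance transform with a maximum filter, then
# prune the ones sitting on saddles between the two overlapping regions.
import scipy as sp
import scipy.ndimage as spim
im = sp.zeros((40, 40), dtype=bool)
im[5:22, 5:22] = True          # first square of pore space
im[18:35, 18:35] = True        # overlapping second square creates a saddle
dt = spim.distance_transform_edt(im)
peaks = (spim.maximum_filter(dt, size=3) == dt) * im
peaks = trim_saddle_points(peaks=peaks, dt=dt, max_iters=10)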
Example #44
0
def ldpred_gibbs(beta_hats,
                 genotypes=None,
                 start_betas=None,
                 h2=None,
                 n=1000,
                 ld_radius=100,
                 num_iter=60,
                 burn_in=10,
                 p=None,
                 zero_jump_prob=0.05,
                 ld_dict=None,
                 reference_ld_mats=None,
                 ld_boundaries=None,
                 verbose=False):
    """
    LDpred (Gibbs Sampler) 
    """
    t0 = time.time()
    m = len(beta_hats)
    n = float(n)

    # If no starting values for effects were given, then use the infinitesimal model starting values.
    if start_betas is None:
        print(
            'Initializing LDpred effects with posterior mean LDpred-inf effects.'
        )
        print('Calculating LDpred-inf effects.')
        start_betas = LDpred_inf.ldpred_inf(
            beta_hats,
            genotypes=genotypes,
            reference_ld_mats=reference_ld_mats,
            h2=h2,
            n=n,
            ld_window_size=2 * ld_radius,
            verbose=False)
    curr_betas = sp.copy(start_betas)
    assert len(
        curr_betas
    ) == m, 'Betas returned by LDpred_inf do not have the same length as expected.'
    curr_post_means = sp.zeros(m)
    avg_betas = sp.zeros(m)

    # Iterating over effect estimates in sequential order
    iter_order = sp.arange(m)

    # Setting up the marginal Bayes shrink
    Mp = m * p
    hdmp = (h2 / Mp)
    hdmpn = hdmp + 1.0 / n
    hdmp_hdmpn = (hdmp / hdmpn)
    c_const = (p / sp.sqrt(hdmpn))
    d_const = (1.0 - p) / (sp.sqrt(1.0 / n))
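    # Under the assumed point-normal prior, a causal effect has variance
    # h2/(M*p) (hdmp); adding the sampling variance 1/n gives the marginal
    # variance of a causal beta_hat (hdmpn).  hdmp/hdmpn is the posterior-mean
    # shrinkage factor, and c_const/d_const are the mixture weights used below
    # to compute the posterior inclusion probability.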

    for k in range(num_iter):  # Big iteration

        # Force an alpha shrink if estimates are way off compared to heritability estimates.  (Improves MCMC convergence.)
        h2_est = max(0.00001, sp.sum(curr_betas**2))
        alpha = min(1 - zero_jump_prob, 1.0 / h2_est,
                    (h2 + 1.0 / sp.sqrt(n)) / h2_est)

        rand_ps = sp.random.random(m)
        rand_norms = stats.norm.rvs(0.0, (hdmp_hdmpn) * (1.0 / n), size=m)

        if ld_boundaries is None:
            for i, snp_i in enumerate(iter_order):
                start_i = max(0, snp_i - ld_radius)
                focal_i = min(ld_radius, snp_i)
                stop_i = min(m, snp_i + ld_radius + 1)

                # Local LD matrix
                D_i = ld_dict[snp_i]

                # Local (most recently updated) effect estimates
                local_betas = curr_betas[start_i:stop_i]

                # Calculate the local posterior mean, used when sampling.
                local_betas[focal_i] = 0.0
                res_beta_hat_i = beta_hats[snp_i] - sp.dot(D_i, local_betas)
                b2 = res_beta_hat_i**2

                d_const_b2_exp = d_const * sp.exp(-b2 * n / 2.0)
                if sp.isreal(d_const_b2_exp):
                    numerator = c_const * sp.exp(-b2 / (2.0 * hdmpn))
                    if sp.isreal(numerator):
                        if numerator == 0.0:
                            postp = 0.0
                        else:
                            postp = numerator / (numerator + d_const_b2_exp)
                            assert sp.isreal(
                                postp
                            ), 'The posterior mean is not a real number?  Possibly due to problems with summary stats, LD estimates, or parameter settings.'
                    else:
                        postp = 0.0
                else:
                    postp = 1.0
                curr_post_means[snp_i] = hdmp_hdmpn * postp * res_beta_hat_i

                if rand_ps[i] < postp * alpha:
                    # Sample from the posterior Gaussian dist.
                    proposed_beta = rand_norms[i] + hdmp_hdmpn * res_beta_hat_i

                else:
                    # Sample 0
                    proposed_beta = 0.0

                curr_betas[snp_i] = proposed_beta  # UPDATE BETA
        else:
            for i, snp_i in enumerate(iter_order):
                start_i = ld_boundaries[snp_i][0]
                stop_i = ld_boundaries[snp_i][1]
                focal_i = snp_i - start_i

                # Local LD matrix
                D_i = ld_dict[snp_i]

                # Local (most recently updated) effect estimates
                local_betas = curr_betas[start_i:stop_i]

                # Calculate the local posterior mean, used when sampling.
                local_betas[focal_i] = 0.0
                res_beta_hat_i = beta_hats[snp_i] - sp.dot(D_i, local_betas)
                b2 = res_beta_hat_i**2

                d_const_b2_exp = d_const * sp.exp(-b2 * n / 2.0)
                if sp.isreal(d_const_b2_exp):
                    numerator = c_const * sp.exp(-b2 / (2.0 * hdmpn))
                    if sp.isreal(numerator):
                        if numerator == 0.0:
                            postp = 0.0
                        else:
                            postp = numerator / (numerator + d_const_b2_exp)
                            assert sp.isreal(
                                postp
                            ), 'Posterior mean is not a real number? Possibly due to problems with summary stats, LD estimates, or parameter settings.'
                    else:
                        postp = 0.0
                else:
                    postp = 1.0
                curr_post_means[snp_i] = hdmp_hdmpn * postp * res_beta_hat_i

                if rand_ps[i] < postp * alpha:
                    # Sample from the posterior Gaussian dist.
                    proposed_beta = rand_norms[i] + hdmp_hdmpn * res_beta_hat_i

                else:
                    # Sample 0
                    proposed_beta = 0.0

                curr_betas[snp_i] = proposed_beta  # UPDATE BETA
        if verbose:
            sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' %
                             (100.0 * (min(1,
                                           float(k + 1) / num_iter))))
            sys.stdout.flush()

        if k >= burn_in:
            avg_betas += curr_post_means  # Averaging over the posterior means instead of samples.

    avg_betas = avg_betas / float(num_iter - burn_in)
    t1 = time.time()
    t = (t1 - t0)
    if verbose:
        print('\nTook %d minutes and %0.2f seconds' % (t / 60, t % 60))
    return {'betas': avg_betas, 'inf_betas': start_betas}
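
# A minimal synthetic sketch of calling the sampler (all inputs hypothetical;
# assumes the module-level imports such as sp and stats used inside the
# function): m independent SNPs, so each ld_dict entry is just the local window
# of an identity LD row.
import scipy as sp
m, n, ld_radius = 50, 10000, 5
beta_hats = sp.random.normal(0.0, sp.sqrt(1.0 / n), m)
ld_dict = {}
for snp_i in range(m):
    start_i = max(0, snp_i - ld_radius)
    stop_i = min(m, snp_i + ld_radius + 1)
    D_i = sp.zeros(stop_i - start_i)
    D_i[min(ld_radius, snp_i)] = 1.0       # self-LD only
    ld_dict[snp_i] = D_i
res = ldpred_gibbs(beta_hats, start_betas=sp.zeros(m), h2=0.1, n=n,
                   ld_radius=ld_radius, num_iter=60, burn_in=10, p=0.1,
                   ld_dict=ld_dict)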
Example #45
0
def trim_nearby_peaks(peaks, dt):
    r"""
    Finds pairs of peaks that are nearer to each other than to the solid phase,
    and removes the peak that is closer to the solid.

    Parameters
    ----------
    peaks : ND-array
        A boolean image containing True values to mark peaks in the distance
        transform (``dt``)

    dt : ND-array
        The distance transform of the pore space for which the true peaks are
        sought.

    Returns
    -------
    image : ND-array
        An array the same size as ``peaks`` containing a subset of the peaks
        in the original image.

    Notes
    -----
    Each pair of peaks is considered simultaneously, so for a triplet of peaks
    each pair is considered.  This ensures that only the single peak that is
    furthest from the solid is kept.  No iteration is required.

    References
    ----------
    [1] Gostick, J. "A versatile and efficient network extraction algorithm
    using marker-based watershed segmentation".  Physical Review E. (2017)
    """
    peaks = sp.copy(peaks)
    if dt.ndim == 2:
        from skimage.morphology import square as cube
    else:
        from skimage.morphology import cube
    peaks, N = spim.label(peaks, structure=cube(3))
    crds = spim.measurements.center_of_mass(peaks, labels=peaks,
                                            index=sp.arange(1, N+1))
    crds = sp.vstack(crds).astype(int)  # Convert to numpy array of ints
    # Get distance between each peak as a distance map
    tree = sptl.cKDTree(data=crds)
    temp = tree.query(x=crds, k=2)
    nearest_neighbor = temp[1][:, 1]
    dist_to_neighbor = temp[0][:, 1]
    del temp, tree  # Free-up memory
    dist_to_solid = dt[tuple(crds.T)]  # Get distance to solid for each peak
    hits = sp.where(dist_to_neighbor < dist_to_solid)[0]
    # Drop the peak that is closer to the solid than its neighbor
    drop_peaks = []
    for peak in hits:
        if dist_to_solid[peak] < dist_to_solid[nearest_neighbor[peak]]:
            drop_peaks.append(peak)
        else:
            drop_peaks.append(nearest_neighbor[peak])
    drop_peaks = sp.unique(drop_peaks)
    # Remove peaks from image
    slices = spim.find_objects(input=peaks)
    for s in drop_peaks:
        peaks[slices[s]] = 0
    return (peaks > 0)
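
# A minimal usage sketch mirroring the trim_saddle_points example above
# (hypothetical image; assumes the module-level imports are available): the two
# trimming steps are typically chained after an initial peak detection.
import scipy as sp
import scipy.ndimage as spim
im = sp.zeros((40, 40), dtype=bool)
im[5:22, 5:22] = True
im[18:35, 18:35] = True
dt = spim.distance_transform_edt(im)
peaks = (spim.maximum_filter(dt, size=3) == dt) * im
peaks = trim_saddle_points(peaks=peaks, dt=dt, max_iters=10)
peaks = trim_nearby_peaks(peaks=peaks, dt=dt)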
Example #46
0
def main(argv=None):
    print("")
    print("pyOZ - iterative solver of the Ornstein-Zernike equation")
    print("version %s, Lubos Vrbka, 2008-2009" % pyoz_version)
    print("")

    if (argv == None):
        argv = sys.argv

    # parse the input file
    cmdline = inputdata.parse_cmdline(argv)
    # if -o is specified on command line, then it is used as stdout
    # if -o is not specified, then console is used
    if (cmdline['output'] != None):
        try:
            sys.stdout = open(cmdline['output'], "wt")
        except IOError as msg:
            sys.stdout = sys.__stdout__
            print("error opening output file %s" % cmdline['output'])
            print(msg)
            sys.exit(2)
        sys.stderr.write("output redirected to " + cmdline['output'] + "\n")

    # parse the control file with settings and parameters
    # return 4 collections and class with constants
    ctrl, syst, parm, outp, const = inputdata.parse_input(cmdline)

    # allocate distance arrays
    # array of distance in real space
    r = np.array(list(map(lambda x: (x + 1) * ctrl['deltar'], range(ctrl['npoints']))))
    # array of distances in reciprocal space
    k = np.array(list(map(lambda x: (x + 1) * ctrl['deltak'], range(ctrl['npoints']))))

    # initialize the DFT class
    print("initializing DFT routines")
    dft = ft.dft(ctrl['npoints'], ctrl['deltar'], ctrl['deltak'], r, k)
    dft.print_status()
    print("")

    # initialize the plotting subsystem if requested
    if (ctrl['do_graphics']):
        import pyoz_plot
        pyoz_plot.plot_initialize(ctrl, syst, const, r)
    # end if(do_graphics):

    # calculate the total U_ij potential, contributions of individual potentials (hs, lj, coulomb, ...)
    # also numerical derivatives of the contributions (where no analytical form is available)
    # and get the information on discontinuities
    # calculate also the erf-corrected direct correlation functions in real and fourier space
    # according to Ng
    U_ij, U_ij_individual, dU_ij_individual, U_discontinuity, U_erf_ij = potential.def_potential(
        ctrl, syst, parm, const, dft, r, k)
    # write the pair potential to the file, if requested
    if (outp['U_ij_write']):
        print("writing pair potential\t(%s)" % outp['U_ij_name'])
        try:
            fw = open(outp['U_ij_name'], "wt")
            for dr in range(ctrl['npoints']):
                fw.write("%8.3f" % r[dr])
                for i in range(syst['ncomponents']):
                    for j in range(i, syst['ncomponents']):
                        fw.write("%20.5e" % U_ij[i, j, dr])
                # end for i,j in range ncomponents...
                fw.write("\n")
            fw.close()
        except IOError as msg:
            print("error while saving interaction potential")
            print(msg)
            sys.exit(1)
        print("")

    # calculate the exp(-beta U_ij) for total potential (Mayer function + 1)
    # calculate the exp(-beta U_ij) for all individual potentials (hs, lj, coulomb) and evaluate
    # discontinuities where necessary
    # calculate the erf-correction contribution exp(U_erf_ij)
    # store all in a dictionary modMayerFunc
    modMayerFunc = potential.def_modMayerFunc(
        syst, U_ij, U_ij_individual, U_discontinuity, U_erf_ij['real'])
    # store the mayer function itself for the purpose of CG procedure with PY closure
    M = modMayerFunc['u_ij'] - 1.0

    # allocate arrays with direct, total and pair corr. functions, Gamma function
    # some arrays will emerge from arithmetic operations,
    # showing them here makes the code clearer
    # real space: _r_, Fourier space _f_
    # direct correlation function with Ng-correction (cs) applied
    # direct correlation function without (c) and with (C) density factor applied
    # w/o density correction in real space; w/ density correction in Fourier space
    # c_r_ij
    C_f_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    Cs_f_ij = np.zeros(
        (syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    # cs_r_ij
    # pair correlation function
    # g_r_ij
    # h in Fourier space without (h) and with (H) density factor applied
    # H_f_ij
    # matrix of partial structure factors
    # S
    # actual, old (o) and new (n) values for Gamma
    # these are short-ranged Gamma (see the code for more details)
    G_r_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    # G_o_ij
    G_n_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    G_f_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))

    # identity matrix for the solver (dr copies!)
    e_ij = np.eye(syst['ncomponents'])
    #o_ij = ones((syst['ncomponents'],syst['ncomponents']))
    E_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    #O_ij = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    for dr in range(ctrl['npoints']):
        E_ij[:, :, dr] = e_ij
        #O_ij[:,:,dr] = o_ij
    # zero array for the newton-raphson
    Z = np.zeros((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    # arrays for the Newton-Raphson procedure (allocated even if not used)
    CFXq = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    AX = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    AXq = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    Rq = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    SRS = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))
    SRSq = np.empty((syst['ncomponents'], syst['ncomponents'], ctrl['npoints']))

    # process initial Gamma for the first iteration
    # IMPORTANT: do not forget, that we are dealing with the short-range Gamma all the time
    # in case of noncharged systems, it is equal to normal Gamma
    if (cmdline['gamma'] != None):
        print("attempting load of G_ij from %s" % cmdline['gamma'])
        # attempt load of data from external file
        # short-ranged Gamma is loaded
        try:
            if (cmdline['binarygamma']):
                # binary file
                G_r_ij = np.fromfile(cmdline['gamma'])
            else:
                # text file
                G_r_ij = np.fromfile(cmdline['gamma'], float, -1, " ")
            # end if (cmdline['binarygamma']):
            G_r_ij.shape = (syst['ncomponents'],
                            syst['ncomponents'], ctrl['npoints'])
        # end of the try block
        except:
            print("\tload failed, using zero Gamma function")
            # set G_r_ij to a zero Gamma and apply the Ng-correction
            # in reality the short-ranged Gamma function is then not zero, but the original Gamma is
            # when no long-ranged potential (coulomb) is present, the correction is zero
            G_r_ij = -U_erf_ij['real']
        else:
            print("\tsuccesfully loaded")
            # symmetrize the 'matrix' for all values of r
            # G(1,2) = G(2,1) - the pair potentials are symmetric
            for dr in range(ctrl['npoints']):
                G_r_ij[:, :, dr] = (G_r_ij[:, :, dr] +
                                    G_r_ij[:, :, dr].transpose()) / 2
        # end else of the try/except/else block
    else:
        # the Gamma is not loaded - we have zero Gamma, but need to apply the Ng-correction
        # in order to have short-range Gamma
        print("using zero Gamma function")
        G_r_ij = -U_erf_ij['real']
    # end if (cmdline['gamma'] == None)
    print("")

    # update the plot if requested
    if (ctrl['do_graphics']):
        # Gamma will be updated after the closure is called - just to save some unnecessary calls
        pyoz_plot.plot_update(
            syst, const, U_r=U_ij, U_erf=U_erf_ij['real'], G_r=None, c_r=None, g_r=None, c_f=None)
    # end if(do_graphics):

    # the last thing to decide - which solver will be used?
    # linalg.solve is not very efficient on 1x1 and 2x2 matrices that are most frequently used
    # let's try with own functions for such cases
    if (syst['ncomponents'] == 1):
        from pyoz_solver import solver_1 as solver_function
        print("using optimized solver for 1 component")
    elif (syst['ncomponents'] == 2):
        # solver_2 works but is slower!
        from pyoz_solver import solver_2 as solver_function
        print("using optimized solver for 2 components")
    elif (syst['ncomponents'] == 3):
        from pyoz_solver import solver_n as solver_function
        print("using numpy linalg solver for 3 components")
    elif (syst['ncomponents'] > 3):
        from pyoz_solver import solver_n as solver_function
        print("using numpy linalg solver")
    # the correct solver has been selected

    print("\nstarting iteration\n==================")

    total_iter = 0
    converged = 0
    niter = 0

    while (not converged and niter < ctrl['max_iter']):
        # timing purposes
        time_beg = time()

        niter += 1
        total_iter += 1

        print("main\t%4u     " % niter),

        # show "progress bar" when output is redirected
        if (cmdline['output'] != None):
            sys.stderr.write(".")
            sys.stderr.flush()
            if ((total_iter % 25) == 0):
                sys.stderr.write("\n")

        # create copy of original Gamma function
        G_o_ij = np.copy(G_r_ij)

        # call closure relation and get c_r_ij
        # the Ng-formalism is already applied, the erf-correction is taken care of
        # in the definition of the modified Mayer function
        cs_r_ij, g_r_ij = syst['closure'](
            syst, r, modMayerFunc, U_discontinuity, G_r_ij)

        # update the plot if requested
        if (ctrl['do_graphics']):
            pyoz_plot.plot_update(
                syst, const, U_r=None, U_erf=None, G_r=G_r_ij, c_r=cs_r_ij, g_r=g_r_ij, c_f=None)
        # end if(do_graphics):

        # FT c_r_ij to c_f_ij
        # we are using Fourier-sine transform; there are some steps involved in between FBT (Bessel) and FST
        # this will not be discussed here, check the documentation and pyoz_dft.py for further information
        # the whole program is using FTs normalized with the density prefactors
        # sqrt(rho_i * rho_j) in order to have dimensionless functions in k-space
        # i.e., the FTs are multiplied by this factor, iFTs are divided by this factor
        # it follows then, that infinite dilution is taken care of there as well
        for i in range(syst['ncomponents']):
            for j in range(syst['ncomponents']):
                # perform the Fourier-Bessel transform of the short-ranged direct correlation function
                # compensate for the ng correction, return short-ranged and full c in fourier space
                (Cs_f_ij[i, j], C_f_ij[i, j]) = dft.dfbt(
                    cs_r_ij[i, j], norm=syst['dens']['ij'][i, j], corr=-U_erf_ij['fourier'][i, j])
            # end for j in range(ncomponents)
        # end for i in range(ncomponents)

        # update the plot if requested
        if (ctrl['do_graphics']):
            pyoz_plot.plot_update(
                syst, const, U_r=None, U_erf=None, G_r=None, c_r=None, g_r=None, c_f=C_f_ij)
        # end if(do_graphics):

        # now we have to solve the matrix problem in the Fourier space
        # note that the convolution theorem involves a constant factor ('a')
        # depending on the used forward fourier transform normalization constant
        # H = C + aCH
        # H - aCH = C
        # (E - aC)H = C
        # H = {E - aC}^-1 * C
        # however, thanks to the normalization chosen so that for FT it is 1, we can write
        # H = {E - C}^-1 * C
        # E + H = {E - C}^-1 * (C + E - C)
        # S = {E - C}^-1 * E
        H_f_ij = solver_function(
            (E_ij - dft.ft_convolution_factor * C_f_ij), C_f_ij, ctrl['npoints'])
        from math import pi
        S = E_ij + H_f_ij
        #S = solver_function((E_ij - C_f_ij), E_ij, ctrl['npoints'])

        # convert H to short ranged Gamma G(k) = H(k) - Cs(k)
        #G_f_ij = H_f_ij - Cs_f_ij
        # convert S to short ranged Gamma G(k) = S(k) - E - Cs(k)
        G_f_ij = S - E_ij - Cs_f_ij

        # FT G_f_ij to G_r_ij
        for i in range(syst['ncomponents']):
            for j in range(syst['ncomponents']):
                # perform the inverse Fourier transform of the Gamma function
                G_n_ij[i, j] = dft.idfbt(
                    G_f_ij[i, j], norm=syst['dens']['ij'][i, j], corr=-U_erf_ij['real'][i, j])
            # end for j in range(ncomponents)
        # end for i in range(ncomponents)

        # *********************************************************************************************

        # test for convergence and write the gamma if everything is OK
        norm_dsqn = convergence_dsqn(ctrl, syst, G_o_ij, G_n_ij)
        time_end = time()
        print("%f sec - DSQN %.3e -" % ((time_end - time_beg), norm_dsqn)),
        if (norm_dsqn > ctrl['max_dsqn'] or (not np.isfinite(norm_dsqn))):
            print("\nDSQN too large, calculation is probably diverging")
            print("check inputs and outputs and/or increase the value of max_dsqn (%e at the moment)" %
                  ctrl['max_dsqn'])
            sys.exit(2)

        if (norm_dsqn <= ctrl['convergence_crit']):
            print("converged")
            converged = 1
        else:
            print("not converged")

            # test if we do picard or newton-raphson
            if (not ctrl['do_nr']):
                # perform the picard mixing
                # calculate the new Gamma
                G_r_ij = (1.0 - ctrl['mix_param']) * \
                    G_o_ij + ctrl['mix_param'] * G_n_ij
            else:
                # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
                # Newton-Raphson/Conjugate gradients method

                # we are trying to solve the problem AX = B where
                # A is a linear operator, X is dgamma and B is difference between input and output gamma
                # we are using iterative method to get the solution (non-symmetric conjugate gradients)
                # more details are given in my notes or in
                # Zerah: J Comp Phys 61 1985, 280
                # Belloni: J Chem Phys 88 (8) 1988, 5143
                # the names of the respective variables in the following section will be kept consistent
                # with the papers

                nr_converged = 0
                nr_niter = 0

                # calculate the convergence criterion
                # it is done relative to the DSQN of the main cycle in order to avoid
                # unnecessary iterations in the beginning, where the linear
                # approximation is not exact
                nr_convergence_crit = norm_dsqn * ctrl['nr_convergence_factor']

                # calculate B, as defined in the paper
                B = G_n_ij - G_o_ij

                # now initialize the system for step n=0 - iteration 1 will provide X(1)
                # the values of X(n) and X(n-1) needed for the iterative algorithm
                # let's choose X(0)=X(-1)=const * B
                # X(n)=Xcur; X(n-1)=Xold; X(n+1)=Xnew
                Xold = B * ctrl['nr_mix_param']
                Xcur = copy(Xold)
                Xnew = copy(Xold)

                # R(0) definition
                Rcur = 0.0
                # alpha(0) definition - will use L instead!
                Lcur = 0.0

                # for the first iteration, set W(2) to 1.0
                Wnew = 1.0
                # we don't need Wcur here, but let's define it...
                Wcur = 1.0

                # we will also need matrices S (both HNC and PY) and H (HNC) or M (PY - Mayer function) for the operator A (defined in the nr-cycle)
                # we use functions from the main cycle
                # we do it with arrays here and convert to matrices where needed and appropriate
                # we have to take the H using the old Gamma! i.e., taking g(r) provided by the closure shortly after the main iteration
                # cycle is started and subtracting 1
                H = g_r_ij - 1.0
                # modMayerFunc is MayerFunc + 1.0
                # this has been done before, so commenting out
                #M = modMayerFunc['u_ij'] - 1.0

                # the arrays for the operations involving operators A and At were created during the initialization

                # the code will operate with the matrix CF (closure factor), which is set according to closure
                # to either H (total correlation function, for HNC) or to M (mayer function, for PY)
                # check below for the algorithm details
                if (syst['closure_name'] == 'hnc'):
                    CF = H.copy()
                elif (syst['closure_name'] == 'py'):
                    CF = M
                else:
                    sys.stderr.write("unsupported closure! \n")
                    sys.exit(1)

                # timing purposes
                nr_time_beg = time()

                # increase the counter for the next half-iteration
                total_iter += 1

                # we make one half-iteration (number 0) and then carry on with full cycles until convergence
                while (not nr_converged and nr_niter <= ctrl['nr_max_iter']):
                    print("  nr/cg\t    :%-4u" % (nr_niter)),
                    # show "progress bar" when output is redirected - this time with plus sign
                    # normal iterations are done with "." as an indicator
                    if (cmdline['output'] != None):
                        sys.stderr.write("+")
                        sys.stderr.flush()
                        if ((total_iter % 25) == 0):
                            sys.stderr.write("\n")
                    # end if (cmdline['output'] != None)

                    # 'shift' the respective functions/values (except R, which will be done later)
                    Xold = copy(Xcur)
                    Xcur = copy(Xnew)
                    # store R from previous iteration R(n) to R(n-1)
                    Rold = copy(Rcur)
                    # store alpha (number) from previous iteration alpha(n) to alpha(n-1)
                    Lold = Lcur
                    # store W (number) from previous iteration W(n+1) to W(n)
                    Wcur = Wnew

                    # for matricial relations (part of operators A and At) we need to do everything separately for each discretization step
                    # perform the calculation of AX
                    # this will be done in several steps since
                    # !!!!!!!!!!!!!!!!!!! in HNC !!!!!!!!!!!!!!!!!!!
                    # AX = 1X - iFT ( S FT(HX) S - FT(HX))
                    # !!!!!!!!!!!!!!!!!!! in PY !!!!!!!!!!!!!!!!!!!
                    # AX = 1X - iFT ( S FT(MX) S - FT(MX))
                    # where M is the Mayer function exp(-betaU) - 1
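                    # notation: 1X is the identity operator acting on X, FT/iFT are the forward/inverse
                    # Fourier-Bessel transforms (dft.dfbt / dft.idfbt), and S FT(.) S denotes
                    # ncomponents x ncomponents matrix products evaluated at every k-point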

                    # the code will operate with the matrix CF (closure factor), which is set according to closure
                    # to either H (total correlation function, for HNC) or to M (mayer function, for PY)
                    # this was done outside of this cycle

                    # H.X is not matricial product!
                    CFX = CF*Xcur

                    for i in range(syst['ncomponents']):
                        for j in range(syst['ncomponents']):
                            CFXq[i, j] = dft.dfbt(CFX[i, j])[0]

                    # matricial products here
                    for dr in range(ctrl['npoints']):
                        AXq[:, :, dr] = mat(
                            S[:, :, dr])*mat(CFXq[:, :, dr])*mat(S[:, :, dr]) - mat(CFXq[:, :, dr])

                    for i in range(syst['ncomponents']):
                        for j in range(syst['ncomponents']):
                            AX[i, j] = Xcur[i, j] - dft.idfbt(AXq[i, j])

                    # calculate Rcur = R(n) = B - AX(n)
                    # do the calculation
                    Rcur = B - AX

                    # check for convergence here - if converged, abandon the cycle!
                    # we check how far is Rcur from zero (Z is zero array with the same dimensions as Rcur)
                    nr_norm_dsqn = convergence_dsqn(ctrl, syst, Rcur, Z)
                    #nr_norm_dsqn = convergence_dsqn(ctrl, syst, B, AX)

                    nr_time_end = time()

                    # convergence is tested relative to the DSQN of the 'outer' cycle
                    print("%f sec - rel. DSQN %.3e -" %
                          ((nr_time_end - nr_time_beg), nr_norm_dsqn/norm_dsqn)),
                    if (nr_norm_dsqn > ctrl['max_dsqn'] or (not isfinite(nr_norm_dsqn))):
                        print("\n\tDSQN too large, calculation is probably diverging")
                        break

                    if (nr_norm_dsqn <= nr_convergence_crit):
                        print("converged")
                        nr_converged = 1
                    else:
                        # the conjugate gradients algorithm needed - NR has not converged
                        print("not converged")

                        nr_time_beg = time()

                        nr_niter += 1
                        total_iter += 1

                        # perform the calculation of AtR
                        # this will be done in several steps; At is the adjoint of the operator A
                        # !!!!! in HNC !!!!!
                        # AT R = 1R - FT (S iFT(R) S - iFT(R))H
                        # !!!!! in PY !!!!!
                        # AT R = 1R - FT (S iFT(R) S - iFT(R))M

                        # the operator works with the matrix CF, set according to the used closure
                        for i in range(syst['ncomponents']):
                            for j in range(syst['ncomponents']):
                                # even though R is r-space function, we are using the inverse FT here
                                # the definition of the adjoint requires its usage here!
                                # the problem is also the normalization sqrt(rho_i rho_j) that would be applied
                                # incorrectly in case normal FT would be used here
                                # note that the sine transform is the same in r- and k-spaces => the difference really
                                # lies in the normalization
                                #Rq[i,j] = dft.dfbt(Rcur[i,j])[0]
                                Rq[i, j] = dft.idfbt(Rcur[i, j])

                        # matricial product
                        for dr in range(ctrl['npoints']):
                            SRS[:, :, dr] = mat(
                                S[:, :, dr])*mat(Rq[:, :, dr])*mat(S[:, :, dr])-mat(Rq[:, :, dr])

                        for i in range(syst['ncomponents']):
                            for j in range(syst['ncomponents']):
                                # even though SRS is k-space function, we are using the forward FT here
                                # the definition of the adjoint requires its usage here!
                                # the problem is also the normalization sqrt(rho_i rho_j) that would be applied
                                # incorrectly in case iFT would be used here
                                # note that the sine transform is the same in r- and k-spaces => the difference really
                                # lies in the normalization
                                # remember that FT returns 2 functions in this case
                                #SRSq[i,j] = dft.idfbt(SRS[i,j])
                                SRSq[i, j] = dft.dfbt(SRS[i, j])[0]

                        # not a matricial product!
                        AtR = Rcur - SRSq*CF

                        # calculate Lcur = alpha(n) = (R(n),R(n))/(AtR(n),AtR(n))
                        # where (Y,Z) is inner product \sum_ij rho_i rho_j \int Y_ij Z_ij 4 \pi r^2 dr
                        Lcur = abs(dotproduct(ctrl, syst, r, Rcur, Rcur)) / \
                            abs(dotproduct(ctrl, syst, r, AtR, AtR))

                        # calculate Wnew = W(n+1) (except for first iteration)
                        if (nr_niter != 1):
                            # do the full calculation, in the first iteration the value is pre-set to 1.0
                            Wpartial = 1.0 - Lcur * abs(dotproduct(ctrl, syst, r, Rcur, Rcur))/(
                                Lold * Wcur * abs(dotproduct(ctrl, syst, r, Rold, Rold)))
                            Wnew = 1.0 / Wpartial
                        # end calculation of Wnew

                        # calculate X(n+1) = X(n-1) + W(n+1) * (alpha(n) * AtR(n) + X(n) - X(n-1))
                        Xnew = Xold + Wnew * (Lcur * AtR + Xcur - Xold)
                    # end if (nr_norm_dsqn <= nr_convergence_crit) - handling of the else-branch (not converged)
                # end while (not nr-converged and nr_niter < ctrl['nr_max_iter'])

                # in case convergence was not reached, do Picard
                if (not nr_converged):
                    print("\tcouldn't converge NR/CG cycle,"),
                    if (not ctrl['nr_noconv_incr']):
                        print("using Picard iteration instead")
                        G_r_ij = (1.0 - ctrl['mix_param']) * \
                            G_o_ij + ctrl['mix_param'] * G_n_ij
                    else:
                        print("using non-converged increment")
                        G_r_ij = G_o_ij + Xnew
                else:
                    #G_r_ij = G_o_ij + Xcur
                    G_r_ij = G_o_ij + Xnew

                # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            # end else (Newton-Raphson method)
        # end else (calculation not converged)

        # output data
        # test whether some function of interest should be saved
        # if savefreq is 0, then skip
        # the second element of the divmod() result is the remainder after division
        # if it is zero, then save the file...
        # Gamma function
        if ((outp['G_ij_write']) and (outp['G_ij_savefreq'] != 0) and (not divmod(niter, outp['G_ij_savefreq'])[1])):
            # store the Gamma function
            print("\tGamma function stored")
            try:
                if (outp['G_ij_binary']):
                    G_r_ij.tofile(outp['G_ij_name'])
                else:
                    G_r_ij.tofile(outp['G_ij_name'], " ", "%e")
            except IOError as msg:
                print("error while saving Gamma function")
                print(msg)
                sys.exit(1)
        # end if

        # *********************************************************************************************

    # end while (not converged)

    print("\niteration process completed in iteration %u" % niter)
    if (converged):
        print("\tcalculation converged")
    else:
        print("\tcalculation not converged; maximum number of iterations reached\n")

    # do closure
    cs_r_ij, g_r_ij = syst['closure'](
        syst, r, modMayerFunc, U_discontinuity, G_r_ij)
    # and evaluate uncorrected c(r) as well
    c_r_ij = cs_r_ij - U_erf_ij['real']

    # update the plot if requested
    if (ctrl['do_graphics']):
        pyoz_plot.plot_update(syst, const, U_r=None, U_erf=None,
                              G_r=G_r_ij, c_r=cs_r_ij, g_r=g_r_ij, c_f=None)
    # end if(do_graphics):

    print("\nsaving outputs")
    # some error checking should be added here! for both g, G
    try:
        # save g_r_ij to file
        if (outp['g_ij_write']):
            print("\tpair correlation function\t(%s)" % outp['g_ij_name'])
            fw = open(outp['g_ij_name'], "wt")
            fw.write("%8.3f" % 0.0)
            for i in range(syst['ncomponents']):
                for j in range(i, syst['ncomponents']):
                    fw.write("%10.5f" % 0.0)
            fw.write("\n")
            for dr in range(ctrl['npoints']):
                fw.write("%8.3f" % r[dr])
                for i in range(syst['ncomponents']):
                    for j in range(i, syst['ncomponents']):
                        fw.write("%10.5f" % g_r_ij[i, j, dr])
                # end for i,j in range ncomponents...
                fw.write("\n")
            fw.close()

        # save c_r_ij to file
        if (outp['c_ij_write']):
            print("\tdirect correlation function\t(%s)" % outp['c_ij_name'])
            fw = open(outp['c_ij_name'], "wt")
            fw.write("%8.3f" % 0.0)
            for i in range(syst['ncomponents']):
                for j in range(i, syst['ncomponents']):
                    fw.write("%10.5f" % 0.0)
            fw.write("\n")
            for dr in range(ctrl['npoints']):
                fw.write("%8.3f" % r[dr])
                for i in range(syst['ncomponents']):
                    for j in range(i, syst['ncomponents']):
                        # careful, we have to write the complete c(r), i.e., we need to compensate for the
                        # Ng-correction!
                        # cs(r) = c(r) + Ucorr(r) => c(r) = cs(r) - Ucorr(r)
                        fw.write("%10.5f" % c_r_ij[i, j, dr])
                # end for i,j in range ncomponents...
                fw.write("\n")
            fw.close()

        # save c_r_sr_ij to file
        if (outp['c_ij_sr_write']):
            print("\tdirect short range correlation function\t(%s)" % outp['c_ij_sr_name'])
            fw = open(outp['c_ij_sr_name'], "wt")
            fw.write("%8.3f" % 0.0)
            for i in range(syst['ncomponents']):
                for j in range(i, syst['ncomponents']):
                    fw.write("%10.5f" % 0.0)
            fw.write("\n")
            for dr in range(ctrl['npoints']):
                fw.write("%8.3f" % r[dr])
                for i in range(syst['ncomponents']):
                    for j in range(i, syst['ncomponents']):
                        # careful, we have to write the complete c(r), i.e., we need to compensate for the
                        # Ng-correction!
                        # cs(r) = c(r) + Ucorr(r) => c(r) = cs(r) - Ucorr(r)
                        fw.write("%10.5f" % c_r_sr_ij[i, j, dr]) + U_ij[i, j, dr]
                # end for i,j in range ncomponents...
                fw.write("\n")
            fw.close()

        #TODO: See below for the short range direct correlation fn c_r_ij_sr - copy above and write out too
        # c_r_ij_sr = c_r_ij + U_ij_individual[index]

        # save S to file
        if (outp['S_ij_write']):
            print("\tpartial structure factors\t(%s)" % outp['S_ij_name'])
            fw = open(outp['S_ij_name'], "wt")
            fw.write("%8.3f" % 0.0)
            for i in range(syst['ncomponents']):
                for j in range(i, syst['ncomponents']):
                    fw.write("%10.5f" % 0.0)
            fw.write("\n")
            for dr in range(ctrl['npoints']):
                fw.write("%8.3f" % k[dr])
                for i in range(syst['ncomponents']):
                    for j in range(i, syst['ncomponents']):
                        # partial structure factor S_ij(k)
                        fw.write("%10.5f" % S[i, j, dr])
                # end for i,j in range ncomponents...
                fw.write("\n")
            fw.close()

        if (outp['G_ij_write']):
            print("\tGamma function\t\t\t(%s)" % outp['G_ij_name'])
            # store the Gamma function if required
            # short-ranged Gamma is saved!
            if (outp['G_ij_binary']):
                G_r_ij.tofile(outp['G_ij_name'])
            else:
                G_r_ij.tofile(outp['G_ij_name'], " ", "%e")

        # save total interaction (U+Gamma(long-ranged!)) to file
        if (outp['Utot_ij_write']):
            print("\ttotal potential (U+Gamma)\t(%s)" % outp['Utot_ij_name'])
            fw = open(outp['Utot_ij_name'], "wt")
            for dr in range(ctrl['npoints']):
                fw.write("%8.3f" % r[dr])
                for i in range(syst['ncomponents']):
                    for j in range(i, syst['ncomponents']):
                        fw.write("%20.5e" % (
                            U_ij[i, j, dr] + G_r_ij[i, j, dr] + U_erf_ij['real'][i, j, dr]))
                # end for i,j in range ncomponents...
                fw.write("\n")
            fw.close()
    # end try block
    except IOError as msg:
        print("error while saving output")
        print(msg)
        sys.exit(1)
    # end try/except block

    # now 'remove' the Ng-renormalization
    G_r_ij += U_erf_ij['real']
    # we also have the short range G in the Fourier space
    # if needed, it can be of course used here!
    # from now on, the G_r_ij is the real Gamma function without the
    # stuff making the convergence easier

    # calculate also the term involving Gamma in HNC (exp(Gamma)) and PY (1+Gamma)
    # and save it
    G_term_ij = calcGammaTerm(syst, G_r_ij)

    # calculation of thermodynamic properties
    print("\ncalculation of (thermodynamic) properties")
    if (converged):
        # only evaluate the properties for converged calculation!
        # all information is printed inside these functions

        # for excess chem potential and compressibility, we need short range version of c_ij
        # it is a different short-range function than the one coming from Ng (which is finite at r=0);
        # ours is given simply by c_{ij}^s = c_{ij} + \beta U_{ij}^{coulomb}, since for Coulomb
        # c(r) -> -\beta U_{ij}^{coulomb} as r -> \infty
        # we check here, if coulomb potential is used and if yes, we just subtract the coulomb interaction from it
        # step 1 - get the index in the parm array, where coulomb info is stored. the same index is used in the
        # U_ij_individual array
        print("\ttesting for long-ranged potentials")
        index = -1
        for i in range(len(parm)):
            if ('coulomb' in parm[i].values()):
                index = i
        if (index >= 0):
            print("\t\tfound, using short-ranged c(r)\n")
            # ----- was done already! firstly, we get rid of ng, (subtract U_erf_ij) converting cs_r_ij to c_r_ij
            # then we add the coulomb (lr correction) as shown above, to get c_r_ij_sr
            #c_r_ij_sr = cs_r_ij -U_erf_ij['real'] + U_ij_individual[index]
            c_r_ij_sr = c_r_ij + U_ij_individual[index]
        else:
            print("\t\tnot found, using original c(r)\n")
            # in this case, ng-correction is zero and cs_r_ij is already the function we need
            c_r_ij_sr = c_r_ij

        # kirkwood-buff integrals
        properties.kirkwood_buff(ctrl, syst, r, g_r_ij)

        # osmotic coefficients
        properties.osmotic_coeff(ctrl, syst, parm, const, r, g_r_ij, G_r_ij, G_term_ij,
                                 U_ij_individual, dU_ij_individual, U_discontinuity, modMayerFunc['contrib'])

        # excess chemical potential/activity
        # only supported for hnc!
        if (syst['closure_name'] == 'hnc'):
            properties.excess_chempot(
                ctrl, syst, const, r, g_r_ij - 1.0, G_r_ij, c_r_ij_sr, index, parm)
        else:
            print("\texcess chemical potentials available only with HNC\n")

        # isothermal compressibility
        properties.compressibility(ctrl, syst, const, r, c_r_ij_sr)

    # end if (converged)
    else:
        print("\tnon-converged calculation, properties won't be evaluated\n")
    # end of calculation of thermodynamic properties

    print("calculation finished\n")
    if (cmdline['output'] != None):
        sys.stderr.write("\ncalculation finished\n")

    if(ctrl['do_graphics']):
        # stop in order not to destroy the window with the plotted functions
        sys.stderr.write("press enter to close the graphics window and exit\n")
        sys.stdin.readline()
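The Fourier-space matrix relation derived in the comments above, H = {E - C}^-1 * C, is solved independently at every k-point as a small ncomponents x ncomponents linear system. Below is a minimal NumPy sketch of that per-k solve; the array layout (ncomp x ncomp x npoints) and all names are illustrative assumptions, not code from the solver itself.

import numpy as np

ncomp, npoints = 2, 8
E = np.eye(ncomp)
C_f = 0.1 * np.random.rand(ncomp, ncomp, npoints)   # stand-in for the k-space c_ij
H_f = np.empty_like(C_f)
for dk in range(npoints):
    # H(k) = (E - C(k))^-1 C(k): one small linear solve per grid point
    H_f[:, :, dk] = np.linalg.solve(E - C_f[:, :, dk], C_f[:, :, dk])
S = E[:, :, None] + H_f                              # partial structure factors S = E + H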
Example #47
0
 def get_kernel_basis(self):
     """ Returns the kernel as a kernel_dim x G numpy array """
     return sp.copy(self._kernel_basis)
P_UAV = 5000
Pc_UAV = 4000
rmin = 0.4*log(2)
tau_ini = Parameter(value=0.5)


# mat = spio.loadmat('XModel_Ex6_4D2D.mat', squeeze_me=True)
# mat = spio.loadmat('XModel_Ex6_6D2D.mat', squeeze_me=True)
# mat = spio.loadmat('XModel_Ex6_8D2D.mat', squeeze_me=True)
mat = spio.loadmat('XModel_Ex6_10D2D.mat', squeeze_me=True)
h_d2d = mat['h_d2d']
h_uav_d2d = mat['h_uav_d2d']
# ## the size of h_d2d is (num_d2d, num_d2d)
# ## the size of h_uav_d2d is (1,num_d2d)

max_d2d_gains_diff = sp.copy(h_d2d[:, :])
sp.fill_diagonal(max_d2d_gains_diff, 0)
d2d_to_d2d_gains_diff = max_d2d_gains_diff[:num_d2d, :num_d2d]
d2d_to_d2d_gains_diag = sp.subtract(h_d2d, d2d_to_d2d_gains_diff)
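# Illustration of the split above (assuming matching shapes): for a 3x3 gain matrix,
# sp.fill_diagonal(..., 0) keeps only the cross-link (interference) gains, and the
# subtraction then leaves only the direct-link gains on the diagonal:
#   h_d2d               off-diagonal part        diagonal part
#   [[g11 g12 g13]      [[0   g12 g13]           [[g11 0   0  ]
#    [g21 g22 g23]  ->   [g21 0   g23]    and     [0   g22 0  ]
#    [g31 g32 g33]]      [g31 g32 0  ]]           [0   0   g33]]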


# # ############################################################
# # This code is used to solve the opt problem
# # ############################################################
t0 = time.time()

pow_co = Variable(num_d2d)
objective = Minimize( sum(pow_co)*sp.subtract(1, tau_ini.value) )

constraints = []
constraints.append(d2d_to_d2d_gains_diag * pow_co >= (exp(rmin / (1-tau_ini.value)) - 1) * (d2d_to_d2d_gains_diff*pow_co + 1))
Example #49
0
def get_freq_modes_over_f(power_mat,
                          window_function,
                          frequency,
                          n_modes,
                          plots=False):
    """Fines the most correlated frequency modes and fits thier noise."""

    n_f = len(frequency)
    d_f = sp.mean(sp.diff(frequency))
    dt = 1. / 2. / frequency[-1]
    n_chan = power_mat.shape[-1]
    n_time = window_function.shape[0]
    # The threshold for assuming there isn't enough data to measure anything.
    no_data_thres = 10. / n_time
    # Initialize the dictionary that will hold all the parameters.
    output_params = {}
    # First take the low frequency part of the spectrum matrix and average over
    # enough bins to get a well conditioned matrix.
    low_f_mat = sp.mean(power_mat[:4 * n_chan, :, :].real, 0)
    # Factor the matrix to get the most correlated modes.
    e, v = linalg.eigh(low_f_mat)
    # Make sure they are sorted.
    if not sp.alltrue(sp.diff(e) >= 0):
        raise RuntimeError("Eigenvalues not sorted")
    # Power matrix stripped of the biggest modes.
    reduced_power = sp.copy(power_mat)
    mode_list = []
    # Solve for the spectra of these modes.
    for ii in range(n_modes):
        this_mode_params = {}
        # Get power spectrum and window function for this mode.
        mode = v[:, -1 - ii]
        mode_power = sp.sum(mode * power_mat.real, -1)
        mode_power = sp.sum(mode * mode_power, -1)
        mode_window = sp.sum(mode[:, None]**2 * window_function, 1)
        mode_window = sp.sum(mode_window * mode[None, :]**2, 1)
        # Protect against no data.
        if sp.mean(mode_window).real < no_data_thres:
            this_mode_params['amplitude'] = 0.
            this_mode_params['index'] = 0.
            this_mode_params['f_0'] = 1.
            this_mode_params['thermal'] = T_infinity**2 * dt
        else:
            # Fit the spectrum.
            p = fit_overf_const(mode_power, mode_window, frequency)
            # Put all the parameters we measured into the output.
            this_mode_params['amplitude'] = p[0]
            this_mode_params['index'] = p[1]
            this_mode_params['f_0'] = p[2]
            this_mode_params['thermal'] = p[3]
        this_mode_params['mode'] = mode
        output_params['over_f_mode_' + str(ii)] = this_mode_params
        # Remove the mode from the power matrix.
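        # The three updates below apply the projector (I - v v^T) on both channel axes of P:
        # reduced <- (I - v v^T) P (I - v v^T)
        #          = P - (P v) v^T - v (v^T P) + (v^T P v) v v^T, with v the current mode.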
        tmp_amp = sp.sum(reduced_power * mode, -1)
        tmp_amp2 = sp.sum(reduced_power * mode[:, None], -2)
        tmp_amp3 = sp.sum(tmp_amp2 * mode, -1)
        reduced_power -= tmp_amp[:, :, None] * mode
        reduced_power -= tmp_amp2[:, None, :] * mode[:, None]
        reduced_power += tmp_amp3[:, None, None] * mode[:, None] * mode
        mode_list.append(mode)
    # Initialize the compensation matrix, that will be used to restore thermal
    # noise that gets subtracted out.  See Jan 29, Feb 17th, 2012 of Kiyo's
    # notes.
    compensation = sp.eye(n_chan, dtype=float)
    for mode1 in mode_list:
        compensation.flat[::n_chan + 1] -= 2 * mode1**2
        for mode2 in mode_list:
            mode_prod = mode1 * mode2
            compensation += mode_prod[:, None] * mode_prod[None, :]
    # Now that we've stripped the noisiest modes, measure the auto power
    # spectrum, averaged over channels.
    auto_spec_mean = reduced_power.view()
    auto_spec_mean.shape = (n_f, n_chan**2)
    auto_spec_mean = auto_spec_mean[:, ::n_chan + 1].real
    auto_spec_mean = sp.mean(auto_spec_mean, -1)
    diag_window = window_function.view()
    diag_window.shape = (n_time, n_chan**2)
    diag_window = diag_window[:, ::n_chan + 1]
    auto_spec_window = sp.mean(diag_window, -1)
    if sp.mean(auto_spec_window).real < no_data_thres:
        auto_cross_over = 0.
        auto_index = 0.
        auto_thermal = 0
    else:
        auto_spec_params = fit_overf_const(auto_spec_mean, auto_spec_window,
                                           frequency)
        auto_thermal = auto_spec_params[3]
        if (auto_spec_params[0] <= 0 or auto_spec_params[3] <= 0
                or auto_spec_params[1] > -0.599):
            auto_cross_over = 0.
            auto_index = 0.
        else:
            auto_index = auto_spec_params[1]
            auto_cross_over = auto_spec_params[2] * (
                auto_spec_params[0] / auto_spec_params[3])**(-1. / auto_index)
            #if auto_cross_over < d_f:
            #    auto_index = 0.
            #    auto_cross_over = 0.
    # Plot the mean auto spectrum if desired.
    if plots:
        h = plt.gcf()
        a = h.add_subplot(*h.current_subplot)
        norm = sp.mean(auto_spec_window).real
        auto_plot = auto_spec_mean / norm
        plotable = auto_plot > 0
        lines = a.loglog(frequency[plotable], auto_plot[plotable])
        c = lines[-1].get_color()
        # And plot the fit in a light color.
        if auto_cross_over > d_f / 4.:
            spec = npow.overf_power_spectrum(auto_thermal, auto_index,
                                             auto_cross_over, dt, n_time)
        else:
            spec = sp.zeros(n_time, dtype=float)
        spec += auto_thermal
        spec[0] = 0
        spec = npow.convolve_power(spec, auto_spec_window)
        spec = npow.prune_power(spec)
        spec = spec[1:].real
        if norm > no_data_thres:
            spec /= norm
        plotable = spec > 0
        a.loglog(frequency[plotable],
                 spec[plotable],
                 c=c,
                 alpha=0.4,
                 linestyle=':')
    output_params['all_channel_index'] = auto_index
    output_params['all_channel_corner_f'] = auto_cross_over
    # Finally measure the thermal part of the noise in each channel.
    cross_over_ind = sp.digitize([auto_cross_over * 4], frequency)[0]
    cross_over_ind = max(cross_over_ind, n_f // 2)
    cross_over_ind = min(cross_over_ind, int(9. * n_f / 10.))
    thermal = reduced_power[cross_over_ind:, :, :].real
    n_high_f = thermal.shape[0]
    thermal.shape = (n_high_f, n_chan**2)
    thermal = sp.mean(thermal[:, ::n_chan + 1], 0)
    thermal_norms = sp.mean(diag_window, 0).real
    bad_inds = thermal_norms < no_data_thres
    thermal_norms[bad_inds] = 1.
    # Compensate for power lost in mode subtraction.
    compensation[:, bad_inds] = 0
    compensation[bad_inds, :] = 0
    for ii in xrange(n_chan):
        if bad_inds[ii]:
            compensation[ii, ii] = 1.
    thermal = linalg.solve(compensation, thermal)
    # Normalize
    thermal /= thermal_norms
    thermal[bad_inds] = T_infinity**2 * dt
    # Occasionally the compensation fails horribly on a few channels.
    # When this happens, zero out the offending indices.
    thermal[thermal < 0] = 0
    output_params['thermal'] = thermal
    # Now that we know what thermal is, we can subtract it out of the modes we
    # already measured.
    for ii in range(n_modes):
        mode_params = output_params['over_f_mode_' + str(ii)]
        thermal_contribution = sp.sum(mode_params['mode']**2 * thermal)
        # Subtract a maximum of 90% of the white noise to keep things positive
        # definite.
        new_white = max(mode_params['thermal'] - thermal_contribution,
                        0.1 * mode_params['thermal'])
        if mode_params['thermal'] < 0.5 * T_infinity**2 * dt:
            mode_params['thermal'] = new_white
    return output_params
Example #50
0
## calculating the charge-symmetric case (n=0.5) to get Lambda0
if chat: print('#\n# calculating the charge-symmetric solution:')
if chat: print('# norm[G0]: {0: .6f}, n[G0]: {1: .6f}'\
.format(float(IntDOS(GFzero_A)),float(Filling(GFzero_A))))

if chat: print('# calculating the charge-symmetric two-particle bubble...')
Bubble_A = TwoParticleBubble(GFzero_A,GFzero_A,'eh') # Bubble[0] is negative
BubZero = Bubble_A[int(N/2)]
Uc = -1.0/sp.real(BubZero)
if chat: print('# - Bubble[0] = {0: .6f},   critical U = {1: .6f}'.format(BubZero,Uc))
if chat: print('# calculating the charge-symmetric Lambda vertex...')
Lambda = CalculateLambda(Bubble_A,GFzero_A,GFzero_A)
if chat: print('# - Lambda = {0: .6f}'.format(Lambda))

SigmaT = SigmaT_old = -ed
GFtherm_A = sp.copy(GFzero_A)
nT = 0.5
k = 1
## calculating the non-charge-symmetric case ##############
if ed!=0.0:
	nTold = 1e5
	Lambdaold = 1e5
	if chat: print('#\n# calculating the non-symmetric solution:')
	if chat: print('# iterating the thermodynamic self-energy SigmaT, mixing: alpha = {0: .3f}:'\
	.format(float(alpha)))
	while any([sp.fabs(nT-nTold)>epst,sp.fabs(SigmaT-SigmaT_old)>epst]):
		nTold = nT
		Lambdaold = Lambda
		SigmaT_old = SigmaT
		if GFtype in ['cubic','square']: GFtherm_A = ShiftGreensFunction(GFzero_A,-ed-SigmaT)
		else:                     GFtherm_A = GFlambda(En_A-ed-SigmaT)
Example #51
0
File: tools.py Project: Python3pkg/OpenPNM
def subdivide(network, pores, shape, labels=[]):
    r'''
    Trims the given pores and replaces them with cubic networks of the specified shape.

    Parameters
    ----------
    network : OpenPNM Network Object

    pores : array_like
        The first group of pores to be replaced

    shape : array_like
        The shape of cubic networks in the target locations

    Notes
    -----
    - It works only for cubic networks.

    Examples
    --------
    >>> import OpenPNM
    >>> pn = OpenPNM.Network.Cubic(shape=[5,6,5], spacing=0.001)
    >>> pn.Np
    150
    >>> nano_pores = [2,13,14,15]
    >>> pn.subdivide(pores=nano_pores, shape=[4,7,3], labels='nano')
    >>> pn.Np
    482
    >>> assert pn.Np == (150+4*(4*7*3)-4)

    '''
    mro = [item.__name__ for item in network.__class__.__mro__]
    if 'Cubic' not in mro:
        raise Exception('Subdivide is only supported for Cubic Networks')
    from OpenPNM.Network import Cubic
    pores = _sp.array(pores, ndmin=1)

    # Checks to find boundary pores in the selected pores
    if 'pore.boundary' in network.labels():
        if (_sp.in1d(pores, network.pores('boundary'))).any():
            raise Exception('boundary pores cannot be subdivided!')
    if not hasattr(network, '_subdivide_flag'):
        network._subdivide_flag = True
    else:
        raise Exception('The network has subdivided pores, so the method ' +
                        'does not support another subdivision.')
    # Assigning right shape and division
    if _sp.size(shape) != 2 and _sp.size(shape) != 3:
        raise Exception('Subdivide not implemented for Networks other than 2D \
                         and 3D')
    elif _sp.size(shape) == 3 and 1 not in shape:
        div = _sp.array(shape, ndmin=1)
        single_dim = None
    else:
        single_dim = _sp.where(_sp.array(network._shape) == 1)[0]
        if _sp.size(single_dim) == 0:
            single_dim = None
        if _sp.size(shape) == 3:
            div = _sp.array(shape, ndmin=1)
        else:
            div = _sp.zeros(3, dtype=_sp.int32)
            if single_dim is None:
                dim = 2
            else:
                dim = single_dim
            div[dim] = 1
            div[-_sp.array(div, ndmin=1, dtype=bool)] = _sp.array(shape,
                                                                  ndmin=1)

    # Creating small network and handling labels
    network_spacing = network._spacing
    new_net_spacing = network_spacing/div
    new_net = Cubic(shape=div, spacing=new_net_spacing)
    main_labels = ['left', 'right', 'front', 'back', 'top', 'bottom']
    if single_dim is not None:
        label_groups = _sp.array([['front', 'back'],
                                  ['left', 'right'],
                                  ['top', 'bottom']])
        non_single_labels = label_groups[_sp.array([0, 1, 2]) != single_dim]
    for l in main_labels:
        new_net['pore.surface_' + l] = False
        network['pore.surface_' + l] = False
        if single_dim is None:
            new_net['pore.surface_' + l][new_net.pores(labels=l)] = True
        else:
            for ind in [0, 1]:
                loc = (non_single_labels[ind] == l)
                temp_pores = new_net.pores(non_single_labels[ind][loc])
                new_net['pore.surface_' + l][temp_pores] = True

    old_coords = _sp.copy(new_net['pore.coords'])
    if labels == []:
        labels = ['pore.subdivided_' + new_net.name]
    for P in pores:
        # Shifting the new network to the right location and attaching it to
        # the main network
        shift = network['pore.coords'][P] - network_spacing/2
        new_net['pore.coords'] += shift
        Pn = network.find_neighbor_pores(pores=P)
        try:
            Pn_new_net = network.pores(labels)
        except:
            Pn_new_net = []
        Pn_old_net = Pn[~_sp.in1d(Pn, Pn_new_net)]
        Np1 = network.Np
        extend(pore_coords=new_net['pore.coords'],
               throat_conns=new_net['throat.conns'] + Np1,
               labels=labels, network=network)

        # Moving the temporary labels to the big network
        for l in main_labels:
            network['pore.surface_'+l][Np1:] = new_net['pore.surface_'+l]

        # Stitching the old pores of the main network to the new extended pores
        surf_pores = network.pores('surface_*')
        surf_coord = network['pore.coords'][surf_pores]
        for neighbor in Pn:
            neighbor_coord = network['pore.coords'][neighbor]
            dist = [round(_sp.inner(neighbor_coord-x, neighbor_coord-x),
                          20) for x in surf_coord]
            nearest_neighbor = surf_pores[dist == _sp.amin(dist)]
            if neighbor in Pn_old_net:
                coplanar_labels = network.labels(pores=nearest_neighbor)
                new_neighbors = network.pores(coplanar_labels,
                                              mode='intersection')
                # This might happen to the edge of the small network
                if _sp.size(new_neighbors) == 0:
                    labels = network.labels(pores=nearest_neighbor,
                                            mode='intersection')
                    common_label = [l for l in labels if 'surface_' in l]
                    new_neighbors = network.pores(common_label)
            elif neighbor in Pn_new_net:
                new_neighbors = nearest_neighbor
            connect_pores(network=network, pores1=neighbor,
                          pores2=new_neighbors, labels=labels)

        # Removing temporary labels
        for l in main_labels:
            network['pore.surface_' + l] = False
        new_net['pore.coords'] = _sp.copy(old_coords)

    network._label_surfaces()
    for l in main_labels:
        del network['pore.surface_'+l]
    trim(network=network, pores=pores)
    _mgr.purge_object(obj=new_net, mode='complete')
Example #52
0
def ld_pruning(data_file=None,
               ld_radius=None,
               out_file_prefix=None,
               p_thres=None,
               verbose=False,
               max_r2=0.2):
    """
    LD pruning + P-value thresholding 
    """

    df = h5py.File(data_file, 'r')
    has_phenotypes = False
    if 'y' in df.keys():
        print 'Validation phenotypes found.'
        y = df['y'][...]  # Phenotype
        num_individs = len(y)
        risk_scores = sp.zeros(num_individs)
        has_phenotypes = True

    print ''
    if max_r2 < 1:
        print 'Applying LD-pruning + P-value thresholding with p-value threshold of %0.2e, a LD radius of %d SNPs, and a max r2 of %0.2f' % (
            p_thres, ld_radius, max_r2)
    else:
        if p_thres < 1:
            print 'Applying P-value thresholding with p-value threshold of %0.2e' % (
                p_thres)
        else:
            print 'Calculating polygenic risk score using all SNPs'
    results_dict = {}
    num_snps = 0
    cord_data_g = df['cord_data']

    chromsomes = []
    for chrom_str in cord_data_g.keys():
        g = cord_data_g[chrom_str]
        betas = g['betas'][...]
        n_snps = len(betas)
        num_snps += n_snps
        chromsomes.append(int((chrom_str.split('_'))[1]))

    chromsomes.sort()
    p_str = '%0.4f' % p_thres
    results_dict[p_str] = {}

    if out_file_prefix:
        #Preparing output files
        raw_effect_sizes = []
        raw_pval_effect_sizes = []
        updated_effect_sizes = []
        updated_pval_effect_sizes = []
        sids = []
        chromosomes = []
        positions = []
        nts = []

    tot_num_snps = 0
    num_snps_used = 0
    for chrom in chromsomes:
        chrom_str = 'chrom_%d' % chrom
        #print 'Chromosome %s:' % chrom_str
        g = cord_data_g[chrom_str]
        pvalues = g['ps'][...]
        snp_filter = pvalues < p_thres
        num_snps = sp.sum(snp_filter)
        if num_snps == 0:
            #print 'No SNPs, skipping chromosome'
            continue
        tot_num_snps += num_snps

        pvalues = pvalues[snp_filter]
        if 'raw_snps_val' in g.keys():
            raw_snps = g['raw_snps_val'][...][snp_filter]

        else:
            raw_snps = g['raw_snps_ref'][...][snp_filter]

        snp_means = g['snp_means_ref'][...][snp_filter]
        snp_stds = g['snp_stds_ref'][...][snp_filter]
        raw_betas = g['log_odds'][...][snp_filter]
        pval_derived_betas = g['betas'][...][snp_filter]
        if out_file_prefix:
            chromosomes.extend([chrom_str] * len(pval_derived_betas))
            positions.extend(g['positions'][...][snp_filter])
            sids.extend(g['sids'][...][snp_filter])
            raw_effect_sizes.extend(raw_betas)
            raw_pval_effect_sizes.extend(pval_derived_betas)
            nts.extend(g['nts'][...][snp_filter])

        if max_r2 < 1:
            #print 'Generating LD table from genotypes.'
            snp_means.shape = (len(snp_means), 1)
            snp_stds.shape = (len(snp_means), 1)
            #Normalize SNPs..
            norm_ref_snps = sp.array((raw_snps - snp_means) / snp_stds,
                                     dtype='float32')
            ld_table = ld.calc_ld_table(norm_ref_snps,
                                        max_ld_dist=ld_radius,
                                        min_r2=max_r2,
                                        verbose=verbose)

            updated_raw_betas, pruning_vector = smart_ld_pruning(
                raw_betas,
                ld_table,
                pvalues=pvalues,
                max_ld=max_r2,
                verbose=verbose)
            updated_pval_derived_betas = pval_derived_betas * pruning_vector
            num_snps_used += sp.sum(pruning_vector)
        else:
            updated_raw_betas = sp.copy(raw_betas)
            updated_pval_derived_betas = sp.copy(pval_derived_betas)
            updated_pval_derived_betas = updated_pval_derived_betas / (
                snp_stds.flatten())
            pruning_vector = sp.ones(len(pval_derived_betas))
            num_snps_used += sp.sum(pruning_vector)

        if out_file_prefix:
            updated_effect_sizes.extend(updated_raw_betas)
            updated_pval_effect_sizes.extend(updated_pval_derived_betas)

        if has_phenotypes:
            print 'Calculating scores for Chromosome %s' % chrom_str
            prs = sp.dot(updated_raw_betas, raw_snps)
            risk_scores += prs
            corr = sp.corrcoef(y, prs)[0, 1]
            r2 = corr**2
            print 'The R2 prediction accuracy of PRS using %s was: %0.4f' % (
                chrom_str, r2)

    print 'There were %d (SNP) effects after p-value thresholding' % tot_num_snps
    print 'After LD-pruning %d SNPs had non-zero effects' % num_snps_used
    if has_phenotypes:
        num_indivs = len(y)
        results_dict[p_str]['y'] = y
        results_dict[p_str]['risk_scores'] = risk_scores
        print 'Prediction accuracy was assessed using %d individuals.' % (
            num_indivs)

        corr = sp.corrcoef(y, risk_scores)[0, 1]
        r2 = corr**2
        results_dict[p_str]['r2_pd'] = r2
        print 'The  R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)' % (
            r2, ((1 - r2)**2) / num_indivs)

        if corr < 0:
            risk_scores = -1 * risk_scores


#         auc = calc_auc(y,risk_scores_pval_derived)
#         print 'AUC for the whole genome was: %0.4f'%auc

#Now calibration
        denominator = sp.dot(risk_scores.T, risk_scores)
        y_norm = (y - sp.mean(y)) / sp.std(y)
        numerator = sp.dot(risk_scores.T, y_norm)
        regression_slope = (numerator / denominator)
        print 'The slope for predictions with P-value derived  effects is:', regression_slope
        results_dict[p_str]['slope_pd'] = regression_slope

    if max_r2 == 1:
        weights_out_file = '%s_all_snps.txt' % (out_file_prefix)
    else:
        weights_out_file = '%s_P+T_p%0.4e.txt' % (out_file_prefix, p_thres)
    with open(weights_out_file, 'w') as f:
        f.write(
            'chrom    pos    sid    nt1    nt2    raw_beta    raw_pval_beta    updated_beta    updated_pval_beta \n'
        )
        for chrom, pos, sid, nt, raw_beta, raw_pval_beta, upd_beta, upd_pval_beta in it.izip(
                chromosomes, positions, sids, nts, raw_effect_sizes,
                raw_pval_effect_sizes, updated_effect_sizes,
                updated_pval_effect_sizes):
            nt1, nt2 = nt[0], nt[1]
            f.write(
                '%s    %d    %s    %s    %s    %0.4e    %0.4e    %0.4e    %0.4e\n'
                % (chrom, pos, sid, nt1, nt2, raw_beta, raw_pval_beta,
                   upd_beta, upd_pval_beta))
Example #53
0
    Lambda = Lambda0

[nTupOld, nTdnOld] = [1e8, 1e8]
[Sigma0,
 Sigma1] = [U * (nTup + nTdn - 1.0) / 2.0, Lambda * (nTdn - nTup) / 2.0]

k = 1
sumsq = 1e8 if FSC else 0.0  ## convergence criterion for the FSC scheme
while any([
        sp.fabs(nTupOld - nTup) > epsn,
        sp.fabs(nTdnOld - nTdn) > epsn, sumsq > 0.01
]):
    if chat: print('#\n# Iteration {0: 3d}'.format(k))
    [nTupOld, nTdnOld] = [nTup, nTdn]
    if FSC:
        GFTupOld_A = sp.copy(GFTup_A)
    ## Lambda vertex
    if chat: print('# - calculating Lambda vertex:')
    Lambda = CalculateLambdaD(GFTup_A, GFTdn_A, Lambda)
    if chat: print('# - - Lambda vertex:  Lambda: {0: .8f}'.format(Lambda))
    if True:  ## print auxiliary functions, development only
        #	if False:
        K = KvertexD(Lambda, GFTup_A, GFTdn_A)
        if chat: print('# - - K vertex:            K: {0: .8f}'.format(K))
        ## check the integrals:
        XD = ReBDDFDD(GFTup_A, GFTdn_A, 0)
        if chat: print('# - - aux. integral:       X: {0: .8f}'.format(XD))
    ## HF self-energy
    if chat: print('# - calculating static self-energy:')
    [Sigma0, Sigma1] = CalculateSigmaT(Lambda, Sigma0, Sigma1, GFlambda,
                                       DensityLambda)
Example #54
0
def non_infinitesimal_mcmc(beta_hats,
                           Pi,
                           Sigi2,
                           sig_12,
                           start_betas=None,
                           h2=None,
                           n=1000,
                           ld_radius=100,
                           num_iter=60,
                           burn_in=10,
                           zero_jump_prob=0.05,
                           ld_dict=None):
    """
    MCMC of non-infinitesimal model
    """
    m = len(beta_hats)

    curr_betas = sp.copy(start_betas)
    curr_post_means = sp.zeros(m)
    avg_betas = sp.zeros(m)

    # Iterating over effect estimates in sequential order
    iter_order = sp.arange(m)

    for k in range(num_iter):  #Big iteration

        #Force an alpha shrink if estimates are way off compared to heritability estimates.  (Improves MCMC convergence.)
        h2_est = max(0.00001, sp.sum(curr_betas**2))
        alpha = min(1 - zero_jump_prob, 1.0 / h2_est,
                    (h2 + 1 / sp.sqrt(n)) / h2_est)
        rand_ps = sp.random.random(m)

        for i, snp_i in enumerate(iter_order):
            if Sigi2[snp_i] == 0:
                curr_post_means[snp_i] = 0
                curr_betas[snp_i] = 0
            else:
                hdmp = (Sigi2[snp_i] / Pi[snp_i])  #(h2 / Mp)
                hdmpn = hdmp + sig_12  #1.0 / n
                hdmp_hdmpn = (hdmp / hdmpn)
                c_const = (Pi[snp_i] / sp.sqrt(hdmpn))
                d_const = (1 - Pi[snp_i]) / (sp.sqrt(sig_12))
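                # c_const and d_const are the prior weights (times the Gaussian normalisation)
                # of the non-zero and zero components of the point-normal prior; postp computed
                # below is then the posterior probability that this SNP has a non-zero effect.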

                start_i = max(0, snp_i - ld_radius)
                focal_i = min(ld_radius, snp_i)
                stop_i = min(m, snp_i + ld_radius + 1)

                #Local LD matrix
                D_i = ld_dict[snp_i]

                #Local (most recently updated) effect estimates
                local_betas = curr_betas[start_i:stop_i]

                #Calculate the local posterior mean, used when sampling.
                local_betas[focal_i] = 0
                res_beta_hat_i = beta_hats[snp_i] - sp.dot(D_i, local_betas)
                b2 = res_beta_hat_i**2

                d_const_b2_exp = d_const * sp.exp(-b2 / (2.0 * sig_12))
                if sp.isreal(d_const_b2_exp):
                    numerator = c_const * sp.exp(-b2 / (2.0 * hdmpn))
                    if sp.isreal(numerator):
                        if numerator == 0:
                            postp = 0
                        else:
                            postp = numerator / (numerator + d_const_b2_exp)
                            assert sp.isreal(
                                postp), 'Posterior mean is not a real number?'
                    else:
                        postp = 0
                else:
                    postp = 1
                curr_post_means[snp_i] = hdmp_hdmpn * postp * res_beta_hat_i

                if rand_ps[i] < postp * alpha:
                    #Sample from the posterior Gaussian dist.
                    proposed_beta = stats.norm.rvs(
                        0, (hdmp_hdmpn) * sig_12,
                        size=1) + hdmp_hdmpn * res_beta_hat_i

                else:
                    #Sample 0
                    proposed_beta = 0

                curr_betas[snp_i] = proposed_beta  #UPDATE BETA

        if k >= burn_in:
            avg_betas += curr_post_means  #Averaging over the posterior means instead of samples.

    avg_betas = avg_betas / float(num_iter - burn_in)

    return {'betas': avg_betas, 'inf_betas': start_betas}
Example #55
0
def flood(im, regions=None, mode='max'):
    r"""
    Floods/fills each region in an image with a single value based on the
    specific values in that region.  The ``mode`` argument is used to
    determine how the value is calculated.

    Parameters
    ----------
    im : array_like
        An ND image with isolated regions containing 0's elsewhere.

    regions : array_like
        An array the same shape as ``im`` with each region labeled.  If None is
        supplied (default) then ``scipy.ndimage.label`` is used with its
        default arguments.

    mode : string
        Specifies how to determine which value should be used to flood each
        region.  Options are:

    *'max'* : Floods each region with the local maximum in that region

    *'min'* : Floods each region with the local minimum in that region

    *'size'* : Floods each region with the size of that region

    Returns
    -------
    An ND-array the same size as ``im`` with new values placed in each
    foreground voxel based on the ``mode``.

    See Also
    --------
    props_to_image

    """
    mask = im > 0
    if regions is None:
        labels, N = spim.label(mask)
    else:
        labels = sp.copy(regions)
        N = labels.max()
    I = im.flatten()
    L = labels.flatten()
    if mode.startswith('max'):
        V = sp.zeros(shape=N + 1, dtype=float)
        for i in range(len(L)):
            if V[L[i]] < I[i]:
                V[L[i]] = I[i]
    elif mode.startswith('min'):
        V = sp.ones(shape=N + 1, dtype=float) * sp.inf
        for i in range(len(L)):
            if V[L[i]] > I[i]:
                V[L[i]] = I[i]
    elif mode.startswith('size'):
        V = sp.zeros(shape=N + 1, dtype=int)
        for i in range(len(L)):
            V[L[i]] += 1
    im_flooded = sp.reshape(V[labels], newshape=im.shape)
    im_flooded = im_flooded * mask
    return im_flooded
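A minimal usage sketch of ``flood`` (illustrative values only; it assumes the function above is in scope and relies on the same scipy / scipy.ndimage imports as the snippet): two isolated blobs are each filled with their own maximum.

import scipy as sp
import scipy.ndimage as spim

im = sp.array([[0, 2, 0, 0],
               [0, 5, 0, 7],
               [0, 0, 0, 3]], dtype=float)
out = flood(im, mode='max')
# out:
# [[0. 5. 0. 0.]
#  [0. 5. 0. 7.]
#  [0. 0. 0. 7.]]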
Example #56
0
range_num_d2d_pairs = [2, 3, 4, 5, 6, 7, 8, 9, 10]
# range_num_d2d_pairs = [10]
time_sol_vec_Mon = []
EE_sol_vec_Mon = []

avg = {}
num_infeasible = sp.zeros(len(range_num_d2d_pairs))
for prin in range_num_d2d_pairs:
    num_d2d_pairs = prin
    # rmin = sp.multiply(0.4, sp.log(2))
    time_sol_vec = []
    EE_sol_vec = []

    for Mon in xrange(max_chan_realizaion):
        try:
            max_d2d_to_d2d_gains_diff = sp.copy(max_d2d_to_d2d_gains[:, :,
                                                                     Mon])
            sp.fill_diagonal(max_d2d_to_d2d_gains_diff, 0)
            max_d2d_to_d2d_gains_diag = sp.subtract(
                max_d2d_to_d2d_gains[:, :, Mon], max_d2d_to_d2d_gains_diff)

            uav_to_d2d_gains = max_uav_to_d2d_gains[:num_d2d_pairs, Mon]
            d2d_to_d2d_gains = max_d2d_to_d2d_gains[:num_d2d_pairs, :
                                                    num_d2d_pairs, Mon]
            d2d_to_d2d_gains_diff = max_d2d_to_d2d_gains_diff[:num_d2d_pairs, :
                                                              num_d2d_pairs]
            d2d_to_d2d_gains_diag = sp.subtract(d2d_to_d2d_gains,
                                                d2d_to_d2d_gains_diff)

            # ############################################################
            # This code is used to find the initial point for EEmax algorithm
            # ############################################################
def bi_get_initial(p_dict):
    local_ld_dict_file = '%s_ldradius%d.pickled.gz'%(p_dict['local_ld_prefix'], p_dict['ld_radius'])
    if not os.path.isfile(local_ld_dict_file):
        df1 = h5py.File(p_dict['coord_D1'])
        df2 = h5py.File(p_dict['coord_D2'])   

        chrom_ld_scores_dict1 = {}
        chrom_ld_dict1 = {}
        chrom_ref_ld_mats1 = {}
        ld_score_sum1 = 0
        num_snps1 = 0
        chrom_snps1 = {}
        chrom_betas1 = {}
        chrom_snpids = {}
        chrom_betas2 = {}

        print 'Calculating LD information w. radius %d'% p_dict['ld_radius']

        cord_data_g1 = df1['cord_data']
        cord_data_g2 = df2['cord_data']
        # find overlap of chrom list
        chr_list = list(set(cord_data_g1.keys()) & set(cord_data_g2.keys()))
        for chrom_str in chr_list:
            print 'Working on %s'%chrom_str
            print 'Sorting disease 1'
            g1 = cord_data_g1[chrom_str]
            if 'raw_snps_ref' in g1.keys():
                raw_snps1 = g1['raw_snps_ref'][...]
                snp_stds1 = g1['snp_stds_ref'][...]
                snp_means1 = g1['snp_means_ref'][...]
                betas1 = g1['betas'][...]
            #Filter monomorphic SNPs
            ok_snps_filter1 = snp_stds1>0
            ok_snps_filter1 = ok_snps_filter1.flatten()
            sids1 = g1['sids'][...]
            sids1 = sids1[ok_snps_filter1]

            print 'Sorting disease 2'
            g2 = cord_data_g2[chrom_str]
            if 'raw_snps_ref' in g2.keys():
                raw_snps2 = g2['raw_snps_ref'][...]
                snp_stds2 = g2['snp_stds_ref'][...]
                snp_means2 = g2['snp_means_ref'][...]
                betas2 = g2['betas'][...]
            #Filter monomorphic SNPs
            ok_snps_filter2 = snp_stds2>0
            ok_snps_filter2 = ok_snps_filter2.flatten()
            sids2 = g2['sids'][...]
            sids2 = sids2[ok_snps_filter2]

            print 'Extracting SNPs shared by both disease 1 and 2'
            ind1 = np.in1d(sids1,sids2)
            ind2 = np.in1d(sids2,sids1)
            sids_shared1 = sids1[ind1]
            sids_shared2 = sids2[ind2]
            raw_snps1 = raw_snps1[ok_snps_filter1][ind1]
            snp_means1 = snp_means1[ok_snps_filter1][ind1]
            snp_stds1 = snp_stds1[ok_snps_filter1][ind1]
            betas1 = betas1[ok_snps_filter1][ind1]
            betas2 = betas2[ok_snps_filter2][ind2]
            n_snps1 = len(raw_snps1)
            snp_means1.shape = (n_snps1,1)   
            snp_stds1.shape = (n_snps1,1)
            ### check order ###
            if sum(sids_shared1==sids_shared2)==len(sids_shared2):
                print 'Good!'
            else:
                print 'Shit happens, sorting sids1 and sids2'
                O1 = np.argsort(sids_shared1)
                O2 = np.argsort(sids_shared2)
                O3 = np.argsort(O2)
                sids_shared1 = sids_shared1[O1][O3]
                if sum(sids_shared1==sids_shared2)==len(sids_shared2):
                    raw_snps1 = raw_snps1[O1][O3]
                    snp_means1 = snp_means1[O1][O3]
                    snp_stds1 = snp_stds1[O1][O3]
                    betas1 = betas1[O1][O3]
                else:
                    print 'Stop! Problems with sorting!'

            # Normalize SNPs..
            chrom_snpids[chrom_str] = sids_shared1

            snps1 = sp.array((raw_snps1 - snp_means1)/snp_stds1,dtype='float32')
            assert snps1.shape==raw_snps1.shape, 'Array Shape mismatch'
            chrom_snps1[chrom_str] = snps1
            ret_dict1 = get_LDpred_ld_tables(snps1, ld_radius=p_dict['ld_radius'], ld_window_size=2*p_dict['ld_radius'])
            chrom_ld_dict1[chrom_str] = ret_dict1['ld_dict']
            chrom_ref_ld_mats1[chrom_str] = ret_dict1['ref_ld_matrices']
            ld_scores1 = ret_dict1['ld_scores']
            chrom_ld_scores_dict1[chrom_str] = {'ld_scores':ld_scores1, 'avg_ld_score':sp.mean(ld_scores1)}
            ld_score_sum1 += sp.sum(ld_scores1)
            num_snps1 += n_snps1

            chrom_betas1[chrom_str] = betas1
            chrom_betas2[chrom_str] = betas2


        avg_gw_ld_score1 = ld_score_sum1 / float(num_snps1)
        ld_scores_dict1 = {'avg_gw_ld_score': avg_gw_ld_score1, 'chrom_dict':chrom_ld_scores_dict1}    

        print 'Done calculating the LD table and LD score, writing to file:', local_ld_dict_file
        print 'Genome-wide average LD score was:', ld_scores_dict1['avg_gw_ld_score']
        ld_dict = {'ld_scores_dict':ld_scores_dict1, 'chrom_ld_dict':chrom_ld_dict1, 
        'chrom_ref_ld_mats':chrom_ref_ld_mats1, 'chrom_snps':chrom_snps1, 
        'chrom_betas1':chrom_betas1, 'chrom_betas2':chrom_betas2, 
        'chrom_snpids':chrom_snpids}

        f = gzip.open(local_ld_dict_file, 'wb')
        cPickle.dump(ld_dict, f, protocol=2)
        f.close()
        print 'LD information is now pickled.'
    else:
        print 'Loading LD information from file: %s'%local_ld_dict_file
        f = gzip.open(local_ld_dict_file, 'r')
        ld_dict = cPickle.load(f)
        f.close()

    print 'Starting calculation using h2 files as priors'
    print 'Loading prior information from file: %s'%p_dict['hfile']
    with open(p_dict['hfile']) as f:
        data = f.readlines()
    prf_chr = sp.empty(len(data),dtype='int8')
    prf_sids = []
    prf_h2_D1 = sp.zeros(len(data))
    prf_h2_D2 = sp.zeros(len(data))
    for i,line in enumerate(data):
        li = line.split()
        prf_chr[i] = int(li[0])
        prf_sids.append(li[1]) 
        #prf_pi[i] = p_dict['PS'][0]         
        prf_h2_D1[i] = float(li[2])
        prf_h2_D2[i] = float(li[3])  
    prf_sids = sp.array(prf_sids,dtype='str')
    prf = {}
    prf['chrom'] = prf_chr
    prf['sids'] = prf_sids
    prf['h2_D1'] = prf_h2_D1
    prf['h2_D2'] = prf_h2_D2
    
    data_file_D1=p_dict['coord_D1']
    data_file_D2=p_dict['coord_D2']
    out_file_prefix=p_dict['out']
    ld_radius=p_dict['ld_radius']
    n1=p_dict['N1']
    n2=p_dict['N2']
    PRF = prf
    prf_chr = PRF['chrom']
    prf_sids = PRF['sids']
    h2_D1 = PRF['h2_D1']
    h2_D2 = PRF['h2_D2']
    df1 = h5py.File(data_file_D1,'r')
    df2 = h5py.File(data_file_D2,'r')
    cord_data_g1 = df1['cord_data']
    cord_data_g2 = df2['cord_data']
    has_phenotypes1=False
    if 'y' in df1.keys():
        print 'Validation phenotypes of disease 1 found.'
        y1 = df1['y'][...]  # Phenotype
        num_individs1 = len(y1)
        prs_D1 = sp.zeros(num_individs1)
        has_phenotypes1=True
    has_phenotypes2=False
    if 'y' in df2.keys():
        print 'Validation phenotypes of disease 2 found.'
        y2 = df2['y'][...]  # Phenotype
        num_individs2 = len(y2)
        prs_D2 = sp.zeros(num_individs2)
        has_phenotypes2=True

    ld_scores_dict = ld_dict['ld_scores_dict']
    chrom_ld_dict = ld_dict['chrom_ld_dict']
    # Only one LD reference was computed (on the SNPs shared by both diseases),
    # so both traits reuse the same reference LD matrices and genotype matrices.
    chrom_ref_ld_mats1 = ld_dict['chrom_ref_ld_mats']
    chrom_ref_ld_mats2 = ld_dict['chrom_ref_ld_mats']
    chrom_snps1 = ld_dict['chrom_snps']
    chrom_snps2 = ld_dict['chrom_snps']
    chrom_betas1 = ld_dict['chrom_betas1']
    chrom_betas2 = ld_dict['chrom_betas2']
    chrom_snpids = ld_dict['chrom_snpids']
        
    #results_dict = {}
    num_snps1 = 0
    sum_beta2s1 = 0
    num_snps2 = 0
    sum_beta2s2 = 0
    chr_list = list(set(cord_data_g1.keys()) & set(cord_data_g2.keys()))
    for chrom_str in chromosomes_list: 
        if chrom_str in chr_list:
            betas1 = chrom_betas1[chrom_str]
            n_snps1 = len(betas1)
            num_snps1 += n_snps1
            sum_beta2s1 += sp.sum(betas1 ** 2)
            betas2 = chrom_betas2[chrom_str]
            n_snps2 = len(betas2)
            num_snps2 += n_snps2
            sum_beta2s2 += sp.sum(betas2 ** 2)
        
    L1 = ld_scores_dict['avg_gw_ld_score']
    chi_square_lambda1 = sp.mean(n1 * sum_beta2s1 / float(num_snps1))
    print 'Genome-wide lambda inflation of D1:', chi_square_lambda1
    print 'Genome-wide mean LD score of D1:', L1
    gw_h2_ld_score_est1 = max(0.0001, (max(1, chi_square_lambda1) - 1) / (n1 * (L1 / num_snps1)))
    print 'Estimated genome-wide heritability of D1:', gw_h2_ld_score_est1
    L2 = ld_scores_dict['avg_gw_ld_score']
    chi_square_lambda2 = sp.mean(n2 * sum_beta2s2 / float(num_snps2))
    print 'Genome-wide lambda inflation of D2:', chi_square_lambda2
    print 'Genome-wide mean LD score of D2:', L2
    gw_h2_ld_score_est2 = max(0.0001, (max(1, chi_square_lambda2) - 1) / (n2 * (L2 / num_snps2)))
    print 'Estimated genome-wide heritability of D2:', gw_h2_ld_score_est2
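    # Rationale for the estimate above (LD-score-regression style): under a polygenic
    # model the expected GWAS chi-square is roughly 1 + N*h2*(L/M), where L is the
    # mean LD score and M the number of SNPs, so h2 is estimated as
    # (lambda - 1) / (N * L / M), floored at 0.0001 to keep the prior positive.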
    h2_new1 = sp.sum(h2_D1)
    sig_12_D1 = (1.0)/n1    
    pr_sig1 = {}
    h2_new2 = sp.sum(h2_D2)
    sig_12_D2 = (1.0)/n2 
    pr_sig2 = {}
    anno_post1 = {}
    anno_post2 = {}
    post_betas1 = {}
    post_betas2 = {}
    ld_post1 = {}
    ld_post2 = {}
    ## main calculation, chr by chr, posterior betas and prs ##
    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            print 'Calculating scores for Chromosome %s'%((chrom_str.split('_'))[1])           
            pval_derived_betas1 = chrom_betas1[chrom_str]
            pval_derived_betas2 = chrom_betas2[chrom_str]
            snps1 = chrom_snps1[chrom_str]
            snps2 = chrom_snps2[chrom_str]
            sids = chrom_snpids[chrom_str]
            n_snps_chrom = len(sids)
            chri = int(chrom_str.split('_')[1])
            prf_sids_chri = prf_sids[prf_chr==chri]
            h2_D1_chri = h2_D1[prf_chr==chri]
            h2_D2_chri = h2_D2[prf_chr==chri]
            if len(prf_sids_chri)==len(sids):
                if sum(prf_sids_chri==sids)==len(prf_sids_chri):
                    pr_sig1[chrom_str] = sp.copy(h2_D1_chri)
                    pr_sig2[chrom_str] = sp.copy(h2_D2_chri)
                else:
                    print 'sorting prior files'
                    pr_sig1[chrom_str] = sp.zeros(len(sids))
                    pr_sig2[chrom_str] = sp.zeros(len(sids))
                    for i, sid in enumerate(sids):
                        pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri==sid]
                        pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri==sid]
            else:
                print 'extracting prior files'
                pr_sig1[chrom_str] = sp.zeros(len(sids))
                pr_sig2[chrom_str] = sp.zeros(len(sids))
                for i, sid in enumerate(sids):
                    pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri==sid]
                    pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri==sid]
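            # Rescale the per-SNP prior variances from the h2 file so that they sum to
            # the genome-wide h2 estimated above; h2_new1/h2_new2 are the genome-wide
            # sums of the file's per-SNP values.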
            pr_sig1[chrom_str] = gw_h2_ld_score_est1*pr_sig1[chrom_str]/h2_new1
            pr_sig2[chrom_str] = gw_h2_ld_score_est2*pr_sig2[chrom_str]/h2_new2
    ########################### using AnnoPred-baseline as initial values ###############################
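            # annopred_inf is defined elsewhere in the package; it presumably computes
            # LDpred-inf-style posterior mean effect sizes within each LD window, using
            # the per-SNP prior variances in pr_sigi in place of a single genome-wide
            # h2/M value.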
            annopred_betas1 = annopred_inf(
                pval_derived_betas1, 
                pr_sigi=pr_sig1[chrom_str], 
                reference_ld_mats=chrom_ref_ld_mats1[chrom_str], 
                n=n1, 
                ld_window_size=2*ld_radius
                )
            annopred_betas2 = annopred_inf(
                pval_derived_betas2, 
                pr_sigi=pr_sig2[chrom_str], 
                reference_ld_mats=chrom_ref_ld_mats2[chrom_str], 
                n=n2, 
                ld_window_size=2*ld_radius
                )
            anno_post1[chrom_str] = annopred_betas1
            anno_post2[chrom_str] = annopred_betas2
    anno_post = {'anno_post1':anno_post1, 'anno_post2':anno_post2}

    f = gzip.open(out_file_prefix, 'wb')
    cPickle.dump(anno_post, f, protocol=2)
    f.close()
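A minimal sketch of how the gzip-pickled posterior betas written above could be reloaded (same Python 2 style as the snippet; the output path is a placeholder for whatever was passed as p_dict['out']):

import gzip
import cPickle

out_path = 'pleiopred_anno_post.pkl.gz'  # placeholder; in the snippet above this is p_dict['out']
f = gzip.open(out_path, 'rb')
anno_post = cPickle.load(f)
f.close()

# 'anno_post1' and 'anno_post2' map chromosome keys (e.g. 'chrom_1') to arrays of
# posterior effect sizes for disease 1 and disease 2.
for chrom_str, betas in anno_post['anno_post1'].items():
    print chrom_str, len(betas)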
예제 #58
0
def pleiopred_genomewide(data_file_D1,
                         data_file_D2,
                         rho,
                         ld_radius=None,
                         ld_dict=None,
                         out_file_prefix=None,
                         n1=None,
                         n2=None,
                         PRF=None,
                         user_h1=None,
                         user_h2=None):
    """
    Calculate PleioPred (and LDpred-inf-pleio) posterior effect sizes and risk scores genome-wide for two diseases.
    """
    prf_chr = PRF['chrom']
    prf_sids = PRF['sids']
    h2_D1 = PRF['h2_D1']
    h2_D2 = PRF['h2_D2']

    df1 = h5py.File(data_file_D1, 'r')
    df2 = h5py.File(data_file_D2, 'r')
    cord_data_g1 = df1['cord_data']
    cord_data_g2 = df2['cord_data']

    has_phenotypes1 = False
    if 'y' in df1.keys():
        print 'Validation phenotypes of disease 1 found.'
        y1 = df1['y'][...]  # Phenotype
        num_individs1 = len(y1)
        prs_D1 = sp.zeros(num_individs1)
        #        prs_anno_D1 = sp.zeros(num_individs1)
        #        prs_ld_D1 = sp.zeros(num_individs1)
        prs_ld_pleio_D1 = sp.zeros(num_individs1)
        has_phenotypes1 = True

    has_phenotypes2 = False
    if 'y' in df2.keys():
        print 'Validation phenotypes of disease 2 found.'
        y2 = df2['y'][...]  # Phenotype
        num_individs2 = len(y2)
        prs_D2 = sp.zeros(num_individs2)
        #        prs_anno_D2 = sp.zeros(num_individs2)
        #        prs_ld_D2 = sp.zeros(num_individs2)
        prs_ld_pleio_D2 = sp.zeros(num_individs2)
        has_phenotypes2 = True

    ld_scores_dict = ld_dict['ld_scores_dict']
    chrom_ld_dict = ld_dict['chrom_ld_dict']
    chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats']
    chrom_snps = ld_dict['chrom_snps']
    chrom_snpids = ld_dict['chrom_snpids']

    chrom_betas1 = ld_dict['chrom_betas1']
    chrom_betas2 = ld_dict['chrom_betas2']

    #results_dict = {}

    num_snps1 = 0
    sum_beta2s1 = 0
    num_snps2 = 0
    sum_beta2s2 = 0

    chr_list = list(set(cord_data_g1.keys()) & set(cord_data_g2.keys()))

    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            betas1 = chrom_betas1[chrom_str]
            n_snps1 = len(betas1)
            num_snps1 += n_snps1
            sum_beta2s1 += sp.sum(betas1**2)
            betas2 = chrom_betas2[chrom_str]
            n_snps2 = len(betas2)
            num_snps2 += n_snps2
            sum_beta2s2 += sp.sum(betas2**2)

    if user_h1 is None or user_h2 is None:
        L1 = ld_scores_dict['avg_gw_ld_score']
        chi_square_lambda1 = sp.mean(n1 * sum_beta2s1 / float(num_snps1))
        print 'Genome-wide lambda inflation of D1:', chi_square_lambda1
        print 'Genome-wide mean LD score of D1:', L1
        gw_h2_ld_score_est1 = max(0.0001, (max(1, chi_square_lambda1) - 1) /
                                  (n1 * (L1 / num_snps1)))
        print 'Estimated genome-wide heritability of D1:', gw_h2_ld_score_est1

        assert chi_square_lambda1 > 1, 'Something is wrong with the GWAS summary statistics of D1. Perhaps there were issues parsing them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean chi-square statistic) is too small.'

        L2 = ld_scores_dict['avg_gw_ld_score']
        chi_square_lambda2 = sp.mean(n2 * sum_beta2s2 / float(num_snps2))
        print 'Genome-wide lambda inflation of D2:', chi_square_lambda2
        print 'Genome-wide mean LD score of D2:', L2
        gw_h2_ld_score_est2 = max(0.0001, (max(1, chi_square_lambda2) - 1) /
                                  (n2 * (L2 / num_snps2)))
        print 'Estimated genome-wide heritability of D2:', gw_h2_ld_score_est2

        assert chi_square_lambda2 > 1, 'Something is wrong with the GWAS summary statistics of D2. Perhaps there were issues parsing them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean chi-square statistic) is too small.'
    else:
        gw_h2_ld_score_est1 = user_h1
        gw_h2_ld_score_est2 = user_h2

    h2_new1 = sp.sum(h2_D1)
    sig_12_D1 = (1.0) / n1
    pr_sig1 = {}

    h2_new2 = sp.sum(h2_D2)
    sig_12_D2 = (1.0) / n2
    pr_sig2 = {}

    post_betas1 = {}
    post_betas2 = {}
    #    anno_post1 = {}
    #    anno_post2 = {}
    #    ld_post1 = {}
    #    ld_post2 = {}
    #
    ld_pleio_post1 = {}
    ld_pleio_post2 = {}

    out1 = []
    out1.append('Estimated Genome-wide heritability: ' +
                str(gw_h2_ld_score_est1) + '\n')
    out1.append('Posterior variance for each snp: ' + str(sig_12_D1) + '\n')

    out2 = []
    out2.append('Estimated Genome-wide heritability: ' +
                str(gw_h2_ld_score_est2) + '\n')
    out2.append('Posterior variance for each snp: ' + str(sig_12_D2) + '\n')

    #    out_anno1 = []
    #    out_anno1.append('Estimated Genome-wide heritability: '+str(gw_h2_ld_score_est1)+'\n')
    #    out_anno1.append('Posterior variance for each snp: '+str(sig_12_D1)+'\n')
    #
    #    out_anno2 = []
    #    out_anno2.append('Estimated Genome-wide heritability: '+str(gw_h2_ld_score_est2)+'\n')
    #    out_anno2.append('Posterior variance for each snp: '+str(sig_12_D2)+'\n')
    #
    #    out_ld1 = []
    #    out_ld1.append('Estimated Genome-wide heritability: '+str(gw_h2_ld_score_est1)+'\n')
    #    out_ld1.append('Posterior variance for each snp: '+str(sig_12_D1)+'\n')
    #
    #    out_ld2 = []
    #    out_ld2.append('Estimated Genome-wide heritability: '+str(gw_h2_ld_score_est2)+'\n')
    #    out_ld2.append('Posterior variance for each snp: '+str(sig_12_D2)+'\n')

    out_ld_pleio1 = []
    out_ld_pleio1.append('Estimated Genome-wide heritability: ' +
                         str(gw_h2_ld_score_est1) + '\n')
    out_ld_pleio1.append('Posterior variance for each snp: ' + str(sig_12_D1) +
                         '\n')

    out_ld_pleio2 = []
    out_ld_pleio2.append('Estimated Genome-wide heritability: ' +
                         str(gw_h2_ld_score_est2) + '\n')
    out_ld_pleio2.append('Posterior variance for each snp: ' + str(sig_12_D2) +
                         '\n')

    ## main calculation, chr by chr, posterior betas and prs ##
    print 'Starting with rho = ', rho
    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            print 'Calculating scores for Chromosome %s' % (
                (chrom_str.split('_'))[1])

            pval_derived_betas1 = chrom_betas1[chrom_str]
            pval_derived_betas2 = chrom_betas2[chrom_str]
            sids = chrom_snpids[chrom_str]

            n_snps_chrom = len(sids)

            chri = int(chrom_str.split('_')[1])
            prf_sids_chri = prf_sids[prf_chr == chri]
            h2_D1_chri = h2_D1[prf_chr == chri]
            h2_D2_chri = h2_D2[prf_chr == chri]
            if len(prf_sids_chri) == len(sids):
                if sum(prf_sids_chri == sids) == len(prf_sids_chri):
                    pr_sig1[chrom_str] = sp.copy(h2_D1_chri)
                    pr_sig2[chrom_str] = sp.copy(h2_D2_chri)
                else:
                    print 'sorting prior files'
                    pr_sig1[chrom_str] = sp.zeros(len(sids))
                    pr_sig2[chrom_str] = sp.zeros(len(sids))
                    for i, sid in enumerate(sids):
                        pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri ==
                                                           sid]
                        pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri ==
                                                           sid]
            else:
                print 'extracting prior files'
                pr_sig1[chrom_str] = sp.zeros(len(sids))
                pr_sig2[chrom_str] = sp.zeros(len(sids))
                for i, sid in enumerate(sids):
                    pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri == sid]
                    pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri == sid]

            pr_sig1[
                chrom_str] = gw_h2_ld_score_est1 * pr_sig1[chrom_str] / h2_new1
            pr_sig2[
                chrom_str] = gw_h2_ld_score_est2 * pr_sig2[chrom_str] / h2_new2
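            # pleiopred_inf is defined elsewhere in the package; it presumably solves a
            # joint (bivariate) infinitesimal model in which each SNP's pair of effects
            # has prior covariance [[s1, rho*sqrt(s1*s2)], [rho*sqrt(s1*s2), s2]] with
            # s1 = pr_sig1 and s2 = pr_sig2, returning posterior mean betas 'D1' and 'D2'.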

            posterior_betas = pleiopred_inf(
                beta_hats1=pval_derived_betas1,
                beta_hats2=pval_derived_betas2,
                pr_sig1=pr_sig1[chrom_str],
                pr_sig2=pr_sig2[chrom_str],
                rho=rho,
                ref_ld_mats1=chrom_ref_ld_mats[chrom_str],
                ref_ld_mats2=chrom_ref_ld_mats[chrom_str],
                n1=n1,
                n2=n2,
                ld_window_size=2 * ld_radius)
            post_betas1[chrom_str] = posterior_betas['D1']
            post_betas2[chrom_str] = posterior_betas['D2']

            #            annopred_betas1 = annopred_inf(
            #                pval_derived_betas1,
            #                pr_sigi=pr_sig1[chrom_str],
            #                reference_ld_mats=chrom_ref_ld_mats1[chrom_str],
            #                n=n1,
            #                ld_window_size=2*ld_radius
            #                )
            #            annopred_betas2 = annopred_inf(
            #                pval_derived_betas2,
            #                pr_sigi=pr_sig2[chrom_str],
            #                reference_ld_mats=chrom_ref_ld_mats2[chrom_str],
            #                n=n2,
            #                ld_window_size=2*ld_radius
            #                )
            #            anno_post1[chrom_str] = annopred_betas1
            #            anno_post2[chrom_str] = annopred_betas2
            #
            #            ldpred_betas1 = ldpred_inf(
            #                pval_derived_betas1,
            #                genotypes=None,
            #                reference_ld_mats=chrom_ref_ld_mats1[chrom_str],
            #                h2=gw_h2_ld_score_est1 * (n_snps_chrom / float(num_snps1)),
            #                n=n1,
            #                ld_window_size=2*ld_radius,
            #                verbose=False
            #                )
            #            ldpred_betas2 = ldpred_inf(
            #                pval_derived_betas2,
            #                genotypes=None,
            #                reference_ld_mats=chrom_ref_ld_mats2[chrom_str],
            #                h2=gw_h2_ld_score_est2 * (n_snps_chrom / float(num_snps2)),
            #                n=n2,
            #                ld_window_size=2*ld_radius,
            #                verbose=False
            #                )
            #            ld_post1[chrom_str] = ldpred_betas1
            #            ld_post2[chrom_str] = ldpred_betas2

            ldpred_pleio_betas = ld_pred_inf_pleio(
                beta_hats1=pval_derived_betas1,
                beta_hats2=pval_derived_betas2,
                h2_D1=gw_h2_ld_score_est1 * (n_snps_chrom / float(num_snps1)),
                h2_D2=gw_h2_ld_score_est2 * (n_snps_chrom / float(num_snps2)),
                rho=rho,
                ref_ld_mats1=chrom_ref_ld_mats[chrom_str],
                ref_ld_mats2=chrom_ref_ld_mats[chrom_str],
                n1=n1,
                n2=n2,
                ld_window_size=2 * ld_radius)
            ld_pleio_post1[chrom_str] = ldpred_pleio_betas['D1']
            ld_pleio_post2[chrom_str] = ldpred_pleio_betas['D2']

            ## prs and auc ##
            if has_phenotypes1:
                prs_inf1 = sp.dot(posterior_betas['D1'], chrom_snps[chrom_str])
                prs_D1 += prs_inf1
                #                prs_anno1 = sp.dot(annopred_betas1, snps1)
                #                prs_anno_D1 += prs_anno1
                #                prs_ld1 = sp.dot(ldpred_betas1, snps1)
                #                prs_ld_D1 += prs_ld1
                prs_ld_pleio1 = sp.dot(ldpred_pleio_betas['D1'],
                                       chrom_snps[chrom_str])
                prs_ld_pleio_D1 += prs_ld_pleio1

            if has_phenotypes2:
                prs_inf2 = sp.dot(posterior_betas['D2'], chrom_snps[chrom_str])
                prs_D2 += prs_inf2
                #                prs_anno2 = sp.dot(annopred_betas2, snps2)
                #                prs_anno_D2 += prs_anno2
                #                prs_ld2 = sp.dot(ldpred_betas2, snps2)
                #                prs_ld_D2 += prs_ld2
                prs_ld_pleio2 = sp.dot(ldpred_pleio_betas['D2'],
                                       chrom_snps[chrom_str])
                prs_ld_pleio_D2 += prs_ld_pleio2


############ PleioPred results #############
    corr_inf1 = sp.corrcoef(y1, prs_D1)[0, 1]
    r2_inf1 = corr_inf1**2
    #results_dict[p_str]['r2_pd']=r2_inf
    print 'D1: the R2 prediction accuracy (observed scale) of PleioPred was: %0.4f (%0.6f)' % (
        r2_inf1, ((1 - r2_inf1)**2) / num_individs1)
    out1.append(
        'D1: the R2 prediction accuracy (observed scale) of PleioPred was: ' +
        str(r2_inf1) + ' (' + str(((1 - r2_inf1)**2) / num_individs1) + ')\n')

    if corr_inf1 < 0:
        prs_D1 = -1 * prs_D1
    auc1 = pred_accuracy(y1, prs_D1)
    print 'D1: PleioPred AUC for the whole genome was: %0.4f' % auc1
    out1.append('D1: PleioPred AUC for the whole genome was: ' + str(auc1) +
                '\n')

    sp.savetxt('%s_y_' % (out_file_prefix) + '_D1.txt', y1)
    sp.savetxt('%s_prs-inf' % (out_file_prefix) + '_PleioPred_D1.txt', prs_D1)

    #Now calibration
    ff_inf = open('%s_inf_auc_' % (out_file_prefix) + '_PleioPred_D1.txt', "w")
    ff_inf.writelines(out1)
    ff_inf.close()

    corr_inf2 = sp.corrcoef(y2, prs_D2)[0, 1]
    r2_inf2 = corr_inf2**2
    #results_dict[p_str]['r2_pd']=r2_inf
    print 'D2: the R2 prediction accuracy (observed scale) of PleioPred was: %0.4f (%0.6f)' % (
        r2_inf2, ((1 - r2_inf2)**2) / num_individs2)
    out2.append(
        'D2: the R2 prediction accuracy (observed scale) of PleioPred was: ' +
        str(r2_inf2) + ' (' + str(((1 - r2_inf2)**2) / num_individs2) + ')\n')

    if corr_inf2 < 0:
        prs_D2 = -1 * prs_D2
    auc2 = pred_accuracy(y2, prs_D2)
    print 'D2: PleioPred AUC for the whole genome was: %0.4f' % auc2
    out2.append('D2: PleioPred AUC for the whole genome was: ' + str(auc2) +
                '\n')

    sp.savetxt('%s_y_' % (out_file_prefix) + '_D2.txt', y2)
    sp.savetxt('%s_prs-inf' % (out_file_prefix) + '_PleioPred_D2.txt', prs_D2)

    #Now calibration
    ff_inf = open('%s_inf_auc_' % (out_file_prefix) + '_PleioPred_D2.txt', "w")
    ff_inf.writelines(out2)
    ff_inf.close()

    ############ AnnoPred results #############
    #    corr_inf1 = sp.corrcoef(y1, prs_anno_D1)[0, 1]
    #    r2_inf1 = corr_inf1 ** 2
    #    #results_dict[p_str]['r2_pd']=r2_inf
    #    print 'D1: the R2 prediction accuracy (observed scale) of AnnoPred_inf was: %0.4f (%0.6f)' % (r2_inf1, ((1-r2_inf1)**2)/num_individs1)
    #    out_anno1.append('The R2 prediction accuracy (observed scale) AnnoPred_inf was: '+str(r2_inf1)+' ('+str(((1-r2_inf1)**2)/num_individs1)+')\n')
    #
    #    if corr_inf1<0:
    #        prs_anno_D1 = -1* prs_anno_D1
    #    auc1 = pred_accuracy(y1,prs_anno_D1)
    #    print 'D1: AnnoPred AUC for the whole genome was: %0.4f'%auc1
    #    out_anno1.append('D1: AnnoPred AUC for the whole genome was: '+str(auc1)+'\n')
    #
    # #   sp.savetxt('%s_y_'%(out_file_prefix)+'_D1.txt',y1)
    #    sp.savetxt('%s_prs-inf'%(out_file_prefix)+'_AnnoPred_D1.txt',prs_anno_D1)
    #
    #    #Now calibration
    #    ff_inf = open('%s_inf_auc_'%(out_file_prefix)+'_AnnoPred_D1.txt',"w")
    #    ff_inf.writelines(out_anno1)
    #    ff_inf.close()
    #
    #    corr_inf2 = sp.corrcoef(y2, prs_anno_D2)[0, 1]
    #    r2_inf2 = corr_inf2 ** 2
    #    #results_dict[p_str]['r2_pd']=r2_inf
    #    print 'D2: the R2 prediction accuracy (observed scale) of AnnoPred_inf was: %0.4f (%0.6f)' % (r2_inf2, ((1-r2_inf2)**2)/num_individs2)
    #    out_anno2.append('D2: the R2 prediction accuracy (observed scale) of AnnoPred_inf was: '+str(r2_inf2)+' ('+str(((1-r2_inf2)**2)/num_individs2)+')\n')
    #
    #    if corr_inf2<0:
    #        prs_anno_D2 = -1* prs_anno_D2
    #    auc2 = pred_accuracy(y2,prs_anno_D2)
    #    print 'D2: AnnoPred AUC for the whole genome was: %0.4f'%auc2
    #    out_anno2.append('D2: AnnoPred AUC for the whole genome was: '+str(auc2)+'\n')
    #
    ##    sp.savetxt('%s_y_'%(out_file_prefix)+'_D2.txt',y2)
    #    sp.savetxt('%s_prs-inf'%(out_file_prefix)+'_AnnoPred_D2.txt',prs_D2)
    #
    #    #Now calibration
    #    ff_inf = open('%s_inf_auc_'%(out_file_prefix)+'_AnnoPred_D2.txt',"w")
    #    ff_inf.writelines(out_anno2)
    #    ff_inf.close()

    ############ LDpred results #############
    #    corr_inf1 = sp.corrcoef(y1, prs_ld_D1)[0, 1]
    #    r2_inf1 = corr_inf1 ** 2
    #    #results_dict[p_str]['r2_pd']=r2_inf
    #    print 'D1: the R2 prediction accuracy (observed scale) of LDpred_inf was: %0.4f (%0.6f)' % (r2_inf1, ((1-r2_inf1)**2)/num_individs1)
    #    out_ld1.append('The R2 prediction accuracy (observed scale) LDpred_inf was: '+str(r2_inf1)+' ('+str(((1-r2_inf1)**2)/num_individs1)+')\n')
    #
    #    if corr_inf1<0:
    #        prs_ld_D1 = -1* prs_ld_D1
    #    auc1 = pred_accuracy(y1,prs_ld_D1)
    #    print 'D1: LDpred AUC for the whole genome was: %0.4f'%auc1
    #    out_ld1.append('D1: LDpred AUC for the whole genome was: '+str(auc1)+'\n')
    #
    # #   sp.savetxt('%s_y_'%(out_file_prefix)+'_D1.txt',y1)
    #    sp.savetxt('%s_prs-inf'%(out_file_prefix)+'_LDpred_D1.txt',prs_ld_D1)
    #
    #    #Now calibration
    #    ff_inf = open('%s_inf_auc_'%(out_file_prefix)+'_LDpred_D1.txt',"w")
    #    ff_inf.writelines(out_ld1)
    #    ff_inf.close()
    #
    #    corr_inf2 = sp.corrcoef(y2, prs_ld_D2)[0, 1]
    #    r2_inf2 = corr_inf2 ** 2
    #    #results_dict[p_str]['r2_pd']=r2_inf
    #    print 'D2: the R2 prediction accuracy (observed scale) of LDpred_inf was: %0.4f (%0.6f)' % (r2_inf2, ((1-r2_inf2)**2)/num_individs2)
    #    out_ld2.append('D2: the R2 prediction accuracy (observed scale) of LDpred_inf was: '+str(r2_inf2)+' ('+str(((1-r2_inf2)**2)/num_individs2)+')\n')
    #
    #    if corr_inf2<0:
    #        prs_ld_D2 = -1* prs_ld_D2
    #    auc2 = pred_accuracy(y2,prs_ld_D2)
    #    print 'D2: LDpred_inf AUC for the whole genome was: %0.4f'%auc2
    #    out_ld2.append('D2: LDpred_inf AUC for the whole genome was: '+str(auc2)+'\n')
    #
    ##    sp.savetxt('%s_y_'%(out_file_prefix)+'_D2.txt',y2)
    #    sp.savetxt('%s_prs-inf'%(out_file_prefix)+'_LDpred_D2.txt',prs_D2)
    #
    #    #Now calibration
    #    ff_inf = open('%s_inf_auc_'%(out_file_prefix)+'_LDpred_D2.txt',"w")
    #    ff_inf.writelines(out_ld2)
    #    ff_inf.close()

    ############ LDpred_pleio results #############
    corr_inf1 = sp.corrcoef(y1, prs_ld_pleio_D1)[0, 1]
    r2_inf1 = corr_inf1**2
    #results_dict[p_str]['r2_pd']=r2_inf
    print 'D1: the R2 prediction accuracy (observed scale) of LDpred_inf_pleio was: %0.4f (%0.6f)' % (
        r2_inf1, ((1 - r2_inf1)**2) / num_individs1)
    out_ld_pleio1.append(
        'The R2 prediction accuracy (observed scale) LDpred_inf_pleio was: ' +
        str(r2_inf1) + ' (' + str(((1 - r2_inf1)**2) / num_individs1) + ')\n')

    if corr_inf1 < 0:
        prs_ld_pleio_D1 = -1 * prs_ld_pleio_D1
    auc1 = pred_accuracy(y1, prs_ld_pleio_D1)
    print 'D1: LDpred_pleio AUC for the whole genome was: %0.4f' % auc1
    out_ld_pleio1.append(
        'D1: LDpred_inf_pleio AUC for the whole genome was: ' + str(auc1) +
        '\n')

    #   sp.savetxt('%s_y_'%(out_file_prefix)+'_D1.txt',y1)
    sp.savetxt('%s_prs-inf' % (out_file_prefix) + '_LDpred_pleio_D1.txt',
               prs_ld_pleio_D1)

    #Now calibration
    ff_inf = open('%s_inf_auc_' % (out_file_prefix) + '_LDpred_pleio_D1.txt',
                  "w")
    ff_inf.writelines(out_ld_pleio1)
    ff_inf.close()

    corr_inf2 = sp.corrcoef(y2, prs_ld_pleio_D2)[0, 1]
    r2_inf2 = corr_inf2**2
    #results_dict[p_str]['r2_pd']=r2_inf
    print 'D2: the R2 prediction accuracy (observed scale) of LDpred_inf_pleio was: %0.4f (%0.6f)' % (
        r2_inf2, ((1 - r2_inf2)**2) / num_individs2)
    out_ld_pleio2.append(
        'D2: the R2 prediction accuracy (observed scale) of LDpred_inf_pleio was: '
        + str(r2_inf2) + ' (' + str(((1 - r2_inf2)**2) / num_individs2) +
        ')\n')

    if corr_inf2 < 0:
        prs_ld_pleio_D2 = -1 * prs_ld_pleio_D2
    auc2 = pred_accuracy(y2, prs_ld_pleio_D2)
    print 'D2: LDpred_inf_pleio AUC for the whole genome was: %0.4f' % auc2
    out_ld_pleio2.append(
        'D2: LDpred_inf_pleio AUC for the whole genome was: ' + str(auc2) +
        '\n')

    #    sp.savetxt('%s_y_'%(out_file_prefix)+'_D2.txt',y2)
    sp.savetxt('%s_prs-inf' % (out_file_prefix) + '_LDpred_pleio_D2.txt',
               prs_ld_pleio_D2)

    #Now calibration
    ff_inf = open('%s_inf_auc_' % (out_file_prefix) + '_LDpred_pleio_D2.txt',
                  "w")
    ff_inf.writelines(out_ld_pleio2)
    ff_inf.close()
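pred_accuracy is defined elsewhere in the package; judging from the messages above it returns an AUC. A minimal stand-in sketch (an assumption, not the package's own implementation), using scikit-learn and assuming a binary 0/1 phenotype vector:

from sklearn.metrics import roc_auc_score  # stand-in only; not the package's pred_accuracy


def pred_accuracy_sketch(y_true, prs):
    # AUC of the polygenic risk score against binary case/control labels.
    return roc_auc_score(y_true, prs)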
예제 #59
0
from scipy import zeros, copy, loadtxt
from scipy.fftpack import rfft, irfft, dct, idct
from pylab import plot, show

dow2 = loadtxt('../../cpresources/dow2.txt', float)
# plot(dow2)
# show()

# Using the discrete Fourier transform
dow2_fourier = rfft(dow2)
N = len(dow2_fourier)
first_2_percent = zeros(N, float)
first_2_percent[0:int(N / 50)] = copy(dow2_fourier[0:int(N / 50)])
smoothed_dow2 = irfft(first_2_percent)

# using the discrete cosine transform
dow2_cos = dct(dow2)
n = len(dow2_cos)
first_cos_2_percent = zeros(n, float)
first_cos_2_percent[0:int(n / 50)] = copy(dow2_cos[0:int(n / 50)])
smoothed_cos_dow2 = idct(first_cos_2_percent) / (
    2 * n)  # need factor of 1 / 2n for normalization

plot(dow2, 'k')
plot(smoothed_dow2, 'g')
plot(smoothed_cos_dow2, 'r')
show()
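The same low-pass idea as above, factored into a small helper so the kept fraction of Fourier coefficients becomes a parameter (a sketch using the same scipy.fftpack calls as the snippet):

from scipy import zeros
from scipy.fftpack import rfft, irfft


def lowpass_fraction(signal, keep=0.02):
    # Zero out all but the lowest `keep` fraction of real-FFT coefficients, then invert.
    coeffs = rfft(signal)
    cutoff = int(len(coeffs) * keep)
    filtered = zeros(len(coeffs), float)
    filtered[:cutoff] = coeffs[:cutoff]
    return irfft(filtered)


# e.g. smoothed_dow2 = lowpass_fraction(dow2, keep=0.02)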
예제 #60
0
import os

import numpy
import scipy


def read_ansys_mesh(mesh_dir, filename, nodes_subset=[], elem_subset=[], debug=False):
    """Read an ansys .in file

    Only linear Lagrange elements are supported.
    CODE NEEDS TO BE UPDATED FOR TET MESHES.

    Keyword arguments:
    nodes_subset -- nodes to load (all if empty)
    elem_subset -- elements to load (all if empty)
    """

    # Load ansys .in file
    f = open(os.path.join(mesh_dir,filename), 'r')
    lines = f.readlines()
    num_lines = len(lines)

    # Initialising empty arrays in which to store node values and coordinates
    nodes_per_elem = 4 # for a tet mesh

    node_array = numpy.empty((0,1),int)
    node_coordinates = numpy.empty((0,3),int)
    element_array = numpy.empty((0,1),int)
    element_nodes_array = numpy.empty((0,nodes_per_elem),int)

    # Add nodes
    for line_idx, line in enumerate(lines):
        if line.split(' ,')[0] == 'NBLOCK':
            for node_line_idx in range(line_idx+2, num_lines+1):
                node_line = lines[node_line_idx]
                if node_line.split()[0] == 'N':
                    break
                else:
                    coordinates = node_line.split('       ')[-1]
                    x = float(coordinates[1:17])
                    y = float(coordinates[17:33])
                    z = float(coordinates[33:-1])
                    node_num = int(node_line.split()[0])
                    if node_num in nodes_subset or nodes_subset == []:

                        # Save node numbers (node_num) and coordinates (x, y, z) to arrays
                        node_array = numpy.append(node_array,node_num)
                        node_coordinates = numpy.append(node_coordinates,numpy.array([[x,y,z]]), axis = 0)
            break

    # Add elements
    for line_idx, line in enumerate(lines):
        if line.split(' ,')[0] == 'EBLOCK':
            for node_line_idx in range(line_idx+2, num_lines+1):
                node_line = lines[node_line_idx]
                if node_line.split() == []:
                    break
                else:
                    element_nodes = node_line.split()[11:-1]
                    element_nodes, idx_array = scipy.unique(scipy.array([int(node) for node in element_nodes]), return_index=True)
                    idx_array = [3 if idx==4 else idx for idx in idx_array]     
               
                    # Reordering the node arrangement
                    renumbered_nodes = scipy.copy(element_nodes)
                    for position, idx in enumerate(idx_array):
                        renumbered_nodes[idx] = element_nodes[position]

                    element_num = int(node_line.split()[10])
                    if element_num in elem_subset or elem_subset == []:

                        # Save element number (element_num) and element nodes (element_nodes) to arrays
                        element_array = numpy.append(element_array,element_num)
                        element_nodes_array = numpy.append(element_nodes_array,numpy.array([renumbered_nodes]), axis = 0)
            break

    inlet_node_array = numpy.empty((0,1),int)
    outlet_node_array = numpy.empty((0,1),int)

    # Find which nodes are part of the inlet
    for line_idx, line in enumerate(lines):
        if line.split(',')[0] == 'CMBLOCK' and line.split(',')[1] == 'MOUTH':
            for node_line_idx in range(line_idx+2, num_lines+1):
                node_line = lines[node_line_idx]
                if node_line.split(',')[0] == 'CMBLOCK':
                    break
                else:
                    inlet_node_row = node_line.split()
                    inlet_node_row = scipy.array([int(node) for node in inlet_node_row])
                    for inlet_node in inlet_node_row:
                        inlet_node_array = numpy.append(inlet_node_array,inlet_node)
            break

    # Find which nodes are part of the outlet
    for line_idx, line in enumerate(lines):
        if line.split(',')[0] == 'CMBLOCK' and line.split(',')[1] == 'OUTLET':
            for node_line_idx in range(line_idx+2, num_lines+1):
                node_line = lines[node_line_idx]
                if node_line.split(',')[0] == 'CMBLOCK':
                    break
                else:
                    outlet_node_row = node_line.split()
                    outlet_node_row = scipy.array([int(node) for node in outlet_node_row])
                    for outlet_node in outlet_node_row:
                        outlet_node_array = numpy.append(outlet_node_array,outlet_node)
            break

    # Return node number and coordinate arrays, and element number and element node arrays
    return node_array, node_coordinates, element_array, element_nodes_array, inlet_node_array, outlet_node_array
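A usage sketch for the reader above (the directory and file name are placeholders, not files that ship with the snippet):

# Hypothetical call; 'meshes' and 'airway.in' are placeholder names.
(node_array, node_coordinates, element_array, element_nodes_array,
 inlet_node_array, outlet_node_array) = read_ansys_mesh('meshes', 'airway.in')
print('Read %d nodes and %d elements' % (len(node_array), len(element_array)))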