def score(aPDB, aFASTA, exe=None, logf=None):

    ''' Gets alignment scores regardless of alignment method.

    aPDB   -- PDB file of the alignment; handed to PDBnet.PDBstructure,
              homology.rrmsd and IO.getFileName.
    aFASTA -- FASTA counterpart of the alignment; handed to
              homology.rrmsd, tmscore and gdt.
    exe    -- optional settings object; its scoresToDo and scpdbs
              attributes are read. Without it (or when scoresToDo is
              empty) every entry of SCORE_TYPES is computed.
    logf   -- optional logger; only used while a null distribution
              is being generated.

    Returns a list of alignmentScore objects, one per recognised
    requested score type, in the order they were requested. '''

    scores = []

    # Get PDB structure.
    p = PDBnet.PDBstructure(aPDB)

    # Get length of alignment.
    alignlen = len(p)

    # See what scores need to be done; default to all of them.
    scoresToDo = exe.scoresToDo if exe else None
    if not scoresToDo:
        scoresToDo = SCORE_TYPES

    rrmsd = rpval = rmsd = tmsc = tpval = gdt = None

    # Get RRMSD and RMSD.
    if 'RRMSD' in scoresToDo or 'RMSD' in scoresToDo:
        rrmsd, rmsd = homology.rrmsd(aPDB, aFASTA, True)
        if not exe or not exe.scpdbs or alignlen >= 100:
            # Lower-tail probability under a normal(0.177, 0.083)
            # truncated to [0, 1]. scipy's truncnorm expects the clip
            # points in standard-deviation units relative to loc/scale,
            # so they are standardised here; passing 0 and 1 directly
            # would clip to [mean, mean + sd] instead.
            # NOTE(review): assumes the intended support is [0, 1] -- confirm.
            mean, sd = 0.177, 0.083
            lower, upper = (0.0 - mean) / sd, (1.0 - mean) / sd
            rpval = truncnorm.cdf(rrmsd, lower, upper, loc=mean, scale=sd)
        elif logf and 'RRMSD' in scoresToDo:
            # Perform alignments in order to generate null distribution.
            # NOTE(review): pdbli is not defined inside this function; it
            # has to exist at module level for this branch to run -- confirm.
            logf.setTotalNum(logf.totalnum + 2 * (len(pdbli) + 1))
            logf.writeTemporary(
                'Generating null distribution from SCOP for %s...' % (aPDB))
            run(exe.scpdbs, logf, ref=aPDB, exe=exe, quick=None)
            alignfldr = IO.getFileName(aPDB)
            # NOTE: unpickling is only safe for trusted files; ref.pickl is
            # presumably the file produced by the run() call above -- confirm.
            with open('%s/ref.pickl' % (alignfldr)) as o:
                dic, _, _ = cPickle.load(o)
            vals = list(dic.values())
            pdf = gaussian_kde(vals)
            # NOTE(review): this evaluates the KDE density at rrmsd, not a
            # tail probability -- confirm this is the intended "p-value".
            rpval = pdf(rrmsd)

    # Get GDT and TMscore.
    if 'TMscore' in scoresToDo:
        tmsc = p.tmscore(aFASTA)
        tpval = 1 - math.exp(-math.exp((0.1512 - tmsc) / 0.0242))
    if 'GDT' in scoresToDo:
        gdt = p.gdt(aFASTA)

    # Add them to list in order as given; unknown names are skipped.
    for it in scoresToDo:
        if it == 'RRMSD':
            scores.append(alignmentScore('RRMSD', rrmsd, rpval))
        elif it == 'RMSD':
            scores.append(alignmentScore('RMSD', rmsd))
        elif it == 'TMscore':
            scores.append(alignmentScore('TMscore', tmsc, tpval))
        elif it == 'GDT':
            scores.append(alignmentScore('GDT', gdt))

    # Return the scoring values.
    return scores
def __init__(self, gsim_by_imt, truncation=6.0, nsample=100):
    """
    Set up the instance and pre-compute the truncated-normal bins.

    :param gsim_by_imt:
        Forwarded unchanged to the parent constructor; afterwards
        ``self.gsim_by_imt`` is iterated and indexed by its keys.
    :param float truncation:
        Half-width of the epsilon range; the bins span
        ``[-truncation, truncation]`` of a standard normal.
    :param int nsample:
        Number of bins (``nsample + 1`` equally spaced edges).
    """
    super().__init__(gsim_by_imt)

    # Each REQUIRES_* attribute becomes the union of the instance's own
    # value and the corresponding value of every underlying model.
    requirement_names = (
        "REQUIRES_SITES_PARAMETERS",
        "REQUIRES_RUPTURE_PARAMETERS",
        "REQUIRES_DISTANCES",
    )
    self.imts = []
    for imt in self.gsim_by_imt:
        for name in requirement_names:
            merged = (getattr(self, name) |
                      getattr(self.gsim_by_imt[imt], name))
            setattr(self, name, merged)
        self.imts.append(imt)

    # The numerical integration over the surface shaking needs the
    # probability mass of each epsilon bin. These only depend on the
    # truncation level and the number of samples, so they are computed
    # once here: survival function at the bin edges, differenced to give
    # the mass per bin, alongside the bin mid-points.
    bin_edges = np.linspace(-truncation, truncation, nsample + 1)
    survival = truncnorm.sf(bin_edges, -truncation, truncation,
                            loc=0., scale=1.)
    self.truncnorm_probs = survival[:-1] - survival[1:]
    self.epsilons = (bin_edges[:-1] + bin_edges[1:]) / 2.
def get_p_val(self, stat, eta, l_thres, u_thres, mu=0):
    """Return the one-sided p-value of ``stat`` under a truncated normal.

    The distribution has mean ``mu`` and variance ``eta' cov eta`` (with
    ``cov`` read from ``self.cov``) and is truncated to
    ``[l_thres, u_thres]``. The result is its survival function
    evaluated at ``stat``.
    """
    variance = np.dot(eta, np.dot(self.cov, eta))
    std_dev = np.sqrt(variance)
    # scipy wants the truncation points in standard-deviation units
    # measured from the mean.
    lower = (l_thres - mu) / std_dev
    upper = (u_thres - mu) / std_dev
    return truncnorm.sf(stat, lower, upper, loc=mu, scale=std_dev)
def pretty_print(self):
    """Print the allocated interval of the constraint and its risk.

    The risk is the probability mass that a normal distribution with the
    constraint's mean and std, truncated to ``[0, 1e6]``, places below
    ``allocated_lb`` plus the mass it places above ``allocated_ub``.
    """
    mean = self.constraint.mean
    std = self.constraint.std
    # Truncation points expressed in standard deviations from the mean,
    # as scipy's truncnorm requires.
    lower_clip = (0 - mean) / std
    upper_clip = (1e6 - mean) / std

    ub_survival = truncnorm.sf(self.allocated_ub, lower_clip, upper_clip,
                               loc=mean, scale=std)
    lb_mass = truncnorm.cdf(self.allocated_lb, lower_clip, upper_clip,
                            loc=mean, scale=std)
    risk = lb_mass + ub_survival

    header = self.constraint.name + ": ["
    bounds = str(self.allocated_lb) + "," + str(self.allocated_ub)
    print(header + bounds + "] (Risk: " + str(risk) + ")")
def usrf(status, x, needF, neF, F, needG, neG, G, cu, iu, ru):
    """
    ==================================================================
    Evaluates the nonlinear chance-constraint row and its gradient.

    When ``needF[0]`` is non-zero, the second-to-last row of ``F``
    (index ``neF[0] - 2``) is reset and filled with ``x[cc_var]`` (only
    if ``cc_var > 0``) plus, for every probabilistic duration, the mass
    of its truncated normal below the lower-bound variable and above the
    upper-bound variable.

    When ``needG[0]`` is non-zero, ``G`` receives for every duration the
    truncated-normal pdf at the lower bound followed by the negated pdf
    at the upper bound.

    ``cc_var``, ``prob_means``, ``prob_stds`` and ``prob_vars`` are read
    from the enclosing scope. ``status``, ``neG``, ``cu``, ``iu`` and
    ``ru`` are not used. Nothing is returned; ``F`` and ``G`` are
    modified in place.
    ==================================================================
    """
    if needF[0] != 0:
        # the second last row is for chance constraint
        row = neF[0] - 2
        F[row] = 0
        if cc_var > 0:
            F[row] += x[cc_var]

        for k in range(len(G) // 2):
            mu, sd = prob_means[k], prob_stds[k]
            lo_idx, hi_idx = prob_vars[2 * k], prob_vars[2 * k + 1]
            # Durations are truncated to [0, 1e6]; scipy takes the clip
            # points in standard deviations from the mean.
            clip_lo, clip_hi = (0 - mu) / sd, (1e6 - mu) / sd
            ub_survival = truncnorm.sf(x[hi_idx], clip_lo, clip_hi,
                                       loc=mu, scale=sd)
            lb_mass = truncnorm.cdf(x[lo_idx], clip_lo, clip_hi,
                                    loc=mu, scale=sd)
            F[row] += ub_survival + lb_mass

    if needG[0] != 0:
        # Partial derivatives of the chance constraint with respect to
        # the lower and upper bounds of each probabilistic duration.
        for k in range(len(G) // 2):
            mu, sd = prob_means[k], prob_stds[k]
            lo_idx, hi_idx = prob_vars[2 * k], prob_vars[2 * k + 1]
            clip_lo, clip_hi = (0 - mu) / sd, (1e6 - mu) / sd
            # d(cdf)/d(lower bound) is the pdf ...
            G[2 * k] = truncnorm.pdf(x[lo_idx], clip_lo, clip_hi,
                                     loc=mu, scale=sd)
            # ... and d(sf)/d(upper bound) is its negation.
            G[2 * k + 1] = -truncnorm.pdf(x[hi_idx], clip_lo, clip_hi,
                                          loc=mu, scale=sd)
def usrf(status, x, needF, neF, F, needG, neG, G, cu, iu, ru):
    """
    ==================================================================
    Fills in the nonlinear chance-constraint value and its derivatives.

    ``F[neF[0] - 2]`` is recomputed when ``needF[0]`` is set: it starts
    at zero, gains ``x[cc_var]`` when ``cc_var > 0``, and then gains the
    lower-tail plus upper-tail probability of every probabilistic
    duration. ``G`` is recomputed when ``needG[0]`` is set: two entries
    per duration, the pdf at the lower bound and minus the pdf at the
    upper bound.

    Reads ``cc_var``, ``prob_means``, ``prob_stds`` and ``prob_vars``
    from the enclosing scope; works in place and returns nothing.
    ==================================================================
    """

    def truncated_normal(pair):
        """Look up variables and distribution arguments of one duration."""
        mean = prob_means[pair]
        sigma = prob_stds[pair]
        lower_var = prob_vars[2 * pair]
        upper_var = prob_vars[2 * pair + 1]
        # Support [0, 1e6], standardised the way scipy's truncnorm expects.
        clip = ((0 - mean) / sigma, (1e6 - mean) / sigma)
        return lower_var, upper_var, clip, {"loc": mean, "scale": sigma}

    if needF[0] != 0:
        # the second last row is for chance constraint
        F[neF[0] - 2] = 0
        if cc_var > 0:
            F[neF[0] - 2] += x[cc_var]
        for pair in range(len(G) // 2):
            lower_var, upper_var, clip, params = truncated_normal(pair)
            ub_survival = truncnorm.sf(x[upper_var], *clip, **params)
            lb_mass = truncnorm.cdf(x[lower_var], *clip, **params)
            F[neF[0] - 2] += ub_survival + lb_mass

    if needG[0] != 0:
        # Gradient of the chance constraint over the lower and upper
        # bounds of the probabilistic durations.
        for pair in range(len(G) // 2):
            lower_var, upper_var, clip, params = truncated_normal(pair)
            # Lower bound: derivative of the cdf, i.e. the pdf.
            G[2 * pair] = truncnorm.pdf(x[lower_var], *clip, **params)
            # Upper bound: derivative of the survival function, i.e. -pdf.
            G[2 * pair + 1] = -truncnorm.pdf(x[upper_var], *clip, **params)
def pretty_print(self):
    """Print ``name: [lb,ub] (Risk: r)`` for this allocation.

    ``r`` is the sum of the truncated-normal cdf at ``allocated_lb`` and
    the survival function at ``allocated_ub``, using the constraint's
    mean and std with the support clipped to ``[0, 1e6]``.
    """
    constraint = self.constraint
    # Clip points in units of standard deviations from the mean.
    a = (0 - constraint.mean) / constraint.std
    b = (1e6 - constraint.mean) / constraint.std
    shape = dict(loc=constraint.mean, scale=constraint.std)

    ub_survival = truncnorm.sf(self.allocated_ub, a, b, **shape)
    lb_mass = truncnorm.cdf(self.allocated_lb, a, b, **shape)

    pieces = [
        constraint.name,
        ": [",
        str(self.allocated_lb),
        ",",
        str(self.allocated_ub),
        "] (Risk: ",
        str(lb_mass + ub_survival),
        ")",
    ]
    print("".join(pieces))
def psi_inf(A, b, eta, mu, cov, z):
    """
    Build the quantile and survival functions of a truncated normal.

    The truncation points are obtained from
    ``calculate_threshold(z, A, b, eta, cov)`` (following Lee et al.
    2016 according to the original documentation) and the scale is
    ``sqrt(eta' cov eta)``.

    Returns
    -------
    ppf : callable
        ``ppf(x)`` evaluates ``truncnorm_ppf`` with the truncation points,
        ``loc = eta' mu`` and the scale above.
    sf : callable
        ``sf(x)`` evaluates ``scipy.stats.truncnorm.sf`` with the
        truncation points divided by the scale and ``loc`` left at 0.

    NOTE(review): ``sf`` is centred at 0 whereas ``ppf`` is centred at
    ``eta' mu`` -- confirm this asymmetry is intended.
    """
    l_thres, u_thres = calculate_threshold(z, A, b, eta, cov)
    scale = np.sqrt(np.matmul(eta, np.matmul(cov, eta)))
    # Computed once here rather than on every ppf call.
    mean = np.matmul(eta, mu)

    def ppf(x):
        return truncnorm_ppf(x, l_thres, u_thres, loc=mean, scale=scale)

    def sf(x):
        # scipy expects clip points in standard-deviation units; with
        # loc = 0 that is simply threshold / scale.
        return truncnorm.sf(x, l_thres / scale, u_thres / scale, scale=scale)

    return ppf, sf