def mergeProfiles(self, p0, p1, maxOverlap=3):
    """
    Merge profile p0 with profile p1, as long as they overlap in
    at most maxOverlap positions. A position counts as overlapping if
    both profiles hold a value > 0 there. Overlapping positions are
    set to 0 in the merged profile.

    @param p0: profile
    @type  p0: [float]
    @param p1: profile
    @type  p1: [float]
    @param maxOverlap: maximal allowed overlap between profiles
    @type  maxOverlap: int

    @return: sum of both profiles with the overlapping positions zeroed;
             p0 (converted by __list2array) if there are more than
             maxOverlap overlapping positions
    @rtype: array
    """
    p0 = self.__list2array(p0)
    p1 = self.__list2array(p1)

    ## Positions that are populated in BOTH profiles. The product of the
    ## two masks is only non-zero where both are set; this holds for
    ## integer (0/1) and for boolean masks alike. Adding boolean masks
    ## instead would saturate at True and never exceed 1.
    overlap = N0.greater(p0, 0) * N0.greater(p1, 0)

    if N0.sum(overlap) <= maxOverlap:
        ## one of the two profiles will in most cases not belong to these
        ## positions. We can't decide which one is wrong, let's eliminate
        ## both values. Alternatively we could keep one, or the average, ..
        ## NOTE(review): N0.put works in place -- if __list2array hands
        ## back the caller's own array, the caller's data is modified too.
        clash = N0.nonzero(overlap)
        N0.put(p1, clash, 0)
        N0.put(p0, clash, 0)

        p0 = p0 + p1

    return p0
def __exposedResidues(self, ASA_values, sidechainCut=0.0,
                      backboneCut=0.0, totalCut=0.0):
    """
    Decide what is a surface exposed residue and what is not.

    Column 0 of ASA_values is compared against totalCut, column 1
    against backboneCut and column 2 against sidechainCut. The three
    test results are stacked, summed with N0.sum and the sum is
    compared against 0.

    NOTE(review): the previous documentation stated that all three
    values have to pass the test, while a '> 0' test on the summed
    results looks like a single passing value is sufficient -- confirm
    which of the two is intended.

    @param ASA_values: array with one row per residue and (at least)
                       three ASA columns, see above for the column order
    @type  ASA_values: array
    @param sidechainCut: cutoff for the side chain ASA (default: 0.0)
    @type  sidechainCut: float
    @param backboneCut: cutoff for the backbone ASA (default: 0.0)
    @type  backboneCut: float
    @param totalCut: cutoff for the total ASA (default: 0.0)
    @type  totalCut: float

    @return: residue mask, where 0 = buried
    @rtype: [1|0]
    """
    columns = N0.transpose(ASA_values)

    above_total = N0.greater(columns[0], totalCut)
    above_backbone = N0.greater(columns[1], backboneCut)
    above_sidechain = N0.greater(columns[2], sidechainCut)

    ## one row per criterion
    stacked = N0.concatenate(
        ([above_total], [above_backbone], [above_sidechain]))

    return N0.greater(N0.sum(stacked), 0)
def __exposedResidues(self, ASA_values, sidechainCut=0.0,
                      backboneCut=0.0, totalCut=0.0):
    """
    Decide what is a surface exposed residue and what is not.

    Column 0 of ASA_values is compared against totalCut, column 1
    against backboneCut and column 2 against sidechainCut. The three
    test results are stacked, summed with N0.sum and the sum is
    compared against 0.

    NOTE(review): the previous documentation stated that all three
    values have to pass the test, while a '> 0' test on the summed
    results looks like a single passing value is sufficient -- confirm
    which of the two is intended.

    @param ASA_values: array with one row per residue and (at least)
                       three ASA columns, see above for the column order
    @type  ASA_values: array
    @param sidechainCut: cutoff for the side chain ASA (default: 0.0)
    @type  sidechainCut: float
    @param backboneCut: cutoff for the backbone ASA (default: 0.0)
    @type  backboneCut: float
    @param totalCut: cutoff for the total ASA (default: 0.0)
    @type  totalCut: float

    @return: residue mask, where 0 = buried
    @rtype: [1|0]
    """
    per_column = N0.transpose(ASA_values)
    cutoffs = (totalCut, backboneCut, sidechainCut)

    ## one single-row block per criterion, in column order 0, 1, 2
    passed = [[N0.greater(per_column[k], cutoffs[k])] for k in (0, 1, 2)]

    hits = N0.sum(N0.concatenate((passed[0], passed[1], passed[2])))
    return N0.greater(hits, 0)
def mergeProfiles(self, p0, p1, maxOverlap=3):
    """
    Merge profile p0 with profile p1, as long as they overlap in
    at most maxOverlap positions. A position counts as overlapping if
    both profiles hold a value > 0 there. Overlapping positions are
    set to 0 in the merged profile.

    @param p0: profile
    @type  p0: [float]
    @param p1: profile
    @type  p1: [float]
    @param maxOverlap: maximal allowed overlap between profiles
    @type  maxOverlap: int

    @return: sum of both profiles with the overlapping positions zeroed;
             p0 (converted by __list2array) if there are more than
             maxOverlap overlapping positions
    @rtype: array
    """
    p0 = self.__list2array(p0)
    p1 = self.__list2array(p1)

    ## Positions that are populated in BOTH profiles. The product of the
    ## two masks is only non-zero where both are set; this holds for
    ## integer (0/1) and for boolean masks alike. Adding boolean masks
    ## instead would saturate at True and never exceed 1.
    overlap = N0.greater(p0, 0) * N0.greater(p1, 0)

    if N0.sum(overlap) <= maxOverlap:
        ## one of the two profiles will in most cases not belong to these
        ## positions. We can't decide which one is wrong, let's eliminate
        ## both values. Alternatively we could keep one, or the average, ..
        ## NOTE(review): N0.put works in place -- if __list2array hands
        ## back the caller's own array, the caller's data is modified too.
        clash = N0.nonzero(overlap)
        N0.put(p1, clash, 0)
        N0.put(p0, clash, 0)

        p0 = p0 + p1

    return p0
def __checkProfileIntegrity(self, profile, upperLimit=1.0, lowerLimit=-1.0):
    """
    In some cases SurfaceRacer generates incorrect curvature
    values for some atoms. This function sets values outside
    a given range to 0 and prints a warning for each of them.

    The profile is modified in place; the same object is returned.

    @param profile: profile values to inspect
    @type  profile: [float]
    @param upperLimit: upper limit for a valid value (default: 1.0)
    @type  upperLimit: float
    @param lowerLimit: lower limit for a valid value (default: -1.0)
    @type  lowerLimit: float

    @return: profile with inspected values
    @rtype: [float]
    """
    ## flag everything above the upper OR below the lower limit
    mask = N0.greater(profile, upperLimit)
    mask += N0.less(profile, lowerLimit)

    for i in N0.nonzero(mask):
        ## single-argument print(...) is valid in Python 2 and 3
        print('WARNING! Profile value %.2f set to 0\n' % profile[i])
        profile[i] = 0

    return profile
def __categorizeHexSurf(self, cutoff=0.1):
    """
    Compare the complexes in self.hexContacts to the native complex
    (self.contacts) to see if their contact surfaces overlap with the
    native one.

    For every entry of self.hexContacts the result of
    self.com.fractionNativeSurface is compared against cutoff and the
    number of values above the cutoff is recorded.

    @param cutoff: fraction cutoff for defining an overlap (default: 0.1)
    @type  cutoff: float

    @return: list of len(self.hexContacts); per the original
             documentation: 0 - no overlap, 1 - rec OR lig overlaps,
             2 - rec AND lig overlap
    @rtype: [0|1|2]
    """
    categories = []

    for hexContact in self.hexContacts:
        fractions = self.com.fractionNativeSurface(hexContact, self.contacts)
        ## count how many of the fractions exceed the cutoff
        categories.append(N0.sum(N0.greater(fractions, cutoff)))

    return categories
def __find_intervals(self, l):
    """
    Sort the given numbers and split them into runs in which
    neighbouring values differ by at most 1; a new run is started
    wherever the step to the next value is greater than 1.
    Example: [7, 1, 2, 3, 8] -> [(1, 2, 3), (7, 8)]

    The last run is always appended, so an empty input is expected to
    give a list with one empty tuple.

    @param l: numbers (any order)
    @type  l: [int]

    @return: list of runs, each a tuple of the sorted values
    @rtype: [(int,)]
    """
    l = N0.array(l)
    l = N0.take(l, N0.argsort(l))

    ## positions i where l[i+1] - l[i] > 1, i.e. the last index of a run
    break_points = N0.nonzero(N0.greater(l[1:] - l[:-1], 1))

    start = 0
    intervals = []

    for index in break_points:
        intervals.append(tuple(l[start:index + 1]))
        start = index + 1

    ## whatever follows the last break point (or everything, if none)
    intervals.append(tuple(l[start:]))

    return intervals
def memberFrames(self, threshold=0.):
    """
    Get indices of all frames belonging to each cluster. Each frame
    is guaranteed to belong, at least, to the cluster for which it has
    its maximum membership. If threshold > 0, it can additionally pop
    up in other clusters.

    @param threshold: minimal cluster membership or 0 to consider
                      only max membership (default: 0)
    @type  threshold: float

    @return: n_cluster, lst of lst of int, frame indices; each list is
             passed through self.membershipSort before it is returned
    @rtype: [[int]]
    """
    msm = self.memberships()

    ## best cluster for each frame
    maxMemb = N0.argmax(msm, 0)

    r = [N0.nonzero(N0.equal(maxMemb, i)).tolist()
         for i in range(0, self.n_clusters)]

    ## same thing but now taking all above threshold
    ## -> same frame can end up in several clusters
    if threshold > 0.:
        for i, frames in enumerate(r):
            above = N0.nonzero(N0.greater(msm[i], threshold)).tolist()

            ## add only additional frames, keep the existing order
            known = set(frames)
            r[i] = frames + [fr for fr in above if fr not in known]

    ## sort frames within each cluster by their membership
    r = [self.membershipSort(frames, i) for i, frames in enumerate(r)]

    return r
def identities(self, aln_dictionary):
    """
    Create a dictionary that contains information about all the
    alignments in the aln_dictionary using pairwise comparisons.
    The alignment length is taken from aln_dictionary['target']['seq'].
    The given dictionary is updated in place and returned.

    @param aln_dictionary: alignment dictionary
    @type  aln_dictionary: dict

    @return: a dictionary of dictionaries with the sequence name as the
    top key. Each sub dictionary then has the keys:
     - 'name' - str, sequence name
     - 'seq' - str, sequence of
     - 'template_info' - list of the same length as the 'key'
       sequence excluding deletions. The number of sequences
       in the multiple alignment that contain information at
       this position.
     - 'ID' - dict, sequence identity in percent comparing the
       'key' sequence to all other sequences (excluding deletions)
     - 'info_ID' - dict, same as 'ID' but compared to the template
       sequence length (i.e excluding deletions and insertions
       in the 'key' sequence )
     - 'cov_ID' - dict, same as 'info_ID' but insertions are defined
       comparing to all template sequences (i.e where
       'template_info' is zero )
    @rtype: dict
    """
    names = self.sequences_name
    aln_length = len(aln_dictionary["target"]["seq"])

    ## loop over all sequences in alignment
    for i in names:
        seq_i = aln_dictionary[i]["seq"]

        ## don't compare to self; compare names by value, not by identity
        template_seqs = [aln_dictionary[name]["seq"]
                         for name in names if name != i]

        ## alignment positions at which sequence i is not deleted
        positions = [w for w in range(aln_length) if seq_i[w] != '-']
        nb_of_residues = len(positions)

        ## for each of these positions, count how many of the other
        ## sequences contain alignment information (independent of y)
        template_info = [sum(1 for seq in template_seqs if seq[w] != '-')
                         for w in positions]

        ## number of positions in which any other sequence
        ## contains alignment information
        nb_cov_res = sum(1 for n in template_info if n > 0)

        info_ID, ID, cov_ID = {}, {}, {}

        ## loop over all sequences in alignment
        for y in names:
            seq_y = aln_dictionary[y]["seq"]

            nb_of_identities = sum(1 for w in positions
                                   if seq_i[w] == seq_y[w])
            ## length excluding insertions
            nb_of_template = sum(1 for w in positions if seq_y[w] != '-')

            ## calculate identities
            info_ID[y] = ID[y] = cov_ID[y] = 0
            ## RAIK: Hack, nb_of_... can turn 0 for fragmented alignments
            if nb_of_template:
                info_ID[y] = 100. * nb_of_identities / nb_of_template
            if nb_of_residues:
                ID[y] = 100. * nb_of_identities / nb_of_residues
            if nb_cov_res:
                cov_ID[y] = 100. * nb_of_identities / nb_cov_res

        aln_dictionary[i]["info_ID"] = info_ID
        aln_dictionary[i]["ID"] = ID
        aln_dictionary[i]["cov_ID"] = cov_ID
        aln_dictionary[i]["template_info"] = template_info

    return aln_dictionary
def identities(self, aln_dictionary):
    """
    Create a dictionary that contains information about all the
    alignments in the aln_dictionary using pairwise comparisons.
    The alignment length is taken from aln_dictionary['target']['seq'].
    The given dictionary is updated in place and returned.

    @param aln_dictionary: alignment dictionary
    @type  aln_dictionary: dict

    @return: a dictionary of dictionaries with the sequence name as the
    top key. Each sub dictionary then has the keys:
     - 'name' - str, sequence name
     - 'seq' - str, sequence of
     - 'template_info' - list of the same length as the 'key'
       sequence excluding deletions. The number of sequences
       in the multiple alignment that contain information at
       this position.
     - 'ID' - dict, sequence identity in percent comparing the
       'key' sequence to all other sequences (excluding deletions)
     - 'info_ID' - dict, same as 'ID' but compared to the template
       sequence length (i.e excluding deletions and insertions
       in the 'key' sequence )
     - 'cov_ID' - dict, same as 'info_ID' but insertions are defined
       comparing to all template sequences (i.e where
       'template_info' is zero )
    @rtype: dict
    """
    names = self.sequences_name
    aln_length = len(aln_dictionary["target"]["seq"])

    ## loop over all sequences in alignment
    for i in names:
        seq_i = aln_dictionary[i]["seq"]

        ## don't compare to self; compare names by value, not by identity
        template_seqs = [aln_dictionary[name]["seq"]
                         for name in names if name != i]

        ## alignment positions at which sequence i is not deleted
        positions = [w for w in range(aln_length) if seq_i[w] != '-']
        nb_of_residues = len(positions)

        ## for each of these positions, count how many of the other
        ## sequences contain alignment information (independent of y)
        template_info = [sum(1 for seq in template_seqs if seq[w] != '-')
                         for w in positions]

        ## number of positions in which any other sequence
        ## contains alignment information
        nb_cov_res = sum(1 for n in template_info if n > 0)

        info_ID, ID, cov_ID = {}, {}, {}

        ## loop over all sequences in alignment
        for y in names:
            seq_y = aln_dictionary[y]["seq"]

            nb_of_identities = sum(1 for w in positions
                                   if seq_i[w] == seq_y[w])
            ## length excluding insertions
            nb_of_template = sum(1 for w in positions if seq_y[w] != '-')

            ## calculate identities
            info_ID[y] = ID[y] = cov_ID[y] = 0
            ## RAIK: Hack, nb_of_... can turn 0 for fragmented alignments
            if nb_of_template:
                info_ID[y] = 100. * nb_of_identities / nb_of_template
            if nb_of_residues:
                ID[y] = 100. * nb_of_identities / nb_of_residues
            if nb_cov_res:
                cov_ID[y] = 100. * nb_of_identities / nb_cov_res

        aln_dictionary[i]["info_ID"] = info_ID
        aln_dictionary[i]["ID"] = ID
        aln_dictionary[i]["cov_ID"] = cov_ID
        aln_dictionary[i]["template_info"] = template_info

    return aln_dictionary
def createHexInp(recPdb, recModel, ligPdb, ligModel, comPdb=None,
                 outFile=None, macDock=None, silent=0, sol=512):
    """
    Prepare a Hex macro file for the docking of the receptor(s)
    against ligand(s).

    @param recPdb: file name of the hex-formatted receptor PDB
    @type  recPdb: str
    @param recModel: receptor model, distances are taken from this one
    @type  recModel: PDBModel
    @param ligPdb: file name of the hex-formatted ligand PDB
    @type  ligPdb: str
    @param ligModel: ligand model, distances are taken from this one
    @type  ligModel: PDBModel
    @param comPdb: reference PDB; only written to the macro if the name
                   ends in '.pdb'
    @type  comPdb: str
    @param outFile: base of file name for mac and out
                    (default: first 4 letters of receptor and ligand
                    file name joined by '-')
    @type  outFile: str
    @param macDock: None -> decide from the size of the receptor
                    (radius > 35 A), 1 -> force macroDock,
                    0 -> force off (default: None)
    @type  macDock: None|1|0
    @param silent: don't print distances and macro warnings (default: 0);
                   has no influence on the docking setup itself
    @type  silent: 0|1
    @param sol: number of solutions that HEx should save (default: 512)
    @type  sol: int

    @return: HEX macro file name, HEX out generated by the macro,
             macro docking status
    @rtype: str, str, boolean
    """
    ## files and names
    recCode = t.stripFilename(recPdb)[0:4]
    ligCode = t.stripFilename(ligPdb)[0:4]

    outFile = outFile or recCode + '-' + ligCode

    ## hex macro name
    macName = t.absfile(outFile + '_hex.mac')

    ## hex rotation matrix output name
    outName_all = t.absfile(outFile + '_hex.out')
    outName_clust = t.absfile(outFile + '_hex_cluster.out')

    ## add surface profiles if not there
    if not recModel.atoms.has_key('relAS'):
        rec_asa = PDBDope(recModel)
        rec_asa.addSurfaceRacer()

    if not ligModel.atoms.has_key('relAS'):
        lig_asa = PDBDope(ligModel)
        lig_asa.addSurfaceRacer()

    ## surface masks, > 95% exposed
    rec_surf_mask = N0.greater(recModel.profile('relAS'), 95)
    lig_surf_mask = N0.greater(ligModel.profile('relAS'), 95)

    ## largest and smallest distance from centre of mass to the surface
    ## NOTE(review): assumes centerSurfDist returns (max, min) -- confirm
    recMax, recMin = centerSurfDist(recModel, rec_surf_mask)
    ligMax, ligMin = centerSurfDist(ligModel, lig_surf_mask)

    ## approximate max and min centre to centre distance
    maxDist = recMax + ligMax
    minDist = recMin + ligMin

    ## molecular separation and search range to be used in the docking
    molSep = (maxDist + minDist) / 2
    molRange = 2 * (maxDist - molSep)

    ## single-argument print(...) is valid in Python 2 and 3; the texts
    ## reproduce the output of the former two-part print statements
    if not silent:
        print('Docking setup: %s\nRecMax: %.1f RecMin: %.1f\nLigMax: %.1f LigMin: %.1f\nMaxDist: %.1f MinDist: %.1f\nmolecular_separation: %.1f r12_range: %.1f\n' % (
            outFile, recMax, recMin, ligMax, ligMin, maxDist, minDist,
            molSep, molRange))

    if recMax > 30 and ligMax > 30 and not silent:
        print('\nWARNING! Both the receptor and ligand radius is '
              ' greater than 30A.\n')

    ## determine docking mode to use; the decision must not depend on
    ## 'silent', which only controls the message
    if macDock is None:
        macroDocking = 0
        if recMax > 35:
            macroDocking = 1
            if not silent:
                print('\nReceptor has a radius that exceeds 35A '
                      ' -> Macro docking will be used')
    else:
        macroDocking = macDock

    #####################
    ## assemble macro file
    ## NOTE(review): the line breaks of the three templates below were
    ## reconstructed (one Hex directive per line) -- compare against the
    ## original macro before use.
    head = """
# -------------- general settings ----------------
disc_cache 1                   # disc cache on (0 off)
docking_sort_mode 1            # Sort solutions by cluster (0 by energy)
docking_cluster_mode 1         # Display all clusters (0 display best)
docking_cluster_threshold 2.00
# docking_cluster_bumps  number

# ------------ molecule orientation --------------
molecule_separation %(separation)i
commit_view """ % ({'separation': round(molSep)})

    macro = """
# -------------- macro docking -------------------
macro_min_coverage 25
macro_sphere_radius 15
macro_docking_separation 25
activate_macro_model"""

    tail = """
# -------------- docking setup -------------------
docking_search_mode 0          # full rotational search

receptor_range_angle  180      # 0, 15, 30, 45, 60, 75, 90, 180
docking_receptor_samples 720   # 362, 492, 642, 720, 980, 1280

ligand_range_angle  180
docking_ligand_samples 720

twist_range_angle 360          # 0, 15, 30, 60, 90, 180, 360
docking_alpha_samples 128      # 64, 128, 256

r12_step 0.500000              # 0.1, 0.2, 0.25, 0.5, 0.75, 1, 1.5, 2
r12_range %(range)i

docking_radial_filter 0        # Radial Envelope Filter - None

grid_size 0.600                # 0.4, 0.5, 0.6, 0.75, 1.0

# docking_electrostatics 0       # use only surface complimentarity
docking_electrostatics 1       # use electrostatic term for scoring clusters

docking_main_scan 16           #
docking_main_search 26

max_docking_solutions %(nr_sol)i # number of solutions to save

# -------------- post-processing ----------------
docking_refine 0               # None
#  docking_refine 1               # Backbone Bumps
#  docking_refine 2               # MM energies
#  docking_refine 3               # MM minimization

# ---------------- run docking ------------------
activate_docking
#  save_docking %(output_clust)s
#  save_range 1 512 ./ dock .pdb

# ------------ also save all solutions ----------
docking_sort_mode 0            # Sort solutions by energy (1 by cluster)
save_docking %(output_all)s""" \
        % ({'range': round(molRange), 'output_all': outName_all,
            'nr_sol': int(sol), 'output_clust': outName_clust})

    ## write macro file; try/finally closes the handle even if a write fails
    macOpen = open(macName, 'w')
    try:
        macOpen.write('# -- ' + macName + ' --\n')
        macOpen.write(' \n')
        macOpen.write('open_receptor ' + t.absfile(recPdb) + '\n')
        macOpen.write('open_ligand ' + t.absfile(ligPdb) + '\n')

        if comPdb and comPdb[-4:] == '.pdb':
            macOpen.write('open_complex ' + comPdb + '\n')

        macOpen.write('\n')
        macOpen.write(head)

        ## macro docking will not work with multiple models, if both are
        ## added to the hex macro file - macrodocking will be skipped
        ## during the docking run
        if macroDocking:
            macOpen.write(macro)

        macOpen.write(tail)
    finally:
        macOpen.close()

    return macName, outName_all, macroDocking