def process(args):
    """Score PDB models against per-atom-type grids and write a ranking.

    For every model in ``args['pdb_list']`` each atom whose type is selected
    is mapped to a grid cell, the value stored in the grid of that atom type
    is looked up, and the looked-up values are averaged per model.  The raw
    per-model means are stored in the ``score`` dataset of the HDF5 file
    ``args['output']``; a ranking normalised by the best score is written to
    ``a_score_table.csv`` in descending order.

    Args:
        args: Mapping with the keys ``Sfn`` (input grid file), ``pdb_list``,
            ``output``, ``excl`` and ``incl``.  The whole mapping is also
            forwarded to ``er.PepExtractor`` as keyword arguments.

    Raises:
        ValueError: If both ``excl`` and ``incl`` are given.
    """
    excl = args['excl']
    incl = args['incl']
    # Validate before touching any file.
    if excl and incl:
        raise ValueError(
            'You can not use include and exclude options simultaneiously!')

    model_list = args['pdb_list']
    n_models = len(model_list)

    with h5py.File(args['Sfn'], 'r') as Sf:
        _, rtypes = gu.choose_artypes(Sf['atypes'][:][0])

        # Every dataset that is not bookkeeping is an atom-type grid.
        # Hydrogens are not scored.
        atypes = set(Sf.keys()) - {'origin', 'step', 'atypes', 'H'}
        if excl:
            excl = set(excl)
            atypes -= excl
        if incl:
            incl = set(incl)
            atypes &= incl
        print(excl, incl, atypes)

        GminXYZ = Sf['origin'][:]
        step = Sf['step'][0]
        # Only the selected grids are ever looked up, so only those are read.
        grids = {name: Sf[name][:] for name in atypes}

    # NOTE(review): the extractor is not used by the scoring below; it is
    # still constructed in case PepExtractor has required side effects --
    # confirm and remove.
    er.PepExtractor(**args)

    scores = np.zeros((n_models, ), dtype=float)
    for cm in tqdm.trange(n_models):
        S = prody.parsePDB(model_list[cm])
        total = 0.0
        counted = 0
        for a in S.iterAtoms():
            try:
                atype = rtypes[(a.getResname(), a.getName())]
            except KeyError:
                continue
            if atype not in grids:
                continue
            idx = ((a.getCoords() - GminXYZ) / step).astype(int)
            # A negative index would silently wrap to the far side of the
            # grid, so atoms below the grid origin are skipped explicitly.
            if (idx < 0).any():
                continue
            try:
                value = grids[atype][tuple(idx)]
            except (IndexError, ValueError):
                # Atom lies beyond the upper grid boundary.
                continue
            total += value
            counted += 1
        if counted > 0:
            total /= float(counted)
        scores[cm] = total

    with h5py.File(args['output'], 'w') as tSf:
        tSf.create_dataset('score', data=scores)

    table = np.zeros((n_models, ), dtype=[('name', 'S128'), ('score', 'f8')])
    for n, name in enumerate(model_list):
        table['name'][n] = name
    table['score'] = scores
    table.sort(order='score')
    # Normalise by the best score; skipped when there is nothing to scale.
    if n_models > 0 and table['score'][-1] != 0:
        table['score'] /= table['score'][-1]
    np.savetxt('a_score_table.csv', table[::-1], fmt="%s\t%.2f")
def process(args):
    """Build per-residue-type density grids from extracted peptide poses.

    The grid geometry is derived from the docking box described by
    ``args['config']`` and is logged to ``box_coords.txt``.  For every pose
    of every peptide in the extractor's ``plist`` the grid returned by
    ``gu.process_residue`` is added into the grid of the residue's type.
    Each non-empty grid is then thresholded at its median, divided by the
    number of contributing residues, scaled to 0..100 relative to the
    largest per-type maximum, and written as ``int8`` to the HDF5 file
    ``args['Sfn']`` together with the ``step`` and ``origin`` datasets.

    Args:
        args: Mapping with the keys ``step``, ``pad``, ``config`` and
            ``Sfn``.  The whole mapping is also forwarded to
            ``er.PepExtractor`` as keyword arguments.
    """
    step = args['step']
    # Largest VdW radii - 1.7A
    padding = max(gu.avdw.values()) * 2 * args['pad']

    box_config = dp.ConfigLedock(args['config'])
    GminXYZ = gu.adjust_grid(box_config.get_min_xyz(), step, padding)
    L = box_config.get_box_size() + padding
    N = np.ceil(L / step).astype(int)
    GmaxXYZ = GminXYZ + N * step
    print('GMIN', GminXYZ, N)
    print('BOX', L)
    with open('box_coords.txt', 'w') as f:
        f.write('BOX: ' + ' '.join(GminXYZ.astype(str)) + '\n')
        f.write('STEP: %.2f\n' % step)
        f.write('NSTEPS: ' + ';'.join(N.astype(str)) + '\n')

    NUCS = gu.three_let
    atypes = set(NUCS)
    atypes.discard('H')  # we do not need hydrogens!

    extractor = er.PepExtractor(**args)
    iNUCS = {name: i for i, name in enumerate(NUCS)}
    # Number of residues accumulated into each type's grid.
    fNUCS = np.zeros((len(NUCS), ), dtype=int)
    tSf = {name: np.zeros(N, dtype=np.float32) for name in NUCS}

    plist = extractor.plist
    lM = len(plist)
    t0 = time.time()
    for cm, m in enumerate(plist):
        t1 = time.time()
        dt = t1 - t0
        t0 = t1
        print('STEP: %d PERCENT: %.2f%% TIME: %s' %
              (cm, float(cm) / lM * 100, dt))
        try:
            S = extractor.extract_result(m)
        except Exception:
            # Best effort: one unreadable peptide must not abort the run.
            print('ERROR: BAD PEPTIDE: %s' % m)
            continue

        for S_ in range(S.numCoordsets()):
            for a in S.iterResidues():
                a.setACSIndex(S_)
                try:
                    atype = a.getResname().upper()
                except AttributeError:
                    continue
                if atype not in atypes:
                    continue

                AminXYZ, Ashape = gu.get_bounding(
                    a.getCoords(), padding, step)
                adj = ((AminXYZ - GminXYZ) / step).astype(int)
                # Skip residues whose bounding box leaves the grid.
                if (adj < 0).any():
                    continue
                if ((GmaxXYZ - (AminXYZ + Ashape * step)) < 0).any():
                    continue

                x, y, z = adj
                Agrid, AminXYZ = gu.process_residue(a, padding, step)
                try:
                    tSf[atype][x:x + Agrid.shape[0],
                               y:y + Agrid.shape[1],
                               z:z + Agrid.shape[2]] += Agrid
                except (ValueError, IndexError):
                    # The residue grid does not fit the target window.
                    print(m, a)
                    continue
                fNUCS[iNUCS[atype]] += 1

    print('fNUCS ', fNUCS)

    nNUCS = np.zeros((len(NUCS), ), dtype=np.float32)
    for i, name in enumerate(NUCS):
        mult = fNUCS[i]
        if mult > 0:
            ttSf = tSf[name]
            nmax = np.max(ttSf)
            med = np.median(ttSf)
            ttSf[ttSf < med] = 0
            ttSf /= mult
            nNUCS[i] = nmax / mult
            print(i, name, nNUCS[i], nmax)
    nmax = np.max(nNUCS)

    with h5py.File(args['Sfn'], 'w') as Sf:
        for i, name in enumerate(NUCS):
            # Decide per type whether anything was accumulated; a count left
            # over from the previous loop must not be reused here.
            if fNUCS[i] > 0:
                ttSf = tSf[name]
                ttSf /= nmax
                ttSf *= 100.0
                Sf.create_dataset(name, data=np.ceil(ttSf).astype(np.int8))
            else:
                print('Array is empty for: ', name)
        Gstep = np.array([step, step, step], dtype=np.float32)
        Sf.create_dataset('step', data=Gstep)
        Sf.create_dataset('origin', data=GminXYZ)
        print('Totgood: ', np.sum(fNUCS))
def process(self, args=None):
    """Accumulate per-atom-type grids over this rank's peptides and save.

    Every rank adds the grid returned by ``gu.process_atom`` for each
    non-hydrogen atom of its peptides (``self.aplist``) into the grid of
    that atom's type.  When ``self.cluster`` is ``True`` only the poses
    chosen as affinity-propagation cluster centres are used.  The grids are
    then summed on rank 0, which thresholds each non-empty grid at its
    median, divides it by the number of contributing atoms, scales it to
    0..100 relative to the largest per-type maximum and writes everything
    to the HDF5 file ``args['Sfn']``.

    Args:
        args: Optional argument mapping; ``self.args`` is used when it is
            empty or ``None``.  The keys ``Sfn`` and ``atypes`` are read
            here, the key ``mpi`` is set, and the mapping is forwarded to
            ``er.PepExtractor`` as keyword arguments.
    """
    if not args:
        args = self.args

    comm = self.mpi.comm
    rank = self.mpi.rank

    NUCS = self.atypes
    iNUCS = {name: i for i, name in enumerate(NUCS)}
    iNUCS = comm.bcast(iNUCS)
    lnucs = len(NUCS)
    # Number of atoms accumulated into each type's grid on this rank.
    fNUCS = np.zeros((lnucs, ), dtype=float)
    tSf = {name: np.zeros(self.N, dtype=float) for name in NUCS}

    args['mpi'] = self.mpi
    extractor = er.PepExtractor(**args)
    lM = len(self.aplist)
    comm.Barrier()

    tota_ = 0   # atoms added to a grid
    totba_ = 0  # atoms that did not fit into the grid
    for cm in range(lM):
        m = self.aplist[cm]
        print('Rank %d model %d of %d' % (rank, cm, lM))
        try:
            S = extractor.extract_result(m)
        except Exception:
            # Best effort: one unreadable peptide must not abort the rank.
            print('ERROR: BAD PEPTIDE: %s' % m)
            continue

        tlS = range(S.numCoordsets())
        if self.cluster is True:
            resc = S.select('not element H').getCoordsets()
            cl = 'NOSUP_SERIAL_CALCULATOR'
            mHandler = MatrixHandler()
            matrix = mHandler.createMatrix(resc, cl)
            mat = scipy.spatial.distance.squareform(matrix.get_data())
            # Negative squared distance is used as the similarity measure.
            smatrix = (mat**2) * (-1)
            aff = AffinityPropagation(affinity='precomputed')
            aff_cluster = aff.fit(smatrix)
            tlS = aff_cluster.cluster_centers_indices_
        if tlS is None:
            continue

        for S_ in tlS:
            S.setACSIndex(S_)
            for a in S.iterAtoms():
                # skip hydrogens
                if a.getElement() == 'H':
                    continue
                try:
                    atype = self.rtypes[(a.getResname(), a.getName())]
                except KeyError:
                    print('ATYPE not found', a.getResname(), a.getName())
                    # Without this the type of a previous atom (or an
                    # unbound name) would be used below.
                    continue
                if atype not in self.atypes:
                    continue

                Agrid, AminXYZ = gu.process_atom(a, self.step)
                adj = ((AminXYZ - self.GminXYZ) / self.step).astype(int)
                # A negative start can wrap into a valid slice on the far
                # side of the grid; treat such atoms as not fitting.
                if (adj < 0).any():
                    totba_ += 1
                    continue
                x, y, z = adj
                try:
                    tSf[atype][x:x + Agrid.shape[0],
                               y:y + Agrid.shape[1],
                               z:z + Agrid.shape[2]] += Agrid
                except (ValueError, IndexError):
                    # Window clipped by the upper grid boundary.
                    totba_ += 1
                    continue
                fNUCS[iNUCS[atype]] += 1
                tota_ += 1

    comm.Barrier()
    if rank == 0:
        print('Collecting grids')

    fNUCS_ = comm.allreduce(fNUCS)
    nNUCS = np.zeros((lnucs, ), dtype=float)
    tota = comm.reduce(tota_)
    totba = comm.reduce(totba_)

    # Sum every type's grid on rank 0, one tagged message per type and rank.
    for i in range(lnucs):
        NUC_ = NUCS[i]
        if rank != 0:
            comm.Send(tSf[NUC_], dest=0, tag=i)
        else:
            for j in range(1, self.mpi.NPROCS):
                tG = np.empty(tSf[NUC_].shape, dtype=float)
                comm.Recv(tG, source=j, tag=i)
                tSf[NUC_] += tG
            nNUCS[i] = np.max(tSf[NUC_])

    nNUCS_ = comm.bcast(nNUCS)
    comm.Barrier()

    # Allocate results file
    Sfn = args['Sfn']
    if rank == 0:
        print('Saving data')
        # Types without any atom give 0/0 here; nanmax ignores them.
        with np.errstate(divide='ignore', invalid='ignore'):
            nmax = bn.nanmax(np.divide(nNUCS_, fNUCS_))

        with h5py.File(Sfn, 'w') as Sf:
            for i in range(lnucs):
                NUC_ = NUCS[i]
                mult = fNUCS_[iNUCS[NUC_]]
                if mult > 0.0:
                    tG = tSf[NUC_]
                    med = np.median(tG)
                    tG[tG < med] = 0
                    tG /= float(mult)
                    tG /= float(nmax)
                    tG *= 100.0
                else:
                    print('Array is empty for: ', NUC_)
                # Empty grids are written too, as all zeros.
                Sf.create_dataset(NUC_, data=tSf[NUC_])

            Gstep = np.array([self.step, self.step, self.step], dtype=float)
            Sf.create_dataset('step', data=Gstep)
            Sf.create_dataset('origin', data=self.GminXYZ)
            Sf.create_dataset(
                'atypes', data=np.array([args['atypes'], ], dtype='S20'))
            print('Total bad atoms %d of %d' % (totba, tota))

    comm.Barrier()
    self.database.close()
def process(args):
    """Score every pose of every peptide against per-residue-type grids.

    For each pose, the grid of each selected residue's type is summed over
    the residue's bounding window; the sums are averaged over the residues
    that were scored.  The raw scores are stored in the ``score`` dataset of
    the HDF5 file ``args['output']`` and a ranking normalised by the best
    score is written to ``r_score_table.csv`` in descending order, one row
    per pose named ``<peptide>_<pose>.pdb``.

    Peptides whose extraction fails produce no rows.

    Args:
        args: Mapping with the keys ``Sfn`` (input grid file), ``pad``,
            ``output``, ``excl`` and ``incl``.  The whole mapping is also
            forwarded to ``er.PepExtractor`` as keyword arguments.

    Raises:
        ValueError: If both ``excl`` and ``incl`` are given.
    """
    excl = args['excl']
    incl = args['incl']
    # Validate before touching any file.
    if excl and incl:
        raise ValueError(
            'You can not use include and exclude options simultaneiously!')

    # Largest VdW radii - 1.7A
    padding = max(gu.avdw.values()) * 2 * args['pad']

    with h5py.File(args['Sfn'], 'r') as Sf:
        # Every dataset that is not bookkeeping is a residue-type grid.
        atypes = set(Sf.keys()) - {'origin', 'step', 'atypes', 'H'}
        if excl:
            excl = set(excl)
            atypes -= excl
        if incl:
            incl = set(incl)
            atypes &= incl
        print(excl, incl, atypes)

        GminXYZ = Sf['origin'][:]
        step = Sf['step'][0]
        # Only the selected grids are ever looked up, so only those are read.
        gNUCS = {name: Sf[name][:] for name in atypes}

    # The grid extent is taken from a real type grid, never from a
    # bookkeeping dataset.  With no grid selected nothing is scored and the
    # extent is irrelevant.
    if gNUCS:
        grid_shape = np.array(next(iter(gNUCS.values())).shape)
    else:
        grid_shape = np.zeros(3, dtype=int)
    GmaxXYZ = GminXYZ + grid_shape * step

    extractor = er.PepExtractor(**args)
    plist = extractor.plist
    lM = len(plist)

    names = []
    scores = []
    t0 = time.time()
    for cm, m in enumerate(plist):
        t1 = time.time()
        dt = t1 - t0
        t0 = t1
        print('STEP: %d PERCENT: %.2f%% TIME: %s' %
              (cm, float(cm) / lM * 100, dt))
        try:
            S = extractor.extract_result(m)
        except Exception:
            # Best effort: one unreadable peptide must not abort the run.
            print('ERROR: BAD PEPTIDE: %s' % m)
            continue

        for S_ in range(S.numCoordsets()):
            mscore = 0.0
            ac = 0
            last_residue = None
            for a in S.iterResidues():
                last_residue = a
                a.setACSIndex(S_)
                try:
                    atype = a.getResname().upper()
                except AttributeError:
                    continue
                if atype not in gNUCS:
                    continue

                AminXYZ, Ashape = gu.get_bounding(
                    a.getCoords(), padding, step)
                adj = ((AminXYZ - GminXYZ) / step).astype(int)
                # Skip residues whose bounding box leaves the grid.
                if (adj < 0).any():
                    continue
                if ((GmaxXYZ - (AminXYZ + Ashape * step)) < 0).any():
                    continue

                x, y, z = adj
                try:
                    window = gNUCS[atype][x:x + Ashape[0],
                                          y:y + Ashape[1],
                                          z:z + Ashape[2]]
                except (IndexError, ValueError):
                    continue
                mscore += np.sum(window)
                ac += 1

            if ac > 0:
                mscore /= float(ac)
                # NOTE(review): the mean is further divided by the length of
                # the LAST residue iterated, not of each scored residue --
                # kept as is; confirm this is the intended normalisation.
                mscore /= float(len(last_residue))

            names.append('%s_%02d.pdb' % (m, S_ + 1))
            scores.append(mscore)

    scores = np.asarray(scores, dtype=float)
    with h5py.File(args['output'], 'w') as tSf:
        tSf.create_dataset('score', data=scores)

    table = np.zeros((len(names), ),
                     dtype=[('name', 'S128'), ('score', 'f8')])
    for n, name in enumerate(names):
        table['name'][n] = name
    table['score'] = scores
    table.sort(order='score')
    # Normalise by the best score; skipped when there is nothing to scale.
    if len(table) > 0 and table['score'][-1] != 0:
        table['score'] /= table['score'][-1]
    np.savetxt('r_score_table.csv', table[::-1], fmt="%s\t%.7f")
def process(self, args=None):
    """Measure attack geometry for each peptide pose and store it in HDF5.

    Rank 0 creates the ``out`` dataset in ``args['out']``, sized from the
    first peptide of ``self.aplist``; all ranks then reopen the file with
    the ``mpio`` driver.  For every pose of every peptide in
    ``self.aplist`` and every backbone carbonyl carbon (residue number 1 is
    skipped) one record is written holding distances, angles and dihedrals
    between the carbonyl group and fixed receptor atoms, an ``F``/``R``
    direction flag and a gap-padded sequence string.

    Args:
        args: Optional argument mapping; ``self.args`` is used when it is
            empty or ``None``.  The keys ``out`` and ``receptor`` are read
            here, the key ``mpi`` is set, and the mapping is forwarded to
            ``er.PepExtractor`` as keyword arguments.
    """
    if not args:
        args = self.args

    args['mpi'] = self.mpi
    extractor = er.PepExtractor(**args)
    lM = len(self.aplist)

    dtype = np.dtype([
        ('name', 'S10'),
        ('pnum', '<i4'),
        ('rnum', '<i4'),
        ('dist', '<f8'),
        ('hdist1', '<f8'),
        ('hdist2', '<f8'),
        ('BD', '<f8'),
        ('FL', '<f8'),
        ('AT', '<f8'),
        ('dir', 'S3'),
        ('aln', 'S20'),
    ])

    if self.mpi.rank == 0:
        # NOTE(review): the dataset is sized from the first peptide only;
        # presumably all peptides share length and pose count -- confirm.
        m = self.aplist[0]
        lm = len(m)
        S = extractor.extract_result(m)
        lS = S.numCoordsets()
        with h5py.File(args['out'], 'w') as Sf:
            Sf.create_dataset(
                'out', (len(self.plist) * lS * lm, ), dtype=dtype)

    self.mpi.comm.Barrier()

    Sf = h5py.File(args['out'], 'r+', driver='mpio', comm=self.mpi.comm)
    out = Sf['out']

    # Fixed receptor atoms the peptide geometry is measured against.
    a = prody.parsePDB(args['receptor'])
    OG = a.select('resnum 151 name SG')
    Ob = a.select('resnum 168 and name O')
    ND1 = a.select('resnum 46 and name NE2')
    OXH = a.select("name N resnum 149 150 151")

    t0 = time.time()
    for cm in range(lM):
        m = self.aplist[cm]
        lm = len(m)
        t1 = time.time()
        dt = t1 - t0
        t0 = t1
        print('STEP: %d PERCENT: %.2f%% TIME: %s' % (
            cm, float(cm) / lM * 100, dt))
        try:
            S = extractor.extract_result(m)
        except Exception:
            # Best effort: one unreadable peptide must not abort the rank.
            print('ERROR: BAD PEPTIDE: %s' % m)
            continue

        lS = S.numCoordsets()
        for S_ in range(lS):
            S.setACSIndex(S_)
            tC = S.select('name C')
            if tC is None:
                # select() yields None when nothing matches.
                continue

            for C in tC.iterAtoms():
                rnum = C.getResnum()
                if rnum == 1:
                    continue

                O = S.select('resnum %i and name O' % rnum)
                Nl = S.select('resnum %i and name N' % (rnum))
                N = S.select('resnum %i and name N' % (rnum + 1))
                if O is None or Nl is None or N is None:
                    # Incomplete peptide bond (e.g. no following residue):
                    # nothing to measure for this carbon.
                    continue

                C_ = C.getCoords()
                O_ = O.getCoords()[0]
                N_ = N.getCoords()[0]

                dist = prody.calcDistance(C, OG)[0]
                hdist1 = np.min(prody.calcDistance(OXH, O))
                hdist2 = np.min(prody.calcDistance(Ob, Nl))

                # Pseudo-atom one unit from C along the cross product of
                # (C - N) and (O - C); used as a dihedral reference.
                nC_ = np.cross((C_ - N_), (O_ - C_))
                nC_ /= np.linalg.norm(nC_)
                nC_ = nC_ + C_
                nC_ = nC_.reshape(1, 3)
                nC = C.copy()
                nC.setCoords(nC_)

                BD = prody.calcAngle(OG, C, O)
                FL = prody.calcDihedral(OG, nC, C, O)
                AT = prody.calcAngle(ND1, OG, C)
                angle_ = prody.calcDihedral(ND1, OG, C, N)

                DIR = 'F' if angle_ < 0 else 'R'

                # Pad the capped sequence with '-' to a width of 12.
                s = 'X' + m + 'Z'
                if DIR == 'F':
                    seq = s
                    pref = 6 - rnum
                else:
                    seq = s[::-1]
                    pref = rnum
                suf = 12 - (pref + len(seq))
                seq = '-' * pref + seq + '-' * suf

                outdata = (m, S_ + 1, rnum, dist, hdist1, hdist2,
                           BD, FL, AT, DIR, seq)
                # One slot per (peptide, pose, residue); self.tb is added
                # to the local peptide index.
                ind = (self.tb + cm) * lS * lm + S_ * lm + rnum - 2
                out[ind] = outdata

    self.database.close()
    Sf.close()
def process(args):
    """Write one line of attack geometry per peptide pose to a text file.

    For every pose of every peptide in the extractor's ``plist`` the
    backbone carbonyl carbon closest to the receptor atom selected by
    ``'resnum 241 name OG'`` is located.  A tab-separated line with the pose
    name, that distance, the attack angle, the distance from the carbonyl
    oxygen to the averaged position of three receptor backbone nitrogens,
    an ``F``/``R`` direction flag, a gap-padded sequence string and the
    residue number is appended to ``args['out']``.

    Poses for which the required atoms cannot be selected are reported and
    skipped.

    Args:
        args: Mapping with the keys ``receptor`` and ``out``.  The whole
            mapping is also forwarded to ``er.PepExtractor`` as keyword
            arguments.
    """
    extractor = er.PepExtractor(**args)
    plist = extractor.plist
    lM = len(plist)

    # Fixed receptor atoms the peptide geometry is measured against.
    a = prody.parsePDB(args['receptor'])
    SG = a.select('resnum 241 name OG')
    O = a.select('resnum 262 and name O')
    ND1 = a.select('resnum 79 and name NE2')
    tHN_ = a.select("name N resnum 239 240 241")
    tHN = np.average(tHN_.getCoords(), axis=0)

    t0 = time.time()
    with open(args['out'], 'w') as out:
        for cm, m in enumerate(plist):
            t1 = time.time()
            dt = t1 - t0
            t0 = t1
            print('STEP: %d PERCENT: %.2f%% TIME: %s' % (
                cm, float(cm) / lM * 100, dt))
            try:
                S = extractor.extract_result(m)
            except Exception:
                # Best effort: one unreadable peptide must not abort the run.
                print('ERROR: BAD PEPTIDE: %s' % m)
                continue

            for S_ in range(S.numCoordsets()):
                S.setACSIndex(S_)
                pose_name = '%s_%02d' % (m, S_ + 1)

                tC = S.select('name C')
                if tC is None:
                    # select() yields None when nothing matches.
                    print('ERROR: INCOMPLETE BACKBONE: %s' % pose_name)
                    continue

                # Carbonyl carbon closest to the receptor atom.  rnum is
                # reset per pose so a value from an earlier pose is never
                # reused.
                dist = np.inf
                rnum = None
                for c in tC.iterAtoms():
                    dist_ = prody.calcDistance(c, SG)[0]
                    if dist_ < dist:
                        dist = dist_
                        rnum = c.getResnum()
                if rnum is None:
                    print('ERROR: INCOMPLETE BACKBONE: %s' % pose_name)
                    continue

                C = S.select('resnum %i and name C' % rnum)
                CO = S.select('resnum %i and name O' % rnum)
                N_ = S.select('resnum %i and name N' % (rnum + 1))
                if C is None or CO is None or N_ is None:
                    # E.g. the closest carbon belongs to the last residue.
                    print('ERROR: INCOMPLETE BACKBONE: %s' % pose_name)
                    continue

                angleAttack = prody.calcAngle(ND1, SG, C)
                # Sign of the distance difference decides the direction.
                angle_ = (prody.calcDistance(O, N_)[0]
                          - prody.calcDistance(O, C)[0])
                DIR = 'F' if angle_ > 0 else 'R'

                # Pad the capped sequence with '-' to a width of 10.
                s = 'X' + m + 'Z'
                if DIR == 'F':
                    seq = s
                    pref = 5 - rnum
                else:
                    seq = s[::-1]
                    pref = rnum
                suf = 10 - (pref + len(seq))
                seq = '-' * pref + seq + '-' * suf

                hdist = np.linalg.norm(tHN - CO.getCoords())

                outstr = "%-10s\t%6.2f\t%6.2f\t%6.2f\t%3s\t%12s\t%d\n" % (
                    pose_name, dist, angleAttack, hdist, DIR, seq, rnum)
                out.write(outstr)