def get_saxs_factors(pdb):
    """Collect the SAXS factor entry of every non-hydrogen atom of a PDB.

    The element of an atom is taken to be the first character of its atom
    name. Atoms whose name is listed in ``skipped`` and atoms whose name
    starts with "H" are left out.

    Args:
        pdb: PDB input, handed unchanged to ``rmsdlib.read_pdb``.

    Returns:
        A float32 numpy array built from ``saxs_factors[element]`` of each
        retained atom, in the order ``atoms()`` yields them.
    """
    structure = rmsdlib.read_pdb(pdb)
    factors = []
    for atom in structure.atoms():
        # NOTE: the factors depend only on the atom name, so the atom-type
        # column of the PDB line is not parsed here.
        if atom.name in skipped:
            continue
        element = atom.name[0]
        if element == "H":
            continue
        factors.append(saxs_factors[element])
    return np.array(factors, dtype="float32")
def get_coordinates(pdb, ignore_weightless):
    """Return the atom coordinates of a PDB as a float32 numpy array.

    Args:
        pdb: PDB input, handed unchanged to ``rmsdlib.read_pdb``.
        ignore_weightless: when true, atoms whose name is listed in
            ``skipped`` and atoms whose name starts with "H" are left out;
            when false, the result of ``coordinates()`` on the parsed PDB
            is used as-is.

    Returns:
        A float32 numpy array of the selected coordinates.
    """
    structure = rmsdlib.read_pdb(pdb)

    # Without filtering, take whatever the parsed structure reports.
    if not ignore_weightless:
        return np.array(structure.coordinates(), dtype="float32")

    # The `skipped` test comes first, so the first character of the name is
    # only inspected for atoms that were not already excluded.
    selected = [
        (atom.x, atom.y, atom.z)
        for atom in structure.atoms()
        if atom.name not in skipped and atom.name[0] != "H"
    ]
    return np.array(selected, dtype="float32")
def get_dummy_factors(pdb):
    """Compute the dummy solvent form factor of every non-hydrogen atom.

    Hydrogens get no entry of their own: the contribution of the hydrogens
    that ``get_hydrogens`` reports for an atom is added to that atom's
    factor instead. The element of an atom is taken to be the first
    character of its atom name. Atoms whose name is listed in ``skipped``
    and atoms whose name starts with "H" are left out.

    Args:
        pdb: PDB input, handed unchanged to ``rmsdlib.read_pdb``.

    Returns:
        A float32 numpy array with one value per retained atom, in the
        order ``atoms()`` yields them.
    """
    structure = rmsdlib.read_pdb(pdb)
    factors = []
    for atom in structure.atoms():
        # NOTE: the factors depend only on residue and atom names, so the
        # atom-type column of the PDB line is not parsed here.
        if atom.name in skipped:
            continue
        element = atom.name[0]
        if element == "H":
            continue
        hydrogen_count = get_hydrogens(atom.resname, atom.name)
        # dummy solvent form factors: index 1 of each saxs_factors entry
        factor = saxs_factors[element][1] + hydrogen_count * saxs_factors["H"][1]
        factors.append(factor)
    return np.array(factors, dtype="float32")
def get_weights(pdb, ignore_weightless):
    """Return one weight per atom of a PDB as a float32 numpy array.

    The atom type is read as an integer from characters 57-58 (0-based
    slice ``[57:59]``) of each atom's line and decides how the weight is
    obtained:

    * types 32 and 99 are weightless: they are dropped when
      ``ignore_weightless`` is true and get weight 0.0 otherwise;
    * types below 32 take their weight from ``aweights[atomtype]``;
    * every other type takes the mass of its element
      (``element_masses``), where the element is the first character of
      the atom name; atoms listed in ``skipped`` and atoms whose name
      starts with "H" are dropped.

    Args:
        pdb: PDB input, handed unchanged to ``rmsdlib.read_pdb``.
        ignore_weightless: whether atoms of type 32 or 99 are omitted
            rather than given a zero weight.

    Returns:
        A float32 numpy array of the weights of the retained atoms.
    """
    structure = rmsdlib.read_pdb(pdb)
    collected = []
    for atom in structure.atoms():
        atomtype = int(atom.line[57:59])
        if atomtype in (32, 99):
            if ignore_weightless:
                continue
            weight = 0.0
        elif atomtype < 32:
            weight = aweights[atomtype]
        else:
            if atom.name in skipped:
                continue
            element = atom.name[0]
            if element == "H":
                continue
            weight = element_masses[element]
        collected.append(weight)
    return np.array(collected, dtype="float32")
import argparse

# Command line: a text file listing one PDB file per line, plus flags that
# choose which atoms are kept (default: backbone atoms CA, C, O, N).
a = argparse.ArgumentParser(prog="rmsd-matrix-pdb.py")
a.add_argument("pdblist")
a.add_argument("--np", type=int)
a.add_argument("--allatoms", action="store_true")
a.add_argument("--ca", action="store_true")
args = a.parse_args()

# Reject the contradictory flag combination up front. An explicit parser
# error is used instead of an assert so the check survives `python -O`.
if args.allatoms and args.ca:
    a.error("--allatoms and --ca cannot be combined")

#structures
# Blank lines in the list file are ignored; the file is closed once read.
with open(args.pdblist) as listfile:
    pdbfiles = [l.strip() for l in listfile if l.strip()]

#read atoms
pdbs = [rmsdlib.read_pdb(f) for f in pdbfiles]
for pdb in pdbs:
    rmsdlib.check_pdbs((pdb, ), pdbs)
coors = np.array([list(pdb.coordinates()) for pdb in pdbs])

#select backbone
if not args.allatoms:
    if args.ca:
        atomnames = ("CA", )
    else:
        atomnames = ("CA", "C", "O", "N")
    # The mask is derived from the first structure and applied along the
    # atom axis of every structure.
    amask = np.array([a.name in atomnames for a in pdbs[0].atoms()])
    coors = coors[:, amask]
raise ImportError("Parsing CNS .tbl files requires the Lark (lark-parser) library") from lark import Lark import os currdir = os.path.dirname(__file__) parser = Lark( open(os.path.abspath(currdir) + "/tbl_grammar.ebnf").read(), parser="earley", start="assign_statements" ) from rmsdlib import read_pdb pdbs = [] for p in args.pdbs: pdbs.append(read_pdb(p)) resmaps = [] for m in args.mappings: resmap = {} for l in open(m): ll = l.split() if not len(ll) == 2: continue resmap[ll[0]] = int(ll[1]) resmaps.append(resmap) import numpy pdblen = [len(list(p.atoms())) for p in pdbs] pdbcumlen = [0] for plen in pdblen:
if arg.startswith("--"): raise Exception("Unknown option '%s'" % arg) if len(sys.argv) < 4 or len(sys.argv) % 2: raise Exception( "Please supply an even number of PDB files (unbound, bound)") unboundfiles = [] boundfiles = [] for n in range(2, len(sys.argv), 2): unboundfiles.append(sys.argv[n]) boundfiles.append(sys.argv[n + 1]) if len(boundfiles) == 1 and opt_allresidues == False: raise Exception("Cannot determine the interface for a single PDB") bounds = [rmsdlib.read_pdb(f) for f in boundfiles] unbounds = [rmsdlib.read_pdb(f) for f in unboundfiles] struc_header, structures = read_struc(sys.argv[1]) pivots = [] for hnr, h in enumerate(struc_header): if not h.startswith("#pivot"): continue hh = h.split() assert len(hh) == 5 and hh[1] == str(hnr + 1), h pivot = numpy.array([float(v) for v in hh[2:5]]) pivots.append(pivot) initargs = [sys.argv[1]] + unboundfiles if modefile: initargs += ["--modes", modefile] if imodefile: initargs += ["--imodes", imodefile] for nr, ensfile in ensfiles:
cf = open(clustfile, "w") for cnr, c in enumerate(clust): print >> cf, "Cluster %d ->" % (cnr + 1), for cc in c: print >> cf, cc, print >> cf, "" rootclusters = read_clustfile(clustfile) superclust = [] subclust = [] maxstruc = 30000 coor = rmsdlib.read_pdb(pdbfiles[0]).coordinates() coor = numpy.array(coor) natom = len(coor) lim = cutoff * cutoff * natom coor = coor.flatten() coorsize = len(coor) clust_struc = numpy.zeros(dtype=float, shape=(maxstruc, coorsize)) for rootclustnr, rootclust in enumerate(rootclusters): print >> sys.stderr, rootclustnr + 1 if len(rootclust) == 1: subclust.append(rootclust) superclust.append([len(subclust)]) continue leafclust = [] for cnr, c in enumerate(rootclust):
# Fills the B-factor (temperature) column of a PDB from per-residue
# numerical data and writes the modified atom lines to standard output.
#
# Input: PDB file, numerical data file
#   Columns 1-6 of each data line are the full resid (chainID + 5 resid
#   columns; a chainID of "_" is interpreted as " ").
#   The rest of the line is interpreted as a numerical value (float), V.
#   Blank lines in the data file are ignored.
#
# B-factor = V / max(V) * 100
#   i.e. the largest numerical value (max(V)) gets a B-factor of 100.
#   Missing numerical data is interpreted as zero.
#   If there is no data, or max(V) is zero, every B-factor is written as 0.
import sys
import rmsdlib

pdb = rmsdlib.read_pdb(sys.argv[1])
resids = [res[0].resid for res in pdb.residues()]
# Set copy of the resids so each data line is validated in constant time.
known_resids = set(resids)

values = {}
with open(sys.argv[2]) as datafile:
    for l in datafile:
        if not l.strip():
            continue
        resid = l[:6].replace("_", " ")
        # Explicit error rather than an assert, so the check is not
        # stripped when running under `python -O`.
        if resid not in known_resids:
            raise ValueError("resid not found in PDB: %r" % resid)
        values[resid] = float(l[6:])

# An empty data file would make max() fail; treat it like all-zero data.
maxv = max(values.values()) if values else 0.0

for res in pdb.residues():
    resid = res[0].resid
    v = values.get(resid, 0.0)
    # Guard against division by zero when the largest value is zero.
    b = v / maxv * 100.0 if maxv else 0.0
    for atom in res:
        l = atom.line
        # Characters 60-65 (0-based) of the atom line are replaced by the
        # formatted B-factor; the rest of the line is kept.
        ll = l[:60] + "%6.2f" % b + l[66:]
        sys.stdout.write(ll)