def loadadsite(self, gbestf, cutoff=5.0):
    """Return the residue ids whose height lies within ``cutoff`` of the lowest one.

    ``gbestf`` is the path of a ``gbest.txt`` file; the numbers are read from
    the ``gbest_sorted.txt`` file obtained by rewriting that name. The file is
    comma separated: the first column is returned as the residue id, the
    second column is the residue height used for filtering.

    Args:
        gbestf: path of the ``gbest.txt`` file.
        cutoff: rows with ``height < min(height) + cutoff`` are kept.

    Returns:
        A 1-D numpy array holding the first-column values of the kept rows,
        or an empty list when the file cannot be read, is empty, or has fewer
        than two columns.
    """
    # Escape the dot so that only a literal "gbest.txt" is rewritten.
    gbests = re.sub(r"gbest\.txt", "gbest_sorted.txt", gbestf)
    try:
        # ndmin=2 keeps a single-row file two-dimensional, so the column
        # slicing below works for one residue as well as for many.
        table = np.loadtxt(gbests, delimiter=",", ndmin=2)
    except (IOError, OSError, ValueError):
        # Report the file that was actually opened.
        logger.error("Can NOT read the numbers in {}".format(gbests))
        return []
    logger.debug("Loaded gbest: {}".format(gbests))

    if table.shape[0] == 0 or table.shape[1] < 2:
        return []

    # The residues height was stored in second column
    heights = table[:, 1]
    n_cutoff = heights.min() * 1.0 + cutoff
    # filter out with-in cut-off
    selected = table[heights < n_cutoff][:, 0]
    logger.debug("filter cutoff={}: {}".format(n_cutoff, str(selected)))
    return selected
def face2surface(self, resi, cutoff=5):
    """Rotate the protein so that the residue group ``resi`` faces the surface.

    The view is centered on the protein and then on the residue group to get
    the vector between both centers. The angles between that vector and the
    x / y axes drive two ``self.protate`` calls, after which the rotated
    protein is copied to the ``protein_bak`` object.

    Args:
        resi: list of residue numbers; it must contain at least one element
            and is assumed to be valid (no range check is done).
        cutoff: adsorption-site cut-off added to the radius before the
            opening angle is computed (defaults to the former hard-coded 5).

    Returns:
        ``Theta`` in degrees, where ``cos(Theta) = adjacent / (hypoten + cutoff)``;
        ``adjacent`` is the distance between the protein center and the center
        of the residue group and ``hypoten`` is the largest distance between
        the protein center and the center of any single residue of ``resi``.
    """
    #ASSUMED the list of residue (resi) is valid
    self.cmd.center("protein")
    proteinc = np.array(self.cmd.get_position())
    xi = [1, 0, 0]
    yi = [0, 1, 0]
    #TODO: check resi in range
    #TODO: should project the vector to x-z and y-z plane
    self.cmd.center("protein and resi {}".format("+".join(
        str(x) for x in resi)))
    residuec = np.array(self.cmd.get_position())
    rvt = residuec - proteinc
    adjacent = np.linalg.norm(rvt)

    cosx = np.dot(rvt, xi) / (adjacent * np.linalg.norm(xi))
    cosy = np.dot(rvt, yi) / (adjacent * np.linalg.norm(yi))
    # Rounding can push a cosine marginally outside [-1, 1], which would
    # make arccos return NaN.
    degx = np.degrees(np.arccos(np.clip(cosx, -1.0, 1.0)))
    degy = np.degrees(np.arccos(np.clip(cosy, -1.0, 1.0)))
    logger.info("Rotate the residue group to face to surface.")
    logger.info("degx: {}, degy: {}".format(degx, degy))
    self.protate("y", -90 - degx)
    self.protate("x", -90 - degy)
    self.cmd.create("protein_bak", "protein")
    logger.debug(
        "backup the initial protein position after face to surface")

    # protate() translates the protein along z after rotating it, so the
    # center captured before the rotation is stale: read it again before
    # measuring distances to the (moved) residues.
    self.cmd.center("protein")
    proteinc = np.array(self.cmd.get_position())

    #Compute the radius
    #cosine = adjacent over hypotenuse
    hypoten = 0
    for i in resi:
        self.cmd.center("protein and resi {}".format(str(i)))
        ric = np.array(self.cmd.get_position())
        length = np.linalg.norm(ric - proteinc)
        if length > hypoten:
            hypoten = length

    #It should even works if only one residue in the list
    cosTheta = adjacent / (hypoten + cutoff)
    Theta = np.degrees(np.arccos(cosTheta))
    logger.debug("Adjacent({}) over hypoten({}) +{}".format(
        adjacent, hypoten, cutoff))
    logger.debug("Computed cosTheta: {}, and Theta: {}".format(
        cosTheta, Theta))
    return Theta
def ptransl(self, axis, step):
    """Translate the ``protein`` object by ``step`` along ``axis``.

    The view center on the protein is logged before and after the move.

    Args:
        axis: key into ``self.xyzid`` selecting the component to change.
        step: translation amount for that component.

    Returns:
        -1 (without moving anything) when
        ``self.pcheck(axis, step, "ptransl")`` is truthy, otherwise None.
    """
    if self.pcheck(axis, step, "ptransl"):
        return -1

    def view_center():
        # Center the view on the protein and read the resulting position.
        self.cmd.center("protein")
        return self.cmd.get_position()

    logger.debug(
        "[ptransl] protein position BEFORE translation: {}".format(
            view_center()))

    shift = [0.0] * 3
    shift[self.xyzid[axis]] = step
    logger.debug("[ptransl] translate protein delta: {}".format(shift))
    self.cmd.translate(shift, "protein", state=0, camera=0)

    logger.debug(
        "[ptransl] protein position AFTER translation: {}".format(
            view_center()))
def scoring(self):
    """Score every conformation of the json db and save the db afterwards.

    For each entry of ``self.jsdb["confs"]`` the file ``<id>.pdb`` under
    ``self.jsdb["confd"]`` is passed to ``self.scoringone``. The ``stepN``,
    ``coul``, ``ljsr`` and ``file`` values of the result are copied into the
    entry. A result that lacks any of them (or is not subscriptable) is
    reported with a warning and the entry is left untouched.
    ``self.savejdb()`` is called once all entries have been processed.
    """
    logger.info("Now we call gromacs to calculate the score.")
    wanted = ("stepN", "coul", "ljsr", "file")
    for con in self.jsdb["confs"]:
        f = con["id"] + ".pdb"
        logger.debug("processing file: {}".format(f))
        fn = os.path.join(self.jsdb["confd"], f)
        logger.debug("scoringone the file: {}".format(fn))
        r = self.scoringone(fn)
        try:
            # Collect every value first so that an incomplete result never
            # leaves the record half-updated.
            scores = dict((key, r[key]) for key in wanted)
        except (KeyError, IndexError, TypeError):
            logger.warning("scoringone return incorrect data!!!")
            continue
        con.update(scores)
        logger.debug("updated the configuration db, {}:{}".format(
            con["coul"], con["ljsr"]))
    self.savejdb()
def initcmd(self):
    """Build the command line parser and parse the arguments.

    Creates ``self.parser``, registers the general options, creates the
    mutually exclusive group ``self.initpg`` (``--resi`` / ``--offset`` /
    ``--init``) and stores the parsed namespace in ``self.args``.
    """
    logger.info("Initialized command line arguments")
    self.parser = argparse.ArgumentParser(
        description="This is PSO testing")
    add = self.parser.add_argument

    # Input / output locations.
    add("--outdir",
        help="set the output configuration directory.",
        required=False,
        default="pso_conf")
    add("--jsdbf", help="set the json db file path.", default="db.json")
    add("--proteinf",
        help="set the input protein pdb filename.",
        default="protein.pdb",
        metavar="protein.pdb")
    add("--surfacef",
        help="set the input surface pdb filename.",
        default="surface.pdb",
        metavar="surface.pdb")

    # Numeric options: (flag, help text, default, type), in help order.
    numeric_options = (
        ("--n", "set the number of birds.", 200, int),
        ("--r", "set the total iteration number.", 10, int),
        ("--w", "set the weight for updating velocity.", 0.721, float),
        ("--c1", "set the parameter C1.", 1.193, float),
        ("--c2", "set the parameter C2.", 1.193, float),
        ("--maxitr", "set the maximum number of iteration.", 0, int),
    )
    for flag, text, default, kind in numeric_options:
        add(flag, help=text, default=default, required=False, type=kind)

    add("--emdir",
        help="set the directory for EM, energy minimization using Gromacs.",
        default="EM",
        required=False,
        type=str)
    add("--keep-pdb",
        help="keep all of the pdf files generated during the searching.",
        default=False,
        action="store_true",
        required=False)

    # Only one way of choosing the initial orientation may be given.
    self.initpg = self.parser.add_mutually_exclusive_group()
    add_init = self.initpg.add_argument
    add_init("--resi",
             help="searching only the prefered residue surface",
             type=int,
             nargs="+")
    add_init("--offset",
             help="set the initial searching orientation",
             nargs=6,
             type=float)
    add_init("--init",
             help="use the exactly same molecule conformation as input",
             action="store_true",
             default=False)

    self.args = self.parser.parse_args()
    logger.debug("args: {}".format(self.args))
def initvar(self):
    """Run the parent ``initvar``, log the instance attributes, set ``tid`` to 1."""
    parent = super(simplePSO, self)
    parent.initvar()
    # The attributes are logged before ``tid`` is assigned below.
    attributes = vars(self)
    logger.debug("initialized PSO variables: {}".format(attributes))
    self.tid = 1
def plot(self):
    """Plot the scored conformations stored in the json db.

    Files written to the current directory:

    * ``diagram_0a.pdf`` - energy contour over the X / Y rotation angles,
    * ``diagram_0k.pdf`` - energy of every conformation in db order,
    * ``diagram_0e.pdf`` - sorted energies normalized with the min / max,
    * ``diagram_0h.pdf`` - histogram of the energies,
    * ``energy.txt.gz`` - table of energy and conformation id.

    Unless ``self.args.no_plot_re`` is set, ``system00003.pse`` is then
    loaded and one ``<id>.pdf`` per conformation is saved under
    ``self.jsdb["confd"]``; it shows, for each residue, the smallest z
    distance between its atoms and ``self.surfaceh()["stop"]``.

    Returns:
        0 when plotting is skipped (``no_plot`` or ``no_plot_re``), 1 when
        the json db lacks ``confs`` / ``steps`` / ``mindst``, otherwise None.
    """
    if self.args.no_plot:
        logger.info("skipped the plot progress...")
        return 0
    try:
        confs = self.jsdb["confs"]
        steps = self.jsdb["steps"]
        mindst = self.jsdb["mindst"]
    except KeyError:
        logger.error(
            "Can not find the confs/steps from the json db file. It might be corrupted."
        )
        return 1

    count = 0
    # x / y / z are the rows of the contour grid, xt / yt / zt the row that
    # is currently being filled (one row per ``steps`` conformations).
    x = []
    y = []
    z = []
    xt = []
    yt = []
    zt = []
    ang = 360.0 / steps
    lx = []
    ly = []
    ld = []
    minres = "UNKNOW"
    mineng = 7777777
    mincnt = 0
    maxeng = -7777777
    maxres = "UNKNOW"
    minxra = 0
    minyra = 0
    logger.debug("preparing X Y Z")
    for conf in confs:
        try:
            enegy = float(conf["coul"]) + float(conf["ljsr"])
            resnm = conf["id"]
            xi = int(conf["xi"])
            yi = int(conf["yi"])
        except (ValueError, KeyError):
            # Unscored / malformed record: use the sentinel energy.
            enegy = 7777777
            resnm = "UNKNOW"
            xi = 0
            yi = 0
        if enegy < mineng:
            mineng = enegy
            minres = resnm
            mincnt = count
            minxra = xi
            minyra = yi
            logger.debug("Found a lower energy residue: {}@{}".format(
                minres, mineng))
        if enegy > maxeng:
            maxeng = enegy
            # Remember the id too, otherwise the log always says UNKNOW.
            maxres = resnm
            logger.debug("Found a higher energy residue: {}@{}".format(
                maxres, maxeng))
        xt.append(float(xi * ang - self.args.init_xr))
        yt.append(float(yi * ang - self.args.init_yr))
        zt.append(enegy)
        lx.append(count)
        ly.append(enegy)
        ld.append(resnm)
        count += 1
        if (count % steps) == 0:
            x.append(xt)
            xt = []
            y.append(yt)
            yt = []
            z.append(zt)
            zt = []
    x = np.array(x)
    y = np.array(y)
    z = np.array(z)
    logger.debug("get X, Y and Z, then plot the graph.")

    # --- contour diagram -------------------------------------------------
    plt.figure()
    cmap = plt.cm.get_cmap("RdBu_r")
    if self.args.minlv is not None and self.args.maxlv is not None:
        # Dedicated names: the conformation list must survive for the
        # per-conformation plots at the end of this function.
        level_lo = self.args.minlv
        level_hi = self.args.maxlv
        #ASSUMED: the maxlv is always bigger than minlv
        # Floor division keeps the step an integer; at least 1 so range()
        # does not reject a zero step.
        level_step = max(1, (level_hi - level_lo) // self.ctlvs)
        levels = list(range(level_lo, level_hi, level_step))
        logger.debug("The contour map levels: {}".format(levels))
    else:
        levels = None
        logger.debug("Use the default contour map levels.")
    plt.plot(112, -65, 'ko')  #the PSO ans
    #plot the minimum location
    plt.plot(float(minxra * ang - self.args.init_xr),
             float(minyra * ang - self.args.init_yr), 'kx')
    # NOTE(review): the threshold was written as the Python 2 octal literal
    # 011 (== 9), which is a syntax error on Python 3. The value 9 is kept;
    # confirm whether 11 was intended.
    Da = plt.contourf(x,
                      y,
                      z,
                      cmap=cmap,
                      levels=levels,
                      norm=mpl.colors.SymLogNorm(9))
    plt.title(u"contour diagram\ndistance={}Å".format(mindst))
    plt.xlabel("X rotation angle")
    plt.ylabel("Y rotation angle")
    cbar = plt.colorbar(Da)
    cbar.ax.set_ylabel("energy level")
    plt.savefig("diagram_0a.pdf")
    logger.debug("plot contour diagram and save as pdf file.")

    # --- energy line -----------------------------------------------------
    plt.figure()
    plt.title(u"energy line\ndistance={}Å".format(mindst))
    plt.xlabel("Iteration Number")
    plt.ylabel("Energy Value")
    plt.plot(lx, ly, 'k')
    plt.plot(mincnt, mineng, 'bo')
    plt.plot([mincnt, mincnt * 1.1], [mineng, mineng], 'k')
    plt.text(mincnt * 1.13,
             mineng,
             "id: {}\nen: {}".format(minres, mineng),
             verticalalignment="center",
             horizontalalignment="left")
    logger.debug("plot energy line only.")
    if self.args.minlv is not None:
        plt.ylim(ymin=self.args.minlv)
        logger.debug("set the y-axis minimum range.")
    if self.args.maxlv is not None:
        plt.ylim(ymax=self.args.maxlv)
        logger.debug("set the y-axis maximum range.")
    plt.savefig("diagram_0k.pdf")
    ly = np.array(ly)
    ld = np.array(ld)
    L = np.column_stack((ly, ld))
    np.savetxt("energy.txt.gz", L, delimiter=" ", fmt="%11s %11s")
    logger.debug("plot energy line diagram and save as pdf file.")

    # --- normalized, sorted energies ---------------------------------------
    plt.figure()
    plt.title(u"normalized energy line\ndistance={}Å".format(mindst))
    plt.ylabel("Energy Value")
    plt.xlabel("residues")
    plt.axis("off")
    plt.grid("on")
    plt.xticks([])
    plt.yticks([])
    sly = np.sort(ly)
    sry = sly[::-1]
    nly = (sry - mineng) / (maxeng - mineng)
    plt.plot(range(len(nly)), nly)
    plt.text(0, 0, "id: {}@{}".format(minres, mineng))
    logger.debug("plot energy histogram.")
    plt.savefig("diagram_0e.pdf")
    logger.debug("plot energy histogram diagram and saved as pdf file.")

    # --- histogram -------------------------------------------------------
    plt.figure()
    plt.title(u"normalized energy line\ndistance={}Å".format(mindst))
    plt.hist(ly, 100)
    plt.xlabel("The lowest configuration is id: {}@eng: {}".format(
        minres, mineng))
    plt.savefig("diagram_0h.pdf")
    logger.debug("plot another histogram diagram and saved as pdf file.")

    if self.args.no_plot_re:
        logger.info("skipped the residues plot progress...")
        return 0

    # --- one residue diagram per conformation ------------------------------
    logger.debug(
        "plot residues configuration graph, go through all configurations..."
    )
    self.cmd.load("system00003.pse")
    for con in confs:
        cid = con["id"]
        try:
            nid = int(re.findall(r'\d+', cid)[0])
        except (IndexError, TypeError, ValueError):
            logger.warning("function plot - can not find digit from cid.")
            nid = 0
        logger.debug("find the digit: {} from cid.".format(nid))
        cen = float(con["coul"]) + float(con["ljsr"])
        logger.debug("processing conf: {}, the energy value is: {}".format(
            cid, cen))
        logger.debug("Now create the system state for processing...")
        self.cmd.create("system", "mov", nid + 1, 1)
        atoms = self.cmd.get_model("system")
        resds = atoms.get_residues()
        mindt = [999999.99] * len(resds)
        surfc = self.surfaceh()
        szv = surfc["stop"]
        for atom in atoms.atom:
            # assumes residues are numbered 1..len(resds) - TODO confirm
            rid = int(atom.resi) - 1
            dst = atom.coord[2] - szv
            if dst < mindt[rid]:
                mindt[rid] = dst
        #save the graph under jsdb["confd"] directory
        figs = plt.figure()
        residue_ids = range(1, len(mindt) + 1)
        plt.plot(residue_ids, mindt, 'r+', residue_ids, mindt, 'k')
        plt.title(
            u"residues configuraions diagram\ndistance={}Å; energy={}kj".
            format(self.jsdb["mindst"], cen))
        plt.savefig(os.path.join(self.jsdb["confd"], cid + ".pdf"))
        # One figure per conformation: release it once it is on disk so the
        # open figures do not pile up.
        plt.close(figs)
        logger.debug("plot a residues diagram and saved as pdf.")
def protation(self):
    """Sample ``steps`` x ``steps`` rotations of the protein.

    With ``step = 360 / self.args.steps`` degrees, the inner loop stores the
    current protein as the next state of the ``mov`` object and rotates it
    about x; after each inner loop the protein is rotated about y. When the
    sampling is over, the last state is copied to the ``final`` object and
    ``self.savefile(pdb=True)`` is called.
    """
    n_steps = self.args.steps
    angle = 360.0 / n_steps
    for outer in range(n_steps):
        for inner in range(n_steps):
            # 1-based index of the state that is being stored.
            frame = outer * n_steps + inner + 1
            #save state
            self.cmd.create("mov", "protein", self.cmd.get_state(), frame)
            logger.debug(
                "[Current state ({})]Saved new state (count: {}), after rotate x"
                .format(self.cmd.get_state(), frame))
            logger.debug(
                "[protation] for loop i: {}, j: {}, ia: {}, ja: {}".format(
                    outer, inner, outer * angle, inner * angle))
            logger.debug("{:=^70}".format("next round"))
            self.protate("x", angle)
        logger.debug("{:=^70}".format("Y AXIS"))
        self.protate("y", angle)
        logger.debug("{:=^70}".format("N TURN"))

    self.cmd.ending()
    logger.debug("[protation] Goes to the last state ({}).".format(
        self.cmd.get_state()))
    self.cmd.create("final", "protein", self.cmd.get_state())
    logger.debug("Saved the final structure.")
    self.savefile(pdb=True)
    logger.info(
        "[protation] Saved file (count: {}), rotation sampling is done".
        format(self.svcount))
def protate(self, axis, step):
    """Rotate the ``protein`` object about its view center, then restore its Z.

    ``self.proteinz()`` (logged as the lowest Z of the protein) is read
    before and after the rotation and the protein is translated along z by
    the difference, so that the value is the same as before the rotation.

    Args:
        axis: rotation axis handed to ``self.cmd.rotate``.
        step: rotation amount handed to ``self.cmd.rotate``.

    Returns:
        -1 (without rotating) when ``self.pcheck(axis, step, "protate")`` is
        truthy, otherwise None.
    """
    if self.pcheck(axis, step, "protate"):
        return -1
    debug = logger.debug

    z_before = self.proteinz()
    debug("[protate] The lowest Z B4 rotate is : {}".format(z_before))

    # Rotate about the point the view centers on for the protein.
    self.cmd.center("protein")
    pivot = self.cmd.get_position()
    debug("[protate] define the origin: {}".format(pivot))
    self.cmd.rotate(axis, step, "protein", camera=0, origin=pivot)
    debug("[protate] Rotate {} axis {} units ".format(axis, step))
    debug("rotate {}, {}, protein, camera=0, origin={}".format(
        axis, step, pivot))

    z_after = self.proteinz()
    debug("[protate] The lowest Z AF rotate is : {}".format(z_after))
    z_shift = z_before - z_after
    debug("[protate] distA: {} - distB: {} = delta: {}".format(
        z_before, z_after, z_shift))

    # Undo the change in Z caused by the rotation.
    self.cmd.translate([0, 0, z_shift], "protein", state=0, camera=0)
    debug(
        "[protate] translated the protein {}(delta) units.".format(z_shift))
    debug("[protate] now the Z value is : {}".format(self.proteinz()))
def mindst(self, state=-1, z=True, surfc=None):
    """Find the minimal distance between the surface and the protein.

    The highest atom of the ``surface`` model and the lowest atom of the
    protein model are located by their z coordinate.

    Args:
        state: 0 reads the ``protein`` model (use it while the ``mov`` model
            does not exist yet); -1 reads ``mov`` at the current state; any
            other value reads ``mov`` at that state.
        z: when equal to True the z distance (protein bottom minus surface
            top) is returned, otherwise the euclidean distance between the
            two extreme atoms. ``z=False`` may not work in current version.
        surfc: optional result of ``self.surfaceh()``; when falsy,
            ``self.surfaceh()`` is called.

    Returns:
        The requested distance; -1 when ``state`` cannot be converted to an
        integer; 0 when a model cannot be read.
    """
    pdown = float("+inf")
    adown = [0.0, 0.0, 0.0]
    surf = surfc if surfc else self.surfaceh()
    stop = surf["stop"]
    atop = surf["atop"]

    try:
        int(state)
    except (TypeError, ValueError):
        # TypeError covers values such as None, which int() rejects too.
        logger.warning("given state is not integer!!")
        return -1

    #loop all atoms to find the top of surface and the bottom of protein
    #TODO: extract surface height to another function
    try:
        atoms = self.cmd.get_model("surface")
    except Exception:
        logger.warning("Can not find model: surface")
        return 0
    for atom in atoms.atom:
        if atom.coord[2] > stop:
            stop = atom.coord[2]
            atop = atom.coord[:3]
    logger.debug("(state: {})sTOP is : {}.".format(self.cmd.get_state(),
                                                   stop))

    get_state = self.cmd.get_state()
    if state == 0:
        model = "protein"
        get_state = 0
    elif state != -1:
        model = "mov"
        get_state = state
    else:
        model = "mov"

    logger.debug("Trying to get model: {} at state({}).".format(
        model, get_state))
    try:
        atoms = self.cmd.get_model(model, get_state)
    except Exception:
        logger.warning("Can not find model({}): in state({})".format(
            model, get_state))
        return 0
    for atom in atoms.atom:
        if atom.coord[2] < pdown:
            pdown = atom.coord[2]
            adown = atom.coord[:3]
    logger.debug("(state: {})pdown is : {}.".format(
        self.cmd.get_state(), pdown))

    logger.debug("Calculate the destince bewteen two atoms.")
    deltasq = [(i - j)**2 for i, j in zip(atop, adown)]
    distanc = math.sqrt(sum(deltasq))
    distanz = pdown - stop
    logger.debug("The distance is : {}".format(distanc))
    logger.debug("Z distance is : {}".format(distanz))
    # ``== True`` is kept on purpose: it preserves which values of ``z``
    # select the z distance (True and 1, but not other truthy objects).
    if z == True:
        return distanz
    return distanc
def load_post(self, skip=False):
    """Align the surface with the protein and back up the protein.

    Unless ``skip`` is true, the ``surface`` object is translated by the
    difference between the protein and surface view centers, and then along
    z by ``self.mindst(state=0, z=True)``. In every case the ``protein``
    object is copied to ``protein_bak``.
    """

    def as_text(vector):
        # Fixed-point rendering of a coordinate triple for the log.
        return ", ".join(format(value, "f") for value in vector)

    #ASSUMED: it is called after load_file
    if not skip:
        self.cmd.center("surface")
        surface_center = self.cmd.get_position()
        logger.debug("Center of the surface: " + as_text(surface_center))

        self.cmd.center("protein")
        protein_center = self.cmd.get_position()
        logger.debug("Center of the protein: " + as_text(protein_center))

        offset = [p - s for p, s in zip(protein_center, surface_center)]
        logger.debug("move delta: " + as_text(offset))
        self.cmd.translate(offset, "surface", camera=0)

        logger.debug("Translate Z axis of protein-surface.")
        gap = self.mindst(state=0, z=True)
        #let the distance to be zero, translate later
        self.cmd.translate([0, 0, gap - 0], "surface", camera=0)
        remaining = self.mindst(state=0, z=True)
        logger.debug(
            "The minimal Z distance now is : {}".format(remaining))

    self.cmd.create("protein_bak", "protein")
    logger.debug("backup the original initial protein position")
def load_file(self):
    """Load the protein and surface pdb files as ``protein`` and ``surface``."""
    sources = (
        (self.args.proteinf, "protein"),
        (self.args.surfacef, "surface"),
    )
    for path, object_name in sources:
        logger.debug("loading " + path)
        self.cmd.load(path, object_name)
    logger.info("loaded protein and surface pdb files.")
def cluster(self):
    """Cluster the gbest results with DBSCAN and collect per-cluster data.

    ``self.Hnums`` is standardized and clustered; the label list is stored
    in ``self.dbresult`` and turned into ``self.clusters``, a dict indexed
    by cluster id (-1 is the DBSCAN noise label) whose values hold:

    * ``coden`` - display name: ``"noise"`` or the 1-based, zero-padded
      rank of the cluster once sorted by mean energy,
    * ``gbest`` - indexes (into ``self.labels``) of the members,
    * ``englt`` - energy of every member,
    * ``adslt`` - adsorption-site residues of every member, concatenated,
    * ``adsit`` - residues common to all members so far,
    * ``fpath`` - value returned by ``self.plotcluster`` (only when
      plotting is enabled).

    ``self.cs_eid`` receives the cluster ids sorted by ascending mean
    energy, with the noise id moved to the end. ``self.printoutcluster()``
    is called last.
    """
    self.get_gbest()
    X = StandardScaler().fit_transform(self.Hnums)
    db = DBSCAN(eps=self.args.eps, min_samples=self.args.minn).fit(X)
    self.dbresult = np.array(db.labels_)
    logger.debug("DBSCAN labels: {}".format(",".join(map(str, self.dbresult))))

    for idx, cid in enumerate(self.dbresult):
        #Here, idx is *gbest_id*
        if cid not in self.clusters:
            self.clusters[cid] = {
                "coden": "noise" if cid == -1 else "{:02d}".format(cid + 1),
                "gbest": [],
                "englt": [],
                "adsit": [],
                "adslt": [],
            }
        entry = self.clusters[cid]
        gbestf = self.labels[idx]
        energy = self.loadenergy(gbestf)
        adsita = self.loadadsite(gbestf)
        adsitb = entry["adsit"]
        logger.debug("site a:{}".format(adsita))
        logger.debug("site b:{}".format(adsitb))
        # Decide on the member count, not on the size of the running
        # intersection: an intersection that became empty must stay empty
        # instead of being restarted from the next member.
        if entry["gbest"]:
            adsitc = np.intersect1d(adsita, adsitb)
            logger.debug("site A and B intersection.")
        else:
            adsitc = adsita
        logger.debug("site c:{}".format(adsitc))
        entry["adslt"] += list(adsita)
        entry["gbest"].append(idx)
        entry["englt"].append(energy)
        entry["adsit"] = adsitc

    # Rank the clusters by mean energy (lowest first); noise goes last.
    self.cs_eid = sorted(self.clusters,
                         key=lambda x: np.mean(self.clusters[x]["englt"]),
                         reverse=False)
    if -1 in self.cs_eid:
        self.cs_eid.remove(-1)
        self.cs_eid.append(-1)
    logger.debug("cluster sorted by energy: {}".format(str(self.cs_eid)))
    for i, c in enumerate(self.cs_eid):
        if c != -1:
            self.clusters[c]["coden"] = "{:02d}".format(i + 1)

    if not self.args.no_plot and len(self.clusters) > 0:
        cidlt = list(self.clusters)
        for c in cidlt:
            list_of_gbesti = self.clusters[c]["gbest"]
            list_of_gbestf = [self.labels[x] for x in list_of_gbesti]
            logger.debug("gbesti: {}".format(str(list_of_gbesti)))
            logger.debug("gbestf: {}".format(str(list_of_gbestf)))
            filename = "{}".format(self.clusters[c]["coden"])
            fpath = self.plotcluster(filename, list_of_gbestf, bold="")
            self.clusters[c]["fpath"] = fpath
    logger.debug("clusters: \n{}".format(self.pp.pformat(self.clusters)))
    self.printoutcluster()
def hello(self, msg="はじまるよ~♪ "):
    """Log ``msg`` at debug level, followed by a deprecation notice."""
    lines = (
        "[hello] {}".format(msg),
        "hello function is deprecated.",
    )
    for line in lines:
        logger.debug(line)
self.hello("{}: {}".format(i, act.__doc__)) self.hello("="*77) def main(self): logger.debug("command line arguments:") logger.debug(str(self.args)) act = getattr(self, self.args.act, self.helping) if callable(act): logger.debug("Run the action ({})".format(str(act))) act() if __name__ == "__main__": m=dbcluster() logger.debug("Hello! This is main function of dbcluster") m.hello() #m.fplot() #m.fneo() #m.fthree() #m.ffour() #m.ffive() m.main()