class Pair:
    """Every (receptor id, ligand id) combination of two databases.

    Wraps one ``Receptor`` and one ``Ligand`` database object and exposes
    iteration over, and the size of, their cross product.
    """

    def __init__(self, receptordb, liganddb):
        """Open the ligand database, then the receptor database.

        Args:
            receptordb: Argument forwarded unchanged to ``Receptor``.
            liganddb: Argument forwarded unchanged to ``Ligand``.
        """
        self.ligand = Ligand(liganddb)
        self.receptor = Receptor(receptordb)

    def foreachPair(self, pid, sink):
        """Call ``sink(pid, cid)`` once for every ligand id ``cid``."""
        self.ligand.foreachId(lambda cid: sink(pid, cid))

    def foreach(self, sink, criteria=''):
        """Call ``sink(pid, cid)`` for every receptor/ligand id pair.

        Receptor ids drive the outer iteration, ligand ids the inner one.

        Args:
            sink: Callable taking ``(pid, cid)``.
            criteria: Accepted for interface compatibility; it is not
                used by this method.
        """
        self.receptor.foreachId(lambda pid: self.foreachPair(pid, sink))

    def size(self, rcriteria='1=1', lcriteria='1=1'):
        """Return the number of pairs.

        Args:
            rcriteria: Passed to ``self.receptor.size``.
            lcriteria: Passed to ``self.ligand.size``.

        Returns:
            The product of the ligand count and the receptor count.
        """
        ligands = self.ligand.size(lcriteria)
        receptors = self.receptor.size(rcriteria)
        return ligands * receptors

    def close(self):
        """Close both databases.

        The receptor database is closed even when closing the ligand
        database raises; the exception still propagates to the caller.
        """
        try:
            self.ligand.close()
        finally:
            self.receptor.close()
def main():
    """Entry point: parse arguments, build the GNN, run the GA search."""
    # Command-line options.
    options = parse_args()

    # Silence the libraries used by the Ligand class.
    Ligand(None).quiet()

    # Seed numpy's global random generator from the command line.
    numpy.random.seed(options.seed)

    # Build the GNN object that the search uses.
    oracle = init_gnn(options)

    # Run the genetic-algorithm search.
    ga_best = ga_search(options, oracle)
def run():
    """Save the uploaded ligand and render the results page.

    Reads the ``protein`` form field, picks the last entry of
    ``request.files`` (or ``None`` when nothing was uploaded), hands it to
    ``Ligand.save`` together with ``app.instance_path`` and renders
    ``results.html`` with the value returned by ``Ligand.save``.

    NOTE(review): assumes the save call runs once, after the upload loop,
    which is what the ``None`` default for the ligand suggests - confirm.
    """
    protein = request.form['protein']
    data = dict(request.form)  # copy of the form; not read again below

    uploaded = None
    if request.files:
        # Each iteration overwrites the previous pick, so the last upload
        # field wins.
        for field_name in request.files:
            uploaded = request.files.get(field_name)

    word = Ligand.save(uploaded, app.instance_path)
    return render_template("results.html", ligand=word, protein=protein)
def init_gnn(args):
    """Build the GNN ops and a TensorFlow session restored from ``args.model``.

    Args:
        args: Parsed options; ``args.protein`` and ``args.model`` are read
            here, and ``args`` is also passed to ``build_gnn``.

    Returns:
        A ``(sess, ops)`` tuple: the restored session and whatever
        ``build_gnn`` returned.
    """
    # Placeholder inputs handed to build_gnn: the protein loaded from file,
    # a "CCC" ligand dict, and a target holding only a ``globals`` array.
    protein = load_protein_file(args.protein)
    ligand = Ligand("CCC").get_dict()
    target = {
        'n_node': 0,
        'n_edge': 0,
        'nodes': None,
        'edges': None,
        'senders': None,
        'receivers': None,
        'globals': numpy.array([0.0, 1.0], dtype=numpy.float32),
    }
    ops = build_gnn(args, (protein, ligand, target))

    # Session configuration.
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = False
    sess = tf.Session(config=config)

    # Initialise the variables, then overwrite them from the checkpoint.
    sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(sess, args.model)
    return (sess, ops)
def postLigand(ligand):
    """Pass ``ligand`` and ``app.instance_path`` to ``Ligand.save``.

    Returns:
        Whatever ``Ligand.save`` returns.
    """
    return Ligand.save(ligand, app.instance_path)
def pdf():
    """Build a PDF from fixed sample data and serve it as a static file.

    Both inputs to ``Ligand.makePDF`` are hard-coded here. The value it
    returns is passed to ``app.send_static_file``, so it is presumably a
    file name inside the static folder - confirm against ``makePDF``.
    """
    ligand_data = {"EGFR": 1, "EGFE": 2}
    ml_data = {
        "Support Vector Machines": [0.5, 0.5],
        "KNN": [0.4, 0.6],
    }
    pdf_name = Ligand.makePDF(ligand_data, ml_data)
    return app.send_static_file(pdf_name)
# Loadpdb(pdb, hetatm, verbose): read a PDB file handle into Molecule objects.
#
# Parameters
#   pdb     -- iterable of PDB text lines (a file handle). When it is None the
#              function terminates the program through sys.exit.
#   hetatm  -- when True, HETATM records are read as well as ATOM records.
#   verbose -- when True, prints the number of molecules found and the
#              molid / molecule type of each one.
#
# Returns
#   mol_data, a dict keyed by Molecule.molid whose values are deep copies of
#   the Protein / Lipid / Ligand objects built while reading.
#
# Behaviour visible in the code
#   * ATOM records whose atom-name field (columns 12-16) is "OXT" are skipped.
#   * Each kept record is split by Pdbcordsec, wrapped in an Atom and added to
#     the current molecule with AddToResidue; the running atom index is also
#     appended to mol.atmidx.
#   * The molecule class is chosen by looking the lower-cased residue name up
#     in Mol_types['protein'], ['lipid'] and ['ligand']. A name found in none
#     of them prints a message and ends the program through sys.exit.
#   * HETATM records may only start Ligand molecules; the first HETATM record
#     stores the molecule that was being built from ATOM records.
#   * Lines starting with TER / END / ENDMDL (any case) only record
#     frame_tag = line[0:3].
#   * After reading, resids (sorted residue ids) and nor (their count) are set
#     on every molecule. For non-ligand molecules, gaps in the residue
#     numbering are printed as chain breaks and chain_break is set to True.
#
# NOTE(review): this is Python 2 code (print statements).
# NOTE(review): the None check relies on `assert`, which is removed when
#   Python runs with -O; in that mode a None argument is not caught here.
# NOTE(review): frame_tag keeps only three characters, so the 'endmdl' entry
#   in the list it is compared against can never match; ENDMDL lines are
#   matched through 'end' instead.
# NOTE(review): `mol` is only assigned while handling ATOM/HETATM records;
#   presumably an input without any such record fails at the final
#   deepcopy(mol) - confirm.
def Loadpdb(pdb=None, hetatm= True, verbose=False): try: assert(pdb != None) #Check if filehandle to PDB file is passed except AssertionError: sys.exit("**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb.") AtomNumber=0 #Keeps track of atom indices (assigned in the order atoms listed in input file) mol_data={} #Key: molid; Value: Molecule_Type Object; Keep track of different molecules (different chains or molecule type) in input structure first_res =True #To identify molecule type of every molecule in input structure and accordingly define Molecule object. Prev_res=0 # to keep track of residue change Prev_chain='aa' # to keep track of chain change in HETATM section atmTohet = True #To determine transition from ATOM to HETATM record frame_tag = '' # To keep track of multi-frame entry (multiple entry for same molecule type with same chain id) '''Load the PDB structure file''' for line in pdb: if line[0:4]=="ATOM" and line[12:16].upper().strip() not in ["OXT"]: AtomNumber+=1 AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line) atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ) #Check for unrecognized residue and new molecule if not first_res: if ResName.lower() not in Mol_types['protein'] + Mol_types['lipid'] + Mol_types['ligand']: print "*** Unrecognized residue name: "+ ResName+ " ***.\nAdded %s as Ligand." 
% ResName sys.exit("In file configstruc.py: Add missing residue name("+ ResName+ ") to appropriate molecule in Mol_types") if ResNo != Prev_res or (Prev_chain != Chain and mol.molecule_type().lower()=='ligand'): if mol.molecule_type().lower()=='ligand': mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary first_res = True elif Prev_chain != Chain or ResName.lower() not in Mol_types[mol.molecule_type().lower()]: #Either Chain is different or New residue doesn't belong to current molecule type mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary first_res = True elif frame_tag.lower() in ['endmdl', 'ter', 'end'] and Prev_chain == Chain: #Different molecule (of same molecule type) with same chain id; as in trajectory frames mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary first_res = True if first_res: #Initialize mol for new chain or molecule if ResName.lower() in Mol_types['protein']: mol = Protein() elif ResName.lower() in Mol_types['lipid']: mol = Lipid() elif ResName.lower() in Mol_types['ligand']: mol = Ligand() else: print "*** Unrecognized residue name: "+ ResName+ " ***.\n Cannot initialize Molecule object." 
sys.exit("In file configstruc.py: Add missing residue name("+ ResName+ ") to appropriate molecule in Mol_types") first_res = False frame_tag = '' mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac) mol.atmidx.append(AtomNumber) Prev_res= ResNo Prev_chain=Chain elif line[0:6]=="HETATM" and hetatm == True: if atmTohet: mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary first_res = True atmTohet = False AtomNumber+=1 AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line) atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ) #Check for new ligand molecule if not first_res and (ResNo != Prev_res or Prev_chain != Chain): mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary first_res = True #Initialize mol for new chain or molecule if first_res: if ResName.lower() in Mol_types['ligand']: mol = Ligand() else: print "*** Unrecognized residue name: "+ ResName+ " ***.\n Cannot initialize Ligand object." sys.exit("In file configstruc.py: Add missing residue name ("+ ResName+ ") to ligand molecule in Mol_types") first_res = False mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac) mol.atmidx.append(AtomNumber) Prev_res= ResNo Prev_chain=Chain elif line[0:3].lower() in ["ter", "end"] or line[0:6].lower() == "endmdl": frame_tag = line[0:3] #append the last mol object to mol_data mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary if verbose: print "Number of molecules in input file: ", len(mol_data), "\n" #Update mol_data[molid].nor, mol_data[molid].resids, and check for chain breaks in non-ligand molecules for key in sorted(mol_data): if verbose: print "Molid:", key,"Molecule_Type:",mol_data[key].molecule_type() mol_data[key].resids = sorted(mol_data[key].residue) mol_data[key].nor = len(mol_data[key].resids) if mol_data[key].molecule_type().lower() != 'ligand': #Check for chain breaks in non-ligand molecule resids_diff=numpy.array(mol_data[key].resids[1:]) - 
numpy.array(mol_data[key].resids[:-1]) if mol_data[key].nor != (numpy.sum(resids_diff)+1): break_indices = numpy.where(resids_diff > 1) print "Chain break encountered in molecule",key, "at residue positions: " for res in break_indices[0]: print mol_data[key].resids[res], print "\n" mol_data[key].chain_break = True return mol_data
import os
from parameter import Parameter
from ligand import Ligand
from receptor import Receptor
from path import Path


def print_pairs(pid, ligand):
    """Print one ``pid<TAB>cid`` line for every id in ``ligand``."""
    def emit(cid):
        print(pid, cid, sep='\t')

    ligand.foreachId(emit)


# Both databases are opened from the data path plus the configured names.
p = Parameter()
pt = Path(p)
ligand = Ligand(pt.data + p._('project.wizard.ligand.db'))
receptor = Receptor(pt.data + p._('project.wizard.receptor.db'))


def _print_receptor_pairs(receptor_id):
    """Print every pair that starts with ``receptor_id``."""
    print_pairs(receptor_id, ligand)


# Receptor ids drive the outer loop, ligand ids the inner one.
receptor.foreachId(_print_receptor_pairs)

ligand.close()
receptor.close()
import os
from pdbqtdaemon import PDBQTDaemon
from parameter import Parameter
from ligand import Ligand
from path import Path

# Module-level configuration shared by every PrepareLigand instance.
process_name = 'prepare_ligand'
p = Parameter()
pt = Path(p)
user = p._('user')
payload = p.i('daemon.sbatch.payload')
db = Ligand(pt.liganddb)


class PrepareLigand(PDBQTDaemon):
    """PDBQT daemon that writes one sbatch script per ligand."""

    def __init__(self):
        """Initialise the base daemon with the module-level configuration."""
        PDBQTDaemon.__init__(self, user, db, pt, payload, process_name)

    def molecule_done(self, id):
        """Return True when the PDBQT file for ligand ``id`` exists."""
        return os.path.isfile(pt.ligandpdbqt(id))

    def prepare(self, id, templ):
        """Write the sbatch script for ligand ``id``.

        Changes the working directory to ``self.path.docking_ligand`` and
        writes the script there.

        Args:
            id: Ligand identifier used in the log and script file names.
            templ: Ignored; the text written is always built from
                ``self.template``.

        NOTE(review): ``self.path``, ``self.proc_name`` and
        ``self.template`` are presumably set by ``PDBQTDaemon`` - confirm.
        """
        os.chdir(self.path.docking_ligand)
        log_name = '{}/{}_{}.out'.format(self.path.log, self.proc_name, id)
        templ = self.template.format(log_name, self.path.ligand(id), id)
        target = '{}_{}_{}.sbatch'.format(self.path.project_name,
                                          self.proc_name, id)
        # Close the handle deterministically so the script is fully written
        # when this method returns.
        with open(target, 'w') as sbatch_file:
            sbatch_file.write(templ)
from parameter import Parameter
from ligand import Ligand
from receptor import Receptor


def line(id, ligand):
    """Return ``id`` followed by every ligand id, joined with tabs."""
    fields = [str(id)]

    def collect(ligand_id):
        fields.append(str(ligand_id))

    ligand.foreachId(collect)
    return '\t'.join(fields)


p = Parameter()
ligand = Ligand(p._('project.ligand.db'))
receptor = Receptor(p._('project.receptor.db'))


def _print_row(receptor_id):
    """Print the tab-separated row that starts with ``receptor_id``."""
    print(line(receptor_id, ligand))


# One output row per receptor id.
receptor.foreachId(_print_row)

ligand.close()
receptor.close()
# Loadpdb(pdb, hetatm, verbose): read a PDB file handle into Molecule objects.
#
# Parameters
#   pdb     -- iterable of PDB text lines (a file handle). When it is None the
#              function terminates the program through sys.exit.
#   hetatm  -- when True, HETATM records are read as well as ATOM records.
#   verbose -- when True, prints the number of molecules found and the
#              molid / molecule type of each one.
#
# Returns
#   mol_data, a dict keyed by Molecule.molid whose values are deep copies of
#   the Protein / Ligand objects built while reading.
#
# Behaviour visible in the code
#   * ATOM records whose atom-name field (columns 12-16) is "OXT" are skipped.
#   * Each kept record is split by Pdbcordsec, wrapped in an Atom and added to
#     the current molecule with AddToResidue; the running atom index is also
#     appended to mol.atmidx.
#   * For ATOM records the molecule class is Protein or Ligand, chosen by
#     looking the lower-cased residue name up in Mol_types. HETATM records
#     may only start Ligand molecules. An unknown name prints a message and
#     ends the program through sys.exit.
#   * A line starting with "TER" stores the current molecule and makes the
#     next record start a new one.
#   * In the HETATM section a new molecule starts when the residue number or
#     chain differs from the previous HETATM record; check_het keeps the very
#     first HETATM record from triggering that.
#   * When hetatm is true the molecule being built is stored once more after
#     reading.
#   * For non-ligand molecules resids (sorted residue ids) and nor (their
#     count) are set, and gaps in the residue numbering are printed as chain
#     breaks.
#
# NOTE(review): this is Python 2 code (print statements).
# NOTE(review): the None check relies on `assert`, which is removed when
#   Python runs with -O; in that mode a None argument is not caught here.
# NOTE(review): `mol` is only assigned while handling ATOM/HETATM records;
#   presumably an input without any such record fails at deepcopy(mol) -
#   confirm.
def Loadpdb(pdb=None, hetatm=True, verbose=False): try: assert (pdb != None) #Check if filehandle to PDB file is passed except AssertionError: sys.exit( "**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb. " ) AtomNumber = 0 #Keeps track of atom indices (assigned in the order atoms listed i input file) mol_data = { } #Key: molid; Keep track of different molecules (different chains or molecule type) in input structure check_het = False #To keep track of new Hetero residue first_res = True #To identify molecule type of every molecule in input structure and accordingly define Molecule object. Prev_res = 0 # to keep track of residue change in HETATM section; a new Molecule object is assigned for every residue. Prev_chain = 'a' # to keep track of chain change in HETATM section '''Load the PDB structure file''' for line in pdb: if line[0:4] == "ATOM" and line[12:16].upper().strip() not in ["OXT"]: AtomNumber += 1 AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec( line) atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ) if first_res: #Initialize mol for new chain or molecule if ResName.lower() in Mol_types['protein']: mol = Protein() elif ResName.lower() in Mol_types['ligand']: mol = Ligand() else: print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Molecule object." 
sys.exit( "In file configstruc.py: Add missing residue name(" + ResName + ") to appropriate molecule in Mol_types") first_res = False mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac) mol.atmidx.append(AtomNumber) elif line[0:3] == "TER": mol_data[Molecule.molid] = deepcopy( mol) #copy mol object into dictionary first_res = True # mol object will be initialized to molecule type of next molecule elif line[0:6] == "HETATM" and hetatm == True: AtomNumber += 1 AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec( line) atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ) #Check for new molecule if ( ResNo != Prev_res or Prev_chain != Chain ) and check_het == True: #For first HETATM check_het is always False mol_data[Molecule.molid] = deepcopy( mol) #copy mol object into dictionary first_res = True #Initialize mol for new chain or molecule if first_res: if ResName.lower() in Mol_types['ligand']: mol = Ligand() else: print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Ligand object." 
sys.exit( "In file configstruc.py: Add missing residue name (" + ResName + ") to ligand molecule in Mol_types") first_res = False mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac) mol.atmidx.append(AtomNumber) if Prev_res == 0: check_het = True Prev_res = ResNo Prev_chain = Chain if hetatm: #If HETATM record was added; append the last hetero residue object to mol_data mol_data[Molecule.molid] = deepcopy( mol) #copy mol object into dictionary if verbose: print "Number of molecules in input file: ", len(mol_data), "\n" #Update mol_data[molid].nor, mol_data[molid].resids, and check for chain breaks in non-ligand molecules for key in sorted(mol_data): if verbose: print "Molid:", key, "Molecule_Type:", mol_data[key].molecule_type( ) if mol_data[key].molecule_type().lower() != 'ligand': mol_data[key].resids = sorted(mol_data[key].residue) mol_data[key].nor = len(mol_data[key].resids) #Check for chain breaks in protein resids_diff = numpy.array(mol_data[key].resids[1:]) - numpy.array( mol_data[key].resids[:-1]) if mol_data[key].nor != (numpy.sum(resids_diff) + 1): break_indices = numpy.where(resids_diff > 1) print "Chain break encountered in molecule", key, "at residue positions: " for res in break_indices[0]: print mol_data[key].resids[res], print "\n" return mol_data
import os
from parameter import Parameter
from ligand import Ligand

p = Parameter()

# Directory layout: <project.dir>/<project name>/data/
dir = '{}/'.format(p._('project.dir'))
project_dir = '{}{}/'.format(dir, p._('project.wizard.create.name'))
data_dir = '{}data/'.format(project_dir)

molecule = Ligand('{}{}'.format(data_dir, p._('project.wizard.ligand.db')))
columns = p._('project.wizard.ligand_report.columns')


def _print_record(id, fields):
    """Print one report row: the record id, a tab, then its fields."""
    print(id, fields, sep='\t')


# One output line per record, restricted to the configured columns.
molecule.foreachRecord(columns, _print_record)
molecule.close()
import os
from parameter import Parameter
from ligand import Ligand

p = Parameter()

#locate tree
dir = p._('project.dir')
name = p._('project.wizard.create.name')
project_dir = dir + '/' + name
data_dir = project_dir + '/data'
filename = '{}/stage/{}'.format(project_dir, p._('project.wizard.load.ligand'))

#import data
# The ligand table is emptied first, then refilled with one id per line of
# the staged file.
molecule = Ligand(data_dir + '/' + p._('project.wizard.ligand.db'))
molecule.clear()
# Read through a context manager so the staged file is closed as soon as
# all ids have been added.
with open(filename) as id_file:
    for id in id_file:
        id = id.strip('\n')
        molecule.add(id)
molecule.commit()


# download ligand structures
def save(id, s, format, dir):
    """Write ``s`` to ``<dir>/<id>.<format>`` and print ``id``.

    Args:
        id: Identifier used as the file stem and printed afterwards.
        s: Text written to the file.
        format: File extension, without the dot.
        dir: Directory that receives the file.
    """
    filename = '{}/{}.{}'.format(dir, id, format)
    with open(filename, 'w') as file:
        file.write(s)
    print(id)
def __init__(self, receptordb, liganddb):
    """Open the ligand database, then the receptor database.

    Args:
        receptordb: Argument forwarded unchanged to ``Receptor``.
        liganddb: Argument forwarded unchanged to ``Ligand``.
    """
    # The ligand side is opened first, as in the original.
    self.ligand = Ligand(liganddb)
    self.receptor = Receptor(receptordb)
def getSampleScores(protein, active):
    """Return ``Ligand.getSample(protein, active)`` as a JSON response.

    Args:
        protein: First argument forwarded to ``Ligand.getSample``.
        active: Second argument forwarded to ``Ligand.getSample``.

    Returns:
        The response built by ``make_response(jsonify(...))``.
    """
    sample = Ligand.getSample(protein, active)
    payload = jsonify(sample)
    return make_response(payload)
def ga_search(args, gnn):
    """Run the genetic-algorithm search and return the best individual.

    Args:
        args: Parsed options. Reads ``protein``, ``initpop`` (comma
            separated SMILES), ``popsz``, ``mu``, ``gens``, ``sigma``,
            ``img`` and ``out``.
        gnn: Object handed to ``eval_inds`` to score the population.

    Returns:
        The individual with the lowest fitness value seen, or the first
        initial individual when no generation improved on
        ``WORST_ERROR``.
    """
    # Create initial population: the given SMILES, padded with clones of
    # the first one; every individual except the first is mutated.
    protein = load_protein_file(args.protein)
    population = [Ligand(smiles) for smiles in args.initpop.split(',')]
    population += [
        population[0].clone() for _ in range(args.popsz - len(population))
    ]
    for ind in population[1:]:
        ind.mutate(args.mu)

    # Generation loop.
    print("Start genetic algorithm search (%d):" % args.gens)
    best = population[0]
    best_fit = WORST_ERROR
    images = []
    for gen in range(args.gens):
        # Stdout print.
        print(" Generation %d:" % gen)
        print(" PopSize: %d" % len(population))

        # Compute fitness.
        tstart = time.time()
        fits = eval_inds(population, gnn, protein)
        tfend = time.time()
        viable = sum(fit != WORST_ERROR for fit in fits)

        # Viability selection.
        # Compare by value: identity checks against an int literal depend
        # on interpreter caching and raise a SyntaxWarning on Python 3.8+.
        if viable == 0:
            print(" Population went extinct!")
            break
        print(" nViable: %d" % viable)

        # Sort by ascending fitness and keep the leading popsz * sigma.
        indexes = list(range(len(fits)))
        indexes.sort(key=fits.__getitem__)
        fits = list(map(fits.__getitem__, indexes))
        population = list(map(population.__getitem__, indexes))
        population = population[0:int(args.popsz * args.sigma)]

        # Up to three distinct SMILES from the front of the ranking.
        elite, efits = ([], [])
        for ndx, ind in enumerate(population):
            if ind.smiles not in elite:
                elite.append(ind.smiles)
                efits.append(fits[ndx])
            if len(elite) == 3:
                break

        # Drop individuals whose fitness is WORST_ERROR.
        population = [
            ind for i, ind in enumerate(population) if fits[i] != WORST_ERROR
        ]
        fits = fits[0:len(population)]

        # Some initial stats.
        if fits[0] < best_fit:
            best_fit = fits[0]
            best = population[0]
            # NOTE(review): assumes images are written only when a new
            # best is found - confirm against the original layout.
            if args.img:
                images.append("%s/best_%d.png" % (args.out, gen))
                best.write_image(images[-1])
                write_gif(images, "%s/best.gif" % (args.out))
        print(" Fitness: %f" % (sum(fits) / len(fits)))
        print(" BestFit: %f" % (best_fit))
        print(" Best: %s" % (best.smiles))
        print(" Elite:")
        for ndx, ind in enumerate(elite):
            print(" (%.4f) %s" % (efits[ndx], ind))

        # Reproduction: carry a clone of the current front-runner over,
        # then fill up with mutated offspring of random survivor pairs.
        opop = len(population)
        population.append(population[0].clone())
        while len(population) < args.popsz:
            # Crossover.
            indi = population[numpy.random.randint(0, opop)]
            indj = population[numpy.random.randint(0, opop)]
            inds = indi.mate(indj)
            # Mutation.
            for ind in inds:
                ind.mutate(args.mu)
                population.append(ind)

        # End of generation timing and print.
        tend = time.time()
        print(" Time: %.2f s (fit %.3f, ga %.3f)" %
              (tend - tstart, tfend - tstart, tend - tfend))

    # Return best found.
    return best
# Ligand import script (this portion of the file is shown incomplete).
#
# Visible steps:
#   1. Build a Parameter and a Path object.
#   2. Derive the project directory, the data directory and the staged id
#      file "<project_dir>/stage/<project.wizard.load.ligand>".
#   3. Open the ligand database at pt.liganddb, clear it, add every line of
#      the staged file (trailing newline stripped) as an id, then commit.
#   4. instage(id, pt) tests whether pt.filename('<id>.pdb', pt.stage,
#      '*.pdb') is not None.
#
# NOTE(review): `Parameter` is used but not imported in the visible lines;
#   presumably the import sits above this portion - confirm.
# NOTE(review): the file opened by `open(filename)` is never closed
#   explicitly.
# The body of copy() lies beyond the visible source and is not described.
from path import Path from ligand import Ligand p = Parameter() pt = Path(p) #locate tree dir = p._('project.dir') name = p._('project.wizard.create.name') project_dir = dir + '/' + name data_dir = project_dir + '/data' filename = '{}/stage/{}'.format(project_dir, p._('project.wizard.load.ligand')) #import data molecule = Ligand(pt.liganddb) molecule.clear() for id in open(filename): id = id.strip('\n') molecule.add(id) molecule.commit() # download ligand structures def instage(id, pt): return pt.filename('{}.pdb'.format(id), pt.stage, '*.pdb') != None def copy(id, dir, pt, pids):