def build_bfactor(self, ligands, protein, protein_pdb):
    """Build contact-count descriptors extended with protein atom B-factors.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
        A list or iterable of ligands to build the descriptor or a
        single molecule.

    protein: oddt.toolkit.Molecule or None
        Reference protein. A truthy value replaces ``self.protein``.

    protein_pdb:
        PDB file handed to ``PDBParser.get_structure`` as its file
        argument; the B-factor of every atom in it is appended.

    Returns
    -------
    numpy.ndarray
        2-D array. When the structure has at least one atom the result is
        a single row: the contact counts of the *first* ligand followed by
        all B-factors (rows of any further ligands are dropped, as before).
    """
    if protein:
        self.protein = protein
    if is_molecule(ligands):
        ligands = [ligands]

    out = []
    for mol in ligands:
        mol_dict = atoms_by_type(mol.atom_dict, self.ligand_types, self.mode)
        if self.aligned_pairs:
            pairs = zip(self.ligand_types, self.protein_types)
        else:
            pairs = [(mol_type, prot_type)
                     for mol_type in self.ligand_types
                     for prot_type in self.protein_types]

        # Keep only protein atoms that lie within the largest cutoff of
        # at least one ligand atom.
        dist = distance(self.protein.atom_dict['coords'],
                        mol.atom_dict['coords'])
        within_cutoff = (dist <= self.cutoff.max()).any(axis=1)
        local_protein_dict = self.protein.atom_dict[within_cutoff]
        prot_dict = atoms_by_type(local_protein_dict, self.protein_types,
                                  self.mode)

        desc = []
        for mol_type, prot_type in pairs:
            d = distance(prot_dict[prot_type]['coords'],
                         mol_dict[mol_type]['coords'])[..., np.newaxis]
            if len(self.cutoff) > 1:
                # One count per (lower, upper] distance bin.
                count = ((d > self.cutoff[..., 0]) &
                         (d <= self.cutoff[..., 1])).sum(axis=(0, 1))
            else:
                count = (d <= self.cutoff).sum()
            desc.append(count)
        out.append(np.array(desc, dtype=int).flatten())

    parser = PDBParser()
    structure = parser.get_structure('pdb', protein_pdb)
    # Gather all B-factors first and append them in one step; appending
    # atom by atom re-copied the whole row for every atom.
    bfactors = [atom.get_bfactor() for atom in structure.get_atoms()]
    if bfactors:
        out = [np.append(out[0], bfactors)]
    return np.vstack(out)
def build(self, ligands, protein=None):
    """Compute descriptors for one or many ligands with ``self.func``.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
        A list or iterable of ligands to build the descriptor or a
        single molecule.

    protein: oddt.toolkit.Molecule or None (default=None)
        Default protein to use as reference. A truthy value replaces
        ``self.protein``.

    Returns
    -------
    Row-wise stack of the per-ligand results: a CSR sparse matrix when
    ``self.sparse`` is truthy, otherwise a dense ``numpy`` array.
    """
    if protein:
        self.protein = protein
    if is_molecule(ligands):
        ligands = [ligands]

    rows = []
    for mol in ligands:
        # The protein keyword is only passed when a protein is set.
        extra = {} if self.protein is None else {'protein': self.protein}
        rows.append(self.func(mol, **extra))

    if not self.sparse:
        return np.vstack(rows)

    to_csr = partial(sparse_to_csr_matrix, size=self.shape)
    return sparse_vstack(map(to_csr, rows), format='csr')
def similarity(self, method, query, cutoff=0.9, protein=None):
    """Append a similarity filter to the pipeline.

    Parameters
    ----------
    method: string
        Similarity method used to compare molecules (case-insensitive).
        Available methods:
            * `ifp` - interaction fingerprint (requires a receptor)
            * `sifp` - simple interaction fingerprint (requires a receptor)
            * `usr` - Ultrafast Shape Recognition
            * `usr_cat` - USR, with CREDO atom types
            * `electroshape` - Electroshape, USR with moments
              representing partial charge

    query: oddt.toolkit.Molecule or list of oddt.toolkit.Molecule
        Query molecules to compare the pipeline to.

    cutoff: float
        Similarity cutoff for filtering molecules. Any similarity lower
        than it will be filtered out.

    protein: oddt.toolkit.Molecule (default = None)
        Protein for the underlying method. By default it's empty, but
        structural fingerprints need one.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if is_molecule(query):
        query = [query]

    # Pick the descriptor generator and the matching similarity measure.
    name = method.lower()
    if name == 'ifp':
        gen, dist = partial(InteractionFingerprint, protein=protein), dice
    elif name == 'sifp':
        gen, dist = (partial(SimpleInteractionFingerprint, protein=protein),
                     dice)
    elif name == 'usr':
        gen, dist = usr, usr_similarity
    elif name == 'usr_cat':
        gen, dist = usr_cat, usr_similarity
    elif name == 'electroshape':
        gen, dist = electroshape, usr_similarity
    else:
        raise ValueError('Similarity filter "%s" is not supported.' % method)

    # Query descriptors are computed once, here; the same generator is
    # handed to the filter so pipeline molecules are described identically.
    query_fps = list(map(gen, query))
    similarity_filter = partial(_filter_similarity,
                                distance=dist,
                                generator=gen,
                                query_fps=query_fps,
                                cutoff=cutoff)
    self._pipe.append(similarity_filter)
def build(self, mols):
    """Stack the fingerprint of every molecule into one array.

    ``mols`` may be a single molecule or an iterable of molecules; each
    one is passed to ``self._get_fingerprint`` and the results are
    stacked row-wise with ``np.vstack``.
    """
    if is_molecule(mols):
        mols = [mols]
    return np.vstack([self._get_fingerprint(mol) for mol in mols])
def build_num_aromat_rings(self, ligands, protein, ligand_sdf):
    """Build contact-count descriptors extended with aromatic ring counts.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
        A list or iterable of ligands to build the descriptor or a
        single molecule.

    protein: oddt.toolkit.Molecule or None
        Reference protein. A truthy value replaces ``self.protein``.

    ligand_sdf: the path to the sdf-file of the ligand.

    Returns
    -------
    numpy.ndarray
        2-D array. For every molecule read from ``ligand_sdf`` the number
        of aromatic SSSR rings is appended to the row of the first
        ligand, which then becomes the only row.
    """
    if protein:
        self.protein = protein
    if is_molecule(ligands):
        ligands = [ligands]

    out = []
    for lig in ligands:
        # Protein atoms within the largest cutoff of any ligand atom.
        all_dists = distance(self.protein.atom_dict['coords'],
                             lig.atom_dict['coords'])
        nearby = (all_dists <= self.cutoff.max()).any(axis=1)
        rec_by_type = atoms_by_type(self.protein.atom_dict[nearby],
                                    self.protein_types, self.mode)
        lig_by_type = atoms_by_type(lig.atom_dict, self.ligand_types,
                                    self.mode)

        if self.aligned_pairs:
            type_pairs = zip(self.ligand_types, self.protein_types)
        else:
            type_pairs = [(lig_type, rec_type)
                          for lig_type in self.ligand_types
                          for rec_type in self.protein_types]

        binned = len(self.cutoff) > 1
        counts = []
        for lig_type, rec_type in type_pairs:
            gaps = distance(rec_by_type[rec_type]['coords'],
                            lig_by_type[lig_type]['coords'])[..., np.newaxis]
            if binned:
                in_bin = ((gaps > self.cutoff[..., 0]) &
                          (gaps <= self.cutoff[..., 1]))
                counts.append(in_bin.sum(axis=(0, 1)))
            else:
                counts.append((gaps <= self.cutoff).sum())
        out.append(np.array(counts, dtype=int).flatten())

    for sdf_mol in pybel.readfile("sdf", ligand_sdf):  # could be sdf or mol2
        n_aromatic = sum(1 for ring in sdf_mol.OBMol.GetSSSR()
                         if ring.IsAromatic())
        out = [np.append(out[0], np.array(n_aromatic))]
    return np.vstack(out)
def build(self, ligands, protein=None):
    """Count ligand-protein atom contacts per atom-type pair.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
        A list or iterable of ligands to build the descriptor or a
        single molecule.

    protein: oddt.toolkit.Molecule or None (default=None)
        Default protein to use as reference. A truthy value replaces
        ``self.protein``.

    Returns
    -------
    numpy.ndarray
        One row of integer counts per ligand, stacked with ``np.vstack``.
    """
    if protein:
        self.protein = protein
    if is_molecule(ligands):
        ligands = [ligands]

    rows = []
    for lig in ligands:
        # Restrict the protein to atoms within the largest cutoff of at
        # least one ligand atom before splitting it by atom type.
        all_dists = distance(self.protein.atom_dict['coords'],
                             lig.atom_dict['coords'])
        nearby = (all_dists <= self.cutoff.max()).any(axis=1)
        rec_by_type = atoms_by_type(self.protein.atom_dict[nearby],
                                    self.protein_types, self.mode)
        lig_by_type = atoms_by_type(lig.atom_dict, self.ligand_types,
                                    self.mode)

        if self.aligned_pairs:
            # Types are paired position by position.
            type_pairs = zip(self.ligand_types, self.protein_types)
        else:
            # Every ligand type against every protein type.
            type_pairs = [(lig_type, rec_type)
                          for lig_type in self.ligand_types
                          for rec_type in self.protein_types]

        binned = len(self.cutoff) > 1
        counts = []
        for lig_type, rec_type in type_pairs:
            gaps = distance(rec_by_type[rec_type]['coords'],
                            lig_by_type[lig_type]['coords'])[..., np.newaxis]
            if binned:
                # One count per (lower, upper] distance bin.
                in_bin = ((gaps > self.cutoff[..., 0]) &
                          (gaps <= self.cutoff[..., 1]))
                counts.append(in_bin.sum(axis=(0, 1)))
            else:
                counts.append((gaps <= self.cutoff).sum())
        rows.append(np.array(counts, dtype=int).flatten())

    return np.vstack(rows)
def score(self, ligands, protein=None):
    """Automated scoring procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to score

    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one
        is used, else the protein is new default.

    Returns
    -------
    ligands : array of oddt.toolkit.Molecule objects
        Array of ligands (scores are stored in mol.data method)

    Raises
    ------
    IOError
        If no receptor file is set.
    Exception
        If Vina exits with an error and ``self.skip_bad_mols`` is falsy.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if is_molecule(ligands):
        ligands = [ligands]

    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    # try/finally: the temporary directory must be removed even when a
    # ligand fails validation or Vina errors out.
    try:
        for n, ligand in enumerate(ligands):
            check_molecule(ligand, force_coords=True)
            ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
            command = [self.executable,
                       '--score_only',
                       '--receptor', self.protein_file,
                       '--ligand', ligand_file] + self.params
            try:
                scores = parse_vina_scoring_output(
                    subprocess.check_output(command,
                                            stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # 'replace' keeps a non-ASCII byte in Vina's output from
                # raising UnicodeDecodeError and hiding the real failure.
                sys.stderr.write(e.output.decode('ascii', 'replace'))
                if self.skip_bad_mols:
                    continue
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))
            ligand.data.update(scores)
            output_array.append(ligand)
    finally:
        rmtree(ligand_dir)
    return output_array
def score(self, ligands, protein=None):
    """Automated scoring procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to score

    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one
        is used, else the protein is new default.

    Returns
    -------
    ligands : array of oddt.toolkit.Molecule objects
        Array of ligands (scores are stored in mol.data method)

    Raises
    ------
    IOError
        If no receptor file is set.
    Exception
        If Vina exits with an error and ``self.skip_bad_mols`` is falsy.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if is_molecule(ligands):
        ligands = [ligands]

    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    # The finally clause guarantees the temporary ligand directory is
    # deleted on every exit path, including exceptions.
    try:
        for n, ligand in enumerate(ligands):
            check_molecule(ligand, force_coords=True)
            ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
            try:
                scores = parse_vina_scoring_output(
                    subprocess.check_output(
                        [self.executable, '--score_only',
                         '--receptor', self.protein_file,
                         '--ligand', ligand_file] + self.params,
                        stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # Decode leniently so unexpected bytes in the tool output
                # cannot mask the Vina error with a UnicodeDecodeError.
                sys.stderr.write(e.output.decode('ascii', 'replace'))
                if self.skip_bad_mols:
                    continue
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))
            ligand.data.update(scores)
            output_array.append(ligand)
    finally:
        rmtree(ligand_dir)
    return output_array
def build(self, ligands, protein=None):
    """Build a matrix of Vina scores, one row per ligand.

    Parameters
    ----------
    ligands: iterable of molecules or a single molecule
        Ligands to score with ``self.vina.score``.

    protein: molecule or None (default=None)
        A truthy value is forwarded to ``self.set_protein``.

    Returns
    -------
    numpy.ndarray
        2-D ``float32`` array holding, for each ligand, the values of the
        keys listed in ``self.vina_scores``. An empty ``ligands`` yields
        an array with zero rows.
    """
    if protein:
        self.set_protein(protein)
    if is_molecule(ligands):
        ligands = [ligands]

    rows = []
    for mol in ligands:
        # Vina
        # TODO: Asynchronous output from vina, push command to score and retrieve at the end?
        # TODO: Check if ligand has vina scores
        # NOTE(review): indexing [0] raises IndexError if score() returns
        # an empty list (e.g. when it skipped the molecule).
        scored_mol = self.vina.score(mol)[0].data
        rows.append(np.array([scored_mol[key] for key in self.vina_scores],
                             dtype=np.float32).flatten())

    if not rows:
        return np.empty((0, len(self.vina_scores)), dtype=np.float32)
    # Stack once at the end; growing the matrix with vstack inside the
    # loop re-copied every previous row for each ligand.
    return np.vstack(rows)
def build(self, ligands, protein=None):
    """Build a matrix of Vina score terms, one row per ligand.

    Scores already present in ``mol.data`` are reused; otherwise they are
    computed with ``self.vina`` and stored back into ``mol.data``.

    Parameters
    ----------
    ligands: iterable of molecules or a single molecule
        Ligands to describe.

    protein: molecule or None (default=None)
        A truthy value is forwarded to ``self.set_protein``.

    Returns
    -------
    numpy.ndarray
        2-D ``float32`` array with the values of the keys listed in
        ``self.vina_scores`` for each ligand. An empty ``ligands`` yields
        an array with zero rows.
    """
    if protein:
        self.set_protein(protein)
    if is_molecule(ligands):
        ligands = [ligands]

    rows = []
    for mol in ligands:
        mol_keys = mol.data.keys()
        if any(x not in mol_keys for x in self.vina_scores):
            self.vina.set_ligand(mol)
            inter = self.vina.score_inter()
            intra = self.vina.score_intra()
            num_rotors = self.vina.num_rotors
            # could use self.vina.score(), but better to reuse variables
            affinity = ((inter * self.vina.weights[:5]).sum() /
                        (1 + self.vina.weights[5] * num_rotors))
            assert len(self.all_vina_scores) == len(inter) + len(intra) + 2
            score = dict(zip(
                self.all_vina_scores,
                np.hstack((affinity, inter, intra, num_rotors)).flatten()))
            mol.data.update(score)
        else:
            score = mol.data.to_dict()

        try:
            vec = np.array([score[s] for s in self.vina_scores],
                           dtype=np.float32).flatten()
        except Exception:
            # Report only names bound on both branches: the individual
            # terms do not exist when the scores came from mol.data, and
            # printing them raised NameError, hiding the real error.
            print(score)
            print(mol.title)
            raise
        rows.append(vec)

    if not rows:
        return np.empty((0, len(self.vina_scores)), dtype=np.float32)
    # Single stack at the end instead of re-copying the matrix per ligand.
    return np.vstack(rows)
def dock(self, ligands, protein=None):
    """Automated docking procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to dock

    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one
        is used, else the protein is new default.

    Returns
    -------
    ligands : array of oddt.toolkit.Molecule objects
        Array of ligands (scores are stored in mol.data method)

    Raises
    ------
    IOError
        If no receptor file is set.
    Exception
        If Vina exits with an error and ``self.skip_bad_mols`` is falsy.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if is_molecule(ligands):
        ligands = [ligands]

    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    # try/finally: remove the temporary directory on every exit path.
    try:
        for n, ligand in enumerate(ligands):
            check_molecule(ligand, force_coords=True)
            ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
            ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
            command = ([self.executable,
                        '--receptor', self.protein_file,
                        '--ligand', ligand_file,
                        '--out', ligand_outfile] +
                       self.params +
                       ['--cpu', str(self.n_cpu)])
            try:
                scores = parse_vina_docking_output(
                    subprocess.check_output(command,
                                            stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # Lenient decode: a non-ASCII byte must not replace the
                # Vina failure with a UnicodeDecodeError.
                sys.stderr.write(e.output.decode('ascii', 'replace'))
                if self.skip_bad_mols:
                    continue  # TODO: print some warning message
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))

            # docked conformations may have wrong connectivity - use source ligand
            if is_openbabel_molecule(ligand):
                # NOTE(review): this is a string comparison, not a parsed
                # version comparison.
                if oddt.toolkits.ob.__version__ >= '2.4.0':
                    # find the order of PDBQT atoms assigned by OpenBabel
                    with open(ligand_file) as f:
                        write_order = [int(line[7:12].strip())
                                       for line in f
                                       if line[:4] == 'ATOM']
                    new_order = sorted(range(len(write_order)),
                                       key=write_order.__getitem__)
                    new_order = [i + 1 for i in new_order]  # OBMol has 1 based idx

                    assert len(new_order) == len(ligand.atoms)
                else:
                    # Openbabel 2.3.2 does not support preserving atom order.
                    # We read back the PDBQT ligand to get "correct" bonding.
                    ligand = next(oddt.toolkit.readfile('pdbqt', ligand_file))
                    if 'REMARK' in ligand.data:
                        del ligand.data['REMARK']

            docked_ligands = oddt.toolkit.readfile('pdbqt', ligand_outfile)
            for docked_ligand, score in zip(docked_ligands, scores):
                # Renumber atoms to match the input ligand
                if (is_openbabel_molecule(docked_ligand) and
                        oddt.toolkits.ob.__version__ >= '2.4.0'):
                    docked_ligand.OBMol.RenumberAtoms(new_order)
                # HACK: copy docked coordinates onto source ligand
                # We assume that the order of atoms match between ligands
                clone = ligand.clone
                clone.clone_coords(docked_ligand)
                clone.data.update(score)

                # Calculate RMSD to the input pose. Best effort: a pose
                # is still returned when the RMSD cannot be computed.
                try:
                    clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                    clone.data['vina_rmsd_input_min'] = rmsd(
                        ligand, clone, method='min_symmetry')
                except Exception:
                    pass
                output_array.append(clone)
    finally:
        rmtree(ligand_dir)
    return output_array
def build_stand_alone(self, ligands, lig_sdf, protein, prot_pdb, nmbr_modes, schroedinger_path):
    """Combine contact counts with QikProp, ligand and NMA descriptors.

    Appends, in this order, to the contact-count row of the first ligand:
    eight QikProp properties, the number of rotatable bonds and the
    number of aromatic rings of every molecule in ``lig_sdf``, and the
    lengths of the ANM eigenvalue and eigenvector arrays. Used in
    ET-Score.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
        A list or iterable of ligands to build the descriptor or a
        single molecule.

    lig_sdf: str
        Path to the sdf-file of the ligand. It is passed to QikProp and
        read with pybel; ``<name>.CSV`` (``name`` being the file name
        without ``.sdf``) is then opened relative to the working
        directory.

    protein: oddt.toolkit.Molecule or None
        Reference protein. A truthy value replaces ``self.protein``.

    prot_pdb:
        Protein structure handed to ``parsePDB``.

    nmbr_modes: int
        Number of normal modes requested from ``ANM.calcModes``.

    schroedinger_path: str
        Directory containing the ``qikprop`` executable. An empty string
        runs ``qikprop`` as found on ``PATH``. The path is passed to the
        program directly and is not expanded by a shell.

    Returns
    -------
    numpy.ndarray
        2-D array with a single row.
    """
    if protein:
        self.protein = protein
    if is_molecule(ligands):
        ligands = [ligands]

    out = []
    for mol in ligands:
        mol_dict = atoms_by_type(mol.atom_dict, self.ligand_types, self.mode)
        if self.aligned_pairs:
            pairs = zip(self.ligand_types, self.protein_types)
        else:
            pairs = [(mol_type, prot_type)
                     for mol_type in self.ligand_types
                     for prot_type in self.protein_types]

        # Keep only protein atoms within the largest cutoff of the ligand.
        dist = distance(self.protein.atom_dict['coords'],
                        mol.atom_dict['coords'])
        within_cutoff = (dist <= self.cutoff.max()).any(axis=1)
        local_protein_dict = self.protein.atom_dict[within_cutoff]
        prot_dict = atoms_by_type(local_protein_dict, self.protein_types,
                                  self.mode)

        desc = []
        for mol_type, prot_type in pairs:
            d = distance(prot_dict[prot_type]['coords'],
                         mol_dict[mol_type]['coords'])[..., np.newaxis]
            if len(self.cutoff) > 1:
                count = ((d > self.cutoff[..., 0]) &
                         (d <= self.cutoff[..., 1])).sum(axis=(0, 1))
            else:
                count = (d <= self.cutoff).sum()
            desc.append(count)
        out.append(np.array(desc, dtype=int).flatten())

    # Run QikProp with an argument list so that paths containing spaces
    # or shell metacharacters are passed through unchanged.
    if schroedinger_path and not schroedinger_path.endswith("/"):
        schroedinger_path += "/"
    subprocess.call([schroedinger_path + "qikprop", "-NOJOBID", lig_sdf])

    if "/" in lig_sdf:
        lig_name = re.search(r'/([^/]+)\.sdf', lig_sdf).group(1)
        print(lig_name)
    else:
        lig_name = lig_sdf[:-4]  # remove .sdf

    qikprop_keys = ("FOSA", "FISA", "WPSA", "QPlogPo/w", "QPlogHERG",
                    "QPlogKhsa", "QPPMDCK", "QPlogKp")
    qikprops = {}
    with open("%s.CSV" % lig_name) as csvfile:
        # When the file has several rows the last one wins.
        for row in csv.DictReader(csvfile):
            qikprops = {key: row[key] for key in qikprop_keys}

    # Everything appended to the first row is collected here and attached
    # with a single np.append at the end.
    extras = []

    # Add QikProp properties as descriptors
    failure_logged = False
    for key in (qikprop_keys if qikprops else ()):
        value = qikprops[key]
        if value == '':  # QikProp has failed
            if not failure_logged:
                with open("./qikFail_eigv.txt", "a+") as results:
                    results.write("%s \n" % lig_name)
                failure_logged = True
            extras.append(0)
        else:
            extras.append(float(value))

    # Parse the SDF once and use the molecules for both counts.
    sdf_mols = list(pybel.readfile("sdf", lig_sdf))  # can be sdf or mol2
    # Add Number of rotatable bonds
    extras.extend(sdf_mol.OBMol.NumRotors() for sdf_mol in sdf_mols)
    # Add number of aromatic rings
    extras.extend(
        sum(1 for ring in sdf_mol.OBMol.GetSSSR() if ring.IsAromatic())
        for sdf_mol in sdf_mols)

    print("Protein:")
    print(prot_pdb)
    pdb = parsePDB(prot_pdb)
    calphas = pdb.select('calpha')
    anm = ANM('pdb ANM analysis')
    anm.buildHessian(calphas, cutoff=12.0)
    # NOTE(review): the rounded value is discarded, so this looks like a
    # no-op - confirm before removing.
    anm.getHessian().round(3)
    anm.calcModes(n_modes=nmbr_modes)

    # Add NMA Length
    extras.append(len(anm.getEigvals()))
    extras.append(len(anm.getEigvecs()))

    out = [np.append(out[0], extras)]
    return np.vstack(out)
def build_eigval_qik(self, ligands, protein, protein_pdb, ligand_sdf,
                     qikprop_executable="/opt/schrodinger2017-4/qikprop"):
    """Combine contact counts with QikProp properties and NMA eigenvalues.

    Appends, in this order, to the contact-count row of the first ligand:
    eight QikProp properties and the eigenvalue of every ANM mode.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
        A list or iterable of ligands to build the descriptor or a
        single molecule.

    protein: oddt.toolkit.Molecule or None
        Reference protein. A truthy value replaces ``self.protein``.

    protein_pdb:
        Protein structure handed to ``parsePDB``.

    ligand_sdf: str
        Path to the sdf-file of the ligand. The QikProp output is looked
        up as ``ligand_sdf[-15:-4] + ".CSV"`` in the working directory,
        and that id minus its last seven characters is used as the
        clean-up prefix - presumably file names like ``1abc_ligand.sdf``;
        verify against callers.

    qikprop_executable: str
        Path of the QikProp executable. Defaults to the previously
        hard-coded location.

    Returns
    -------
    numpy.ndarray
        2-D array with a single row.
    """
    import glob
    import os

    if protein:
        self.protein = protein
    if is_molecule(ligands):
        ligands = [ligands]

    out = []
    for mol in ligands:
        mol_dict = atoms_by_type(mol.atom_dict, self.ligand_types, self.mode)
        if self.aligned_pairs:
            pairs = zip(self.ligand_types, self.protein_types)
        else:
            pairs = [(mol_type, prot_type)
                     for mol_type in self.ligand_types
                     for prot_type in self.protein_types]

        # Keep only protein atoms within the largest cutoff of the ligand.
        dist = distance(self.protein.atom_dict['coords'],
                        mol.atom_dict['coords'])
        within_cutoff = (dist <= self.cutoff.max()).any(axis=1)
        local_protein_dict = self.protein.atom_dict[within_cutoff]
        prot_dict = atoms_by_type(local_protein_dict, self.protein_types,
                                  self.mode)

        desc = []
        for mol_type, prot_type in pairs:
            d = distance(prot_dict[prot_type]['coords'],
                         mol_dict[mol_type]['coords'])[..., np.newaxis]
            if len(self.cutoff) > 1:
                count = ((d > self.cutoff[..., 0]) &
                         (d <= self.cutoff[..., 1])).sum(axis=(0, 1))
            else:
                count = (d <= self.cutoff).sum()
            desc.append(count)
        out.append(np.array(desc, dtype=int).flatten())

    lig_id = ligand_sdf[-15:-4]
    # Argument list instead of a shell string: the path is passed to
    # QikProp unchanged, whatever characters it contains.
    subprocess.call([qikprop_executable, "-NOJOBID", ligand_sdf])

    qikprop_keys = ("FOSA", "FISA", "WPSA", "QPlogPo/w", "QPlogHERG",
                    "QPlogKhsa", "QPPMDCK", "QPlogKp")
    qikprops = {}
    with open("%s.CSV" % lig_id) as csvfile:
        # When the file has several rows the last one wins.
        for row in csv.DictReader(csvfile):
            qikprops = {key: row[key] for key in qikprop_keys}

    # Remove the files whose name starts with the ligand prefix. An empty
    # prefix is skipped on purpose: the former ``rm <prefix>*`` shell
    # command would then have expanded to ``rm *``.
    lig_name = lig_id[:-7]
    if lig_name:
        for leftover in glob.glob(glob.escape(lig_name) + "*"):
            try:
                os.remove(leftover)
            except OSError:
                # Same outcome as the ignored ``rm`` exit status:
                # entries that cannot be deleted are left in place.
                pass

    # Everything appended to the first row is collected here and attached
    # with a single np.append at the end.
    extras = []

    # Add QikProp properties as descriptors
    failure_logged = False
    for key in (qikprop_keys if qikprops else ()):
        value = qikprops[key]
        if value == '':  # QikProp has failed
            if not failure_logged:
                with open("./qikFail_eigv.txt", "a+") as results:
                    results.write("%s \n" % lig_name)
                failure_logged = True
            extras.append(0)
        else:
            extras.append(float(value))

    # Add NMA Eigenvalues
    pdb = parsePDB(protein_pdb)
    calphas = pdb.select('calpha')
    anm = ANM('pdb ANM analysis')
    anm.buildHessian(calphas, cutoff=12.0)
    # NOTE(review): the rounded value is discarded, so this looks like a
    # no-op - confirm before removing.
    anm.getHessian().round(3)
    anm.calcModes()
    extras.extend(mode.getEigval() for mode in anm)

    if extras:
        out = [np.append(out[0], extras)]
    return np.vstack(out)
def build_nmaLength(self, ligands, protein, protein_pdb, nmbr_modes):
    """Build contact-count descriptors extended with two NMA lengths.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecules or oddt.toolkit.Molecule
        A list or iterable of ligands to build the descriptor or a
        single molecule.

    protein: oddt.toolkit.Molecule or None
        Reference protein. A truthy value replaces ``self.protein``.

    protein_pdb:
        Protein structure handed to ``parsePDB``.

    nmbr_modes: the number of normal modes that will be calculated.

    Returns
    -------
    numpy.ndarray
        2-D array with a single row: the contact counts of the first
        ligand followed by ``len(anm.getEigvals())`` and
        ``len(anm.getEigvecs())``.
    """
    if protein:
        self.protein = protein
    if is_molecule(ligands):
        ligands = [ligands]

    out = []
    for lig in ligands:
        # Protein atoms within the largest cutoff of any ligand atom.
        all_dists = distance(self.protein.atom_dict['coords'],
                             lig.atom_dict['coords'])
        nearby = (all_dists <= self.cutoff.max()).any(axis=1)
        rec_by_type = atoms_by_type(self.protein.atom_dict[nearby],
                                    self.protein_types, self.mode)
        lig_by_type = atoms_by_type(lig.atom_dict, self.ligand_types,
                                    self.mode)

        if self.aligned_pairs:
            type_pairs = zip(self.ligand_types, self.protein_types)
        else:
            type_pairs = [(lig_type, rec_type)
                          for lig_type in self.ligand_types
                          for rec_type in self.protein_types]

        binned = len(self.cutoff) > 1
        counts = []
        for lig_type, rec_type in type_pairs:
            gaps = distance(rec_by_type[rec_type]['coords'],
                            lig_by_type[lig_type]['coords'])[..., np.newaxis]
            if binned:
                in_bin = ((gaps > self.cutoff[..., 0]) &
                          (gaps <= self.cutoff[..., 1]))
                counts.append(in_bin.sum(axis=(0, 1)))
            else:
                counts.append((gaps <= self.cutoff).sum())
        out.append(np.array(counts, dtype=int).flatten())

    # Normal mode analysis on the C-alpha atoms of the protein.
    structure = parsePDB(protein_pdb)
    alpha_carbons = structure.select('calpha')
    anm = ANM('pdb ANM analysis')
    anm.buildHessian(alpha_carbons, cutoff=12.0)
    # NOTE(review): the rounded value is discarded - looks like a no-op.
    anm.getHessian().round(3)
    anm.calcModes(n_modes=nmbr_modes)

    n_eigvals = len(anm.getEigvals())
    out = [np.append(out[0], np.array(n_eigvals))]
    # NOTE(review): len() of the eigenvector array is the size of its
    # first axis - confirm that this is the intended "number of
    # eigenvectors".
    n_eigvecs = len(anm.getEigvecs())
    out = [np.append(out[0], np.array(n_eigvecs))]
    return np.vstack(out)
def dock(self, ligands, protein=None):
    """Automated docking procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to dock

    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one
        is used, else the protein is new default.

    Returns
    -------
    ligands : array of oddt.toolkit.Molecule objects
        Array of ligands (scores are stored in mol.data method)

    Raises
    ------
    IOError
        If no receptor file is set.
    Exception
        If Vina exits with an error and ``self.skip_bad_mols`` is falsy.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if is_molecule(ligands):
        ligands = [ligands]

    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    # The finally clause guarantees the temporary ligand directory is
    # deleted on every exit path, including exceptions.
    try:
        for n, ligand in enumerate(ligands):
            check_molecule(ligand, force_coords=True)
            ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
            ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
            try:
                scores = parse_vina_docking_output(
                    subprocess.check_output(
                        [self.executable,
                         '--receptor', self.protein_file,
                         '--ligand', ligand_file,
                         '--out', ligand_outfile] +
                        self.params +
                        ['--cpu', str(self.n_cpu)],
                        stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # Decode leniently so unexpected bytes in the tool output
                # cannot mask the Vina error with a UnicodeDecodeError.
                sys.stderr.write(e.output.decode('ascii', 'replace'))
                if self.skip_bad_mols:
                    continue  # TODO: print some warning message
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))

            # docked conformations may have wrong connectivity - use source ligand
            if is_openbabel_molecule(ligand):
                # NOTE(review): this is a string comparison, not a parsed
                # version comparison.
                if oddt.toolkits.ob.__version__ >= '2.4.0':
                    # find the order of PDBQT atoms assigned by OpenBabel
                    with open(ligand_file) as f:
                        write_order = [int(line[7:12].strip())
                                       for line in f
                                       if line[:4] == 'ATOM']
                    new_order = sorted(range(len(write_order)),
                                       key=write_order.__getitem__)
                    new_order = [i + 1 for i in new_order]  # OBMol has 1 based idx

                    assert len(new_order) == len(ligand.atoms)
                else:
                    # Openbabel 2.3.2 does not support preserving atom order.
                    # We read back the PDBQT ligand to get "correct" bonding.
                    ligand = next(oddt.toolkit.readfile('pdbqt', ligand_file))
                    if 'REMARK' in ligand.data:
                        del ligand.data['REMARK']

            docked_ligands = oddt.toolkit.readfile('pdbqt', ligand_outfile)
            for docked_ligand, score in zip(docked_ligands, scores):
                # Renumber atoms to match the input ligand
                if (is_openbabel_molecule(docked_ligand) and
                        oddt.toolkits.ob.__version__ >= '2.4.0'):
                    docked_ligand.OBMol.RenumberAtoms(new_order)
                # HACK: copy docked coordinates onto source ligand
                # We assume that the order of atoms match between ligands
                clone = ligand.clone
                clone.clone_coords(docked_ligand)
                clone.data.update(score)

                # Calculate RMSD to the input pose
                clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                clone.data['vina_rmsd_input_min'] = rmsd(
                    ligand, clone, method='min_symmetry')
                output_array.append(clone)
    finally:
        rmtree(ligand_dir)
    return output_array