def test_nitrogen_charges(self):
    """
    TestPDB: Check that charged nitrogen groups are detected as positive.
    """
    # Each case is (pdb file, pdbqt file, expected number of charged groups).
    cases = [
        # Ammonium sulfate, (NH4)+(NH4)+(SO4)(2-): one charged nitrogen
        # group is expected for each of the two ammonium ions.
        ("ammonium_sulfate_hyd.pdb", "ammonium_sulfate_hyd.pdbqt", 2),
        # Pyrrolidine, (CH2)4NH: the nitrogen should be sp3 hybridized, so
        # it is likely to pick up an extra proton at physiological pH.
        ("pyrrolidine_hyd.pdb", "pyrrolidine_hyd.pdbqt", 1),
    ]
    for pdb_name, pdbqt_name, expected_groups in cases:
        molecule = PDB()
        pdb_path = os.path.join(data_dir(), pdb_name)
        pdbqt_path = os.path.join(data_dir(), pdbqt_name)
        molecule.load_from_files(pdb_path, pdbqt_path)

        groups = molecule.identify_nitrogen_charges()
        assert len(groups) == expected_groups
        # Every nitrogen group found must be positively charged.
        for group in groups:
            assert group.positive
def featurize_dude(dude_dir, target, pickle_dir, num_jobs):
    """Featurize DUD-E docked poses and write features to a gzipped pickle.

    For each active compound the docked poses are loaded from
    "<compound>_hyd_out.pdbqt" (converted to PDB first), featurized against
    the target's receptor with Binana, and collected in a dict mapping
    compound name to a list of feature vectors (one per pose, in sorted
    key order).

    Parameters
    ----------
    dude_dir: string
      Path to DUD-E directory
    target: string
      Name of DUD-E target.
    pickle_dir: string
      Path to directory to output pickles.
      NOTE(review): currently unused -- the output is written to
      "<dude_dir>/<target>/out.pkl.gz". Confirm the intended location.
    num_jobs: int
      Number of jobs; only used to print the per-job workload.
    """
    target_dir = os.path.join(dude_dir, target)
    actives_dir = os.path.join(target_dir, "actives")
    decoys_dir = os.path.join(target_dir, "decoys")
    actives = os.listdir(actives_dir)
    # NOTE(review): decoys are listed but not featurized yet.
    decoys = os.listdir(decoys_dir)
    pickle_out = os.path.join(target_dir, "out.pkl.gz")

    # Just for debugging purposes
    # NOTE(review): only the first listed active is featurized; lifting this
    # limit needs care because actives_dir also receives the generated
    # "*_hyd_out.pdb" files.
    actives = actives[:1]

    num_per_job = int(math.ceil(len(actives) / float(num_jobs)))
    print("Number per job: %d" % num_per_job)

    # Load the receptor that belongs to the requested target.  This used to
    # be a hard-coded absolute path to one developer's copy of the aa2ar
    # target, so every other target was featurized against the wrong protein.
    protein_pdb_path = os.path.join(target_dir, "receptor_hyd.pdb")
    protein_pdbqt_path = os.path.join(target_dir, "receptor_hyd.pdbqt")

    print("About to load protein from input files")
    protein_pdb_obj = PDB()
    protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

    binana = Binana()
    # NOTE(review): feature_len is not used below; kept because it is not
    # visible here whether num_features() has side effects.
    feature_len = binana.num_features()
    feature_vectors = {}
    for compound in actives:
        compound_name = compound.split(".")[0]
        compound_pdbqt = os.path.join(
            actives_dir, compound_name + "_hyd_out.pdbqt")

        # Convert the pdbqt to pdb
        pdbqt_to_pdb(compound_pdbqt, actives_dir)
        compound_pdb = os.path.join(
            actives_dir, compound_name + "_hyd_out.pdb")

        structures = MultiStructure()
        structures.load_from_files(compound_pdb, compound_pdbqt)

        vectors = []
        for key in sorted(structures.molecules.keys()):
            structure = structures.molecules[key]
            print("type(structure)")
            print(type(structure))
            vectors.append(
                binana.compute_input_vector(structure, protein_pdb_obj))
        feature_vectors[compound_name] = vectors

    with gzip.open(pickle_out, "wb") as f:
        pickle.dump(feature_vectors, f)
def test_metallic_charges(self):
    """
    TestPDB: Check that a lone metal ion is reported as a metallic charge.
    """
    structure = PDB()

    # Build a single magnesium atom sitting at the origin and register it as
    # a non-protein atom.
    origin = Point(coords=np.array([0,0,0]))
    ion = Atom(element="MG", coordinates=origin)
    structure.add_new_non_protein_atom(ion)

    # Exactly one metallic charge is expected for the one ion added.
    assert len(structure.identify_metallic_charges()) == 1
def test_metallic_charges(self):
    """
    TestPDB: A structure holding one Mg atom must yield one metallic charge.
    """
    # Test metallic ion charge.
    mg_only = PDB()
    mg_position = np.array([0, 0, 0])
    mg_atom = Atom(element="MG", coordinates=Point(coords=mg_position))
    mg_only.add_new_non_protein_atom(mg_atom)

    found = mg_only.identify_metallic_charges()
    assert len(found) == 1
def test_assign_ligand_aromatics(self):
    """
    TestPDB: Test that non-protein aromatic rings are assigned correctly.
    """
    # 3ao4 comes from PDBBind-CN and contains some cruft in the PDB file:
    # atoms without residues labelled. This triggered some problems with
    # non-protein aromatics complaining.
    # TODO(rbharath): Add a stub here.
    # For now the test makes no assertions; it only loads the structure.
    pdb_path = os.path.join(data_dir(), "3ao4_protein_hyd.pdb")
    pdbqt_path = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt")
    protein = PDB()
    protein.load_from_files(pdb_path, pdbqt_path)
def test_phosphorus_charges(self):
    """
    TestPDB: Verify that Phosphorus groups are charged correctly.
    """
    # CID82671 contains a phosphate between two aromatic groups.
    phosphate_pdb = PDB()
    phosphate_pdb_path = os.path.join(data_dir(), "82671_hyd.pdb")
    # Pair the PDB with its PDBQT counterpart; this path previously repeated
    # the ".pdb" file name, so the PDB file was passed for both arguments.
    # NOTE(review): confirm 82671_hyd.pdbqt is present in the data directory.
    phosphate_pdbqt_path = os.path.join(data_dir(), "82671_hyd.pdbqt")
    phosphate_pdb.load_from_files(phosphate_pdb_path, phosphate_pdbqt_path)

    phosphorus_charges = phosphate_pdb.identify_phosphorus_charges()
    assert len(phosphorus_charges) == 1
    assert not phosphorus_charges[0].positive  # Should be negatively charged.
def test_nitrogen_charges(self):
    """
    TestPDB: Nitrogen groups in ammonium sulfate and pyrrolidine are positive.
    """
    # --- Ammonium sulfate: (NH4)+(NH4)+(SO4)(2-) ---
    # Two charged nitrogen groups are expected, one per ammonium ion.
    sulfate = PDB()
    sulfate_pdb = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdb")
    sulfate_pdbqt = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdbqt")
    sulfate.load_from_files(sulfate_pdb, sulfate_pdbqt)

    sulfate_groups = sulfate.identify_nitrogen_charges()
    assert len(sulfate_groups) == 2
    first_group, second_group = sulfate_groups[0], sulfate_groups[1]
    assert first_group.positive  # Should be positive
    assert second_group.positive  # Should be positive

    # --- Pyrrolidine: (CH2)4NH ---
    # The nitrogen here should be sp3 hybridized, so it is likely to pick up
    # an extra proton at physiological pH.
    ring = PDB()
    ring_pdb = os.path.join(data_dir(), "pyrrolidine_hyd.pdb")
    ring_pdbqt = os.path.join(data_dir(), "pyrrolidine_hyd.pdbqt")
    ring.load_from_files(ring_pdb, ring_pdbqt)

    ring_groups = ring.identify_nitrogen_charges()
    assert len(ring_groups) == 1
    assert ring_groups[0].positive  # Should be positive
def test_load_bonds_from_pdb(self):
    """
    TestPDB: Verifies that bonds can be loaded from PDB.

    Writes three CONECT records describing CO2 to a temporary file and
    checks the connection counts recorded on each atom after loading.
    """
    pdb = PDB()
    # Test that we can load CO2
    carbon_atom = Atom(element="C")
    oxygen_atom_1 = Atom(element="O")
    oxygen_atom_2 = Atom(element="O")

    pdb.add_new_atom(carbon_atom)
    pdb.add_new_atom(oxygen_atom_1)
    pdb.add_new_atom(oxygen_atom_2)

    # One list element per CONECT record.  The separating commas were
    # missing, so Python's implicit string-literal concatenation fused the
    # three records into a single line and the join below was a no-op.
    lines = [
        "CONECT 1 2 3 ",
        "CONECT 2 ",
        "CONECT 3 ",
    ]
    # Text mode so that writing a str works on Python 3 as well as Python 2.
    with tempfile.NamedTemporaryFile(mode="w") as temp:
        temp.write("\n".join(lines))
        # Flush so the loader, which reopens the file by name, sees the data.
        temp.flush()
        pdb.load_bonds_from_pdb(temp.name)

    assert len(carbon_atom.indices_of_atoms_connecting) == 2
    assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
    assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0
def test_sulfur_charges(self):
    """
    TestPDB: Check that the sulfur group in triflic acid is found negative.
    """
    acid = PDB()
    acid.load_from_files(
        os.path.join(data_dir(), "triflic_acid_hyd.pdb"),
        os.path.join(data_dir(), "triflic_acid_hyd.pdbqt"))

    charged_groups = acid.identify_sulfur_charges()

    # Exactly one charged sulfur group, and it must not be positive.
    assert len(charged_groups) == 1
    assert not charged_groups[0].positive  # Should be negatively charged.
def test_phosphorus_charges(self):
    """
    TestPDB: Verify that Phosphorus groups are charged correctly.
    """
    # CID82671 contains a phosphate between two aromatic groups.
    phosphate_pdb = PDB()
    phosphate_pdb_path = os.path.join(data_dir(), "82671_hyd.pdb")
    # Use the PDBQT file for the second argument; the path used to repeat
    # the ".pdb" name, so the same PDB file was loaded for both inputs.
    # NOTE(review): confirm 82671_hyd.pdbqt is present in the data directory.
    phosphate_pdbqt_path = os.path.join(data_dir(), "82671_hyd.pdbqt")
    phosphate_pdb.load_from_files(
        phosphate_pdb_path, phosphate_pdbqt_path)

    phosphorus_charges = phosphate_pdb.identify_phosphorus_charges()
    assert len(phosphorus_charges) == 1
    assert not phosphorus_charges[0].positive  # Should be negatively charged.
def test_ligand_assign_aromatics(self):
    """
    TestPDB: Check that the aromatic ring of a benzene ligand is detected.
    """
    benzene = PDB()
    benzene.load_from_files(
        os.path.join(data_dir(), "benzene_hyd.pdb"),
        os.path.join(data_dir(), "benzene_hyd.pdbqt"))

    # Debug output of the detected rings.
    print(benzene.aromatic_rings)

    # Benzene has exactly one aromatic ring ...
    assert len(benzene.aromatic_rings) == 1
    # ... made up of the first 6 atoms in the benzene pdb.
    expected_indices = {1, 2, 3, 4, 5, 6}
    assert set(benzene.aromatic_rings[0].indices) == expected_indices
def test_ligand_assign_aromatics(self):
    """
    TestPDB: Loading benzene must identify a single six-atom aromatic ring.
    """
    ligand = PDB()
    ligand_pdb_file = os.path.join(data_dir(), "benzene_hyd.pdb")
    ligand_pdbqt_file = os.path.join(data_dir(), "benzene_hyd.pdbqt")
    ligand.load_from_files(ligand_pdb_file, ligand_pdbqt_file)

    # A benzene should have exactly one aromatic ring.
    print(ligand.aromatic_rings)
    ring_count = len(ligand.aromatic_rings)
    assert ring_count == 1

    # The first 6 atoms in the benzene pdb form the aromatic ring.
    ring_atoms = set(ligand.aromatic_rings[0].indices)
    assert ring_atoms == set(range(1, 7))
def test_sulfur_charges(self):
    """
    TestPDB: Triflic acid must yield one non-positive sulfur charge group.
    """
    molecule = PDB()
    pdb_file = os.path.join(data_dir(), "triflic_acid_hyd.pdb")
    pdbqt_file = os.path.join(data_dir(), "triflic_acid_hyd.pdbqt")
    molecule.load_from_files(pdb_file, pdbqt_file)

    groups = molecule.identify_sulfur_charges()
    group_count = len(groups)
    assert group_count == 1
    # Should be negatively charged.
    only_group = groups[0]
    assert not only_group.positive
def compute_input_vector_from_files(self, ligand_pdb_filename,
                                    receptor_pdb_filename, line_header):
    """Computes feature vector for ligand-receptor pair.

    Parameters
    ----------
    ligand_pdb_filename: string
      path to ligand's pdb file.
    receptor_pdb_filename: string
      path to receptor pdb file.
    line_header: string
      line separator in PDB files

    Returns
    -------
    The value returned by self.compute_input_vector(ligand, receptor).
    """
    # Load receptor and ligand from file.
    receptor = PDB()
    receptor.load_from_files(receptor_pdb_filename, line_header)
    # Secondary structure is assigned on the receptor only.
    receptor.assign_secondary_structure()
    ligand = PDB()
    ligand.load_from_files(ligand_pdb_filename, line_header)
    # The computed vector used to be discarded, so callers always got None.
    return self.compute_input_vector(ligand, receptor)
def setUp(self):
    """
    Instantiate a dummy PDB file.

    Creates a scratch directory with an empty ".pdb" file inside it, an
    empty PDB object, and two protein structures (prgr and 1r5y) loaded
    from the test data directory.
    """
    self.temp_dir = tempfile.mkdtemp()
    self.pdb = PDB()
    # mkstemp returns an open OS-level file descriptor together with the
    # path.  Only the path is used, so close the descriptor immediately
    # instead of leaking one for every test that runs.
    handle, self.pdb_filename = tempfile.mkstemp(
        suffix=".pdb", dir=self.temp_dir)
    os.close(handle)

    self.prgr_pdb = PDB()
    prgr_pdb_path = os.path.join(data_dir(), "prgr_hyd.pdb")
    prgr_pdbqt_path = os.path.join(data_dir(), "prgr_hyd.pdbqt")
    self.prgr_pdb.load_from_files(prgr_pdb_path, prgr_pdbqt_path)

    self._1r5y_protein = PDB()
    _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
    _1r5y_protein_pdbqt = os.path.join(data_dir(), "1r5y_protein_hyd.pdbqt")
    self._1r5y_protein.load_from_files(_1r5y_protein_pdb, _1r5y_protein_pdbqt)

    # (name, structure) pairs for tests that iterate over the proteins.
    self.proteins = [("prgr", self.prgr_pdb), ("1r5y", self._1r5y_protein)]
def test_assign_non_protein_charges(self):
    """
    TestPDB: Check that loading a ligand populates its charged groups.
    """
    # Ammonium sulfate, (NH4)+(NH4)+(SO4)(2-): three charged groups are
    # expected, two positive and one negative.
    sulfate = PDB()
    pdb_path = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdb")
    pdbqt_path = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdbqt")
    # No explicit charge-assignment call is made: the charges are read
    # straight from the object after loading.
    sulfate.load_from_files(pdb_path, pdbqt_path)

    assert len(sulfate.charges) == 3
    signs = [bool(charge.positive) for charge in sulfate.charges]
    assert signs.count(True) == 2
    assert signs.count(False) == 1
def test_assign_non_protein_charges(self):
    """
    TestPDB: Ammonium sulfate must load with two positive, one negative charge.
    """
    # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
    salt = PDB()
    salt_pdb_file = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdb")
    salt_pdbqt_file = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdbqt")
    # Notice that load automatically identifies non-protein charges.
    salt.load_from_files(salt_pdb_file, salt_pdbqt_file)

    # There should be 3 charged groups in total.
    assert len(salt.charges) == 3

    # Tally the groups by sign: True counts positive, False counts negative.
    tally = {True: 0, False: 0}
    for group in salt.charges:
        tally[bool(group.positive)] += 1
    assert tally[True] == 2
    assert tally[False] == 1
def _featurize_complex(self, mol_pdb, protein_pdb):
    """
    Compute Binana fingerprint for complex.

    Both inputs are written to a scratch directory, run through
    hydrogenate_and_compute_partial_charges, loaded as PDB objects and
    passed to self.binana.compute_input_vector.

    Parameters
    ----------
    mol_pdb: iterable of strings
      Lines of the ligand PDB file (passed to file.writelines).
    protein_pdb: iterable of strings
      Lines of the protein PDB file (passed to file.writelines).

    Returns
    -------
    The value returned by self.binana.compute_input_vector.
    """
    ### OPEN TEMPDIR
    tempdir = tempfile.mkdtemp()
    # try/finally so the scratch directory is removed even when
    # hydrogenation, loading or featurization raises; previously a failure
    # left the directory behind.
    try:
        mol_pdb_file = os.path.join(tempdir, "mol.pdb")
        with open(mol_pdb_file, "w") as mol_f:
            mol_f.writelines(mol_pdb)
        protein_pdb_file = os.path.join(tempdir, "protein.pdb")
        with open(protein_pdb_file, "w") as protein_f:
            protein_f.writelines(protein_pdb)

        mol_hyd_file = os.path.join(tempdir, "mol_hyd.pdb")
        mol_pdbqt_file = os.path.join(tempdir, "mol_hyd.pdbqt")
        hydrogenate_and_compute_partial_charges(
            mol_pdb_file, "pdb", tempdir, mol_hyd_file, mol_pdbqt_file)

        protein_hyd_file = os.path.join(tempdir, "protein_hyd.pdb")
        protein_pdbqt_file = os.path.join(tempdir, "protein_hyd.pdbqt")
        hydrogenate_and_compute_partial_charges(
            protein_pdb_file, "pdb", tempdir, protein_hyd_file,
            protein_pdbqt_file)

        # NOTE(review): the original (non-hydrogenated) PDB files are loaded
        # together with the hydrogenated PDBQT files; the "*_hyd.pdb" paths
        # are never read back. Confirm this pairing is intended.
        mol_pdb_obj = PDB()
        mol_pdb_obj.load_from_files(mol_pdb_file, mol_pdbqt_file)
        protein_pdb_obj = PDB()
        protein_pdb_obj.load_from_files(protein_pdb_file, protein_pdbqt_file)

        features = self.binana.compute_input_vector(
            mol_pdb_obj, protein_pdb_obj)
    finally:
        ### CLOSE TEMPDIR
        shutil.rmtree(tempdir)

    return features
def featurize_pdbbind(pdbbind_dir, pickle_out):
    """Featurize all entries in pdbbind_dir and write features to pickle_out

    pdbbind_dir should be a dir, with K subdirs, one for each protein-ligand
    complex to be featurized. The ligand and receptor should each have a pdb
    and pdbqt file. The ligand files should end in '_ligand_hyd.${FILETYPE}'
    while the receptor files should end in '_protein_hyd.${FILETYPE}'

    The result written to pickle_out is a dict mapping each subdirectory
    name to its feature vector. Nothing is written if any entry fails.

    Parameters
    ----------
    pdbbind_dir: string
      Path to pdbbind directory.
    pickle_out: string
      Path to write pickle output.

    Raises
    ------
    ValueError
      If a subdirectory lacks one of the four required files, or if a
      computed feature vector does not have the expected length.
    """
    assert os.path.isdir(pdbbind_dir)

    # Instantiate copy of binana vector
    binana = Binana()
    feature_vectors = {}

    # Extract the subdirectories in pdbbind_dir.  A leftover debugging slice
    # (subdirs[900:]) used to drop the first 900 listed entries here, which
    # contradicted the documented "featurize all entries" contract.
    subdirs = [d for d in os.listdir(pdbbind_dir)
               if os.path.isdir(os.path.join(pdbbind_dir, d))]

    num_atoms = len(Binana.atom_types)
    # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector
    # for derivation.  num_atoms*(num_atoms+1) is always even, so floor
    # division is exact and keeps the result an int.
    feature_len = (3*num_atoms*(num_atoms+1)//2 + num_atoms + 12 + 6 + 3 + 6 +
                   3 + 6 + 3 + 1)
    for count, d in enumerate(subdirs):
        print("\nprocessing %d-th pdb %s" % (count, d))
        subdir = os.path.join(pdbbind_dir, d)

        print("About to extract ligand and protein input files")
        ligand_pdb, ligand_pdbqt = None, None
        protein_pdb, protein_pdbqt = None, None
        for f in os.listdir(subdir):
            # Plain suffix tests: the earlier regexes left "." unescaped, so
            # it matched any character rather than a literal dot.
            if f.endswith("_ligand_hyd.pdb"):
                ligand_pdb = f
            elif f.endswith("_ligand_hyd.pdbqt"):
                ligand_pdbqt = f
            elif f.endswith("_protein_hyd.pdb"):
                protein_pdb = f
            elif f.endswith("_protein_hyd.pdbqt"):
                protein_pdbqt = f

        print("Extracted Input Files:")
        print((ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt))
        if (not ligand_pdb or not ligand_pdbqt or not protein_pdb or
                not protein_pdbqt):
            raise ValueError("Required files not present for %s" % d)
        ligand_pdb_path = os.path.join(subdir, ligand_pdb)
        ligand_pdbqt_path = os.path.join(subdir, ligand_pdbqt)
        protein_pdb_path = os.path.join(subdir, protein_pdb)
        protein_pdbqt_path = os.path.join(subdir, protein_pdbqt)

        print("About to load ligand from input files")
        ligand_pdb_obj = PDB()
        ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path)

        print("About to load protein from input files")
        protein_pdb_obj = PDB()
        protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

        print("About to generate feature vector.")
        vector = binana.compute_input_vector(ligand_pdb_obj, protein_pdb_obj)
        feature_vectors[d] = vector
        if len(vector) != feature_len:
            raise ValueError("Feature length incorrect on %s" % d)
        print("Feature vector generated correctly.")

    with open(pickle_out, "wb") as f:
        pickle.dump(feature_vectors, f)
def setUp(self):
    """
    Create the Binana instance and load the 3zp9 and 3bwf complexes.
    """

    def load_structure(pdb_name, pdbqt_name):
        # Build a PDB object from a pdb/pdbqt pair in the test data directory.
        structure = PDB()
        pdb_path = os.path.join(data_dir(), pdb_name)
        pdbqt_path = os.path.join(data_dir(), pdbqt_name)
        structure.load_from_files(pdb_path, pdbqt_path)
        return structure

    self.binana = Binana()

    # 3zp9 comes from PDBBind-CN; its ligand is also specified by pdbbind.
    zp9_protein = load_structure(
        "3zp9_protein_hyd.pdb", "3zp9_protein_hyd.pdbqt")
    zp9_ligand = load_structure(
        "3zp9_ligand_hyd.pdb", "3zp9_ligand_hyd.pdbqt")

    # 3bwf comes from PDBBind-CN; its ligand is also specified by pdbbind.
    bwf_protein = load_structure(
        "3bwf_protein_hyd.pdb", "3bwf_protein_hyd.pdbqt")
    bwf_ligand = load_structure(
        "3bwf_ligand_hyd.pdb", "3bwf_ligand_hyd.pdbqt")

    # Each test case is a (name, protein, ligand) triple.
    self.test_cases = [
        ("3bwf", bwf_protein, bwf_ligand),
        ("3zp9", zp9_protein, zp9_ligand),
    ]
def setUp(self):
    """
    Instantiate local copy of Binana object.

    Also loads the 3zp9 and 3bwf complexes; only 3bwf is registered as a
    test case.
    """
    self.binana = Binana()

    # TODO(rbharath): This load sequence is getting unwieldy. Refactor.
    #
    # Disabled fixtures. Each one was loaded from a "<name>_hyd.pdb" /
    # "<name>_hyd.pdbqt" pair in data_dir(), exactly like the live ones below:
    #   * prgr (DUD-E collection): receptor "prgr_hyd", active
    #     "prgr_active0_hyd" (CHEMBL1164248)
    #   * c-Abl (Autodock Vina examples): receptor "c-Abl_hyd", active
    #     "imatinib_hyd" (imatinib)
    #   * PDBBind-CN complexes, "<id>_protein_hyd" / "<id>_ligand_hyd":
    #     1zea, 1r5y, 3ao4, 2jdm, 3g5k, 3str, 1nu3, 2rio, 2y2h, 1pi5, 3fxz,
    #     4i60, 1hyv, 3m1j, 1y3g, 6rsa, 1lvk, 3rj7
    # Python complains about variables starting with numbers, so locals for
    # PDB ids carry a leading underscore.

    # 3zp9 comes from PDBBind-CN
    _3zp9_receptor = PDB()
    _3zp9_receptor.load_from_files(
        os.path.join(data_dir(), "3zp9_protein_hyd.pdb"),
        os.path.join(data_dir(), "3zp9_protein_hyd.pdbqt"))
    # The ligand is also specified by pdbbind
    _3zp9_compound = PDB()
    _3zp9_compound.load_from_files(
        os.path.join(data_dir(), "3zp9_ligand_hyd.pdb"),
        os.path.join(data_dir(), "3zp9_ligand_hyd.pdbqt"))

    # 3bwf comes from PDBBind-CN
    _3bwf_receptor = PDB()
    _3bwf_receptor.load_from_files(
        os.path.join(data_dir(), "3bwf_protein_hyd.pdb"),
        os.path.join(data_dir(), "3bwf_protein_hyd.pdbqt"))
    # The ligand is also specified by pdbbind
    _3bwf_compound = PDB()
    _3bwf_compound.load_from_files(
        os.path.join(data_dir(), "3bwf_ligand_hyd.pdb"),
        os.path.join(data_dir(), "3bwf_ligand_hyd.pdbqt"))

    # Previously enabled cases (name, protein, ligand): prgr, cabl, 1zea,
    # 1r5y, 3ao4, 2jdm, 3g5k, 3str, 2rio. Only 3bwf is active now; 3zp9 is
    # loaded above but not registered.
    self.test_cases = [("3bwf", _3bwf_receptor, _3bwf_compound)]
def setUp(self):
    """
    Instantiate local copy of Binana object and load the test structures.

    Every structure is read from a "<stem>.pdb" / "<stem>.pdbqt" pair found
    in data_dir(). The resulting self.test_cases is a list of
    (label, receptor, ligand) tuples.
    """
    self.binana = Binana()

    def load_pair(stem):
        """Return a PDB loaded from <stem>.pdb and <stem>.pdbqt in data_dir()."""
        structure = PDB()
        pdb_path = os.path.join(data_dir(), stem + ".pdb")
        pdbqt_path = os.path.join(data_dir(), stem + ".pdbqt")
        structure.load_from_files(pdb_path, pdbqt_path)
        return structure

    # (label, receptor stem, ligand stem, include in self.test_cases)
    systems = [
        # PRGR is from the DUD-E collection; the ligand is CHEMBL1164248.
        ("prgr", "prgr_hyd", "prgr_active0_hyd", True),
        # c-Abl is taken from the Autodock Vina examples; the ligand is
        # imatinib.
        ("cabl", "c-Abl_hyd", "imatinib_hyd", True),
    ]
    # The remaining complexes come from PDBBind-CN, which also specifies
    # the ligand. 1nu3 is loaded like the others but is not part of the
    # test cases.
    for code, included in (("1zea", True), ("1r5y", True), ("3ao4", True),
                           ("2jdm", True), ("3g5k", True), ("3str", True),
                           ("1nu3", False), ("2rio", True)):
        systems.append(
            (code, code + "_protein_hyd", code + "_ligand_hyd", included))

    cases = []
    for label, receptor_stem, ligand_stem, included in systems:
        # Receptor first, then ligand, one system at a time.
        receptor = load_pair(receptor_stem)
        ligand = load_pair(ligand_stem)
        if included:
            cases.append((label, receptor, ligand))
    self.test_cases = cases
def featurize_dude(dude_dir, target, pickle_dir, num_jobs):
    """Featurize DUD-E docked poses and write features to pickle_out.

    For each active compound, the docked "<name>_hyd_out.pdbqt" file in the
    actives directory is converted to PDB, every pose in it is featurized
    against the target's receptor, and the per-compound list of feature
    vectors is written as a gzipped pickle to
    <dude_dir>/<target>/out.pkl.gz.

    Parameters
    ----------
    dude_dir: string
      Path to DUD-E directory
    target: string
      Name of DUD-E target.
    pickle_dir: string
      Path to directory to output pickles.
      NOTE(review): currently unused; the pickle is written into the target
      directory instead. Confirm the intended output location.
    num_jobs: number
      Number of jobs the actives would be split across. Currently only used
      to report how many compounds each job would receive.
    """
    target_dir = os.path.join(dude_dir, target)
    actives_dir = os.path.join(target_dir, "actives")
    decoys_dir = os.path.join(target_dir, "decoys")
    actives = os.listdir(actives_dir)
    decoys = os.listdir(decoys_dir)
    pickle_out = os.path.join(target_dir, "out.pkl.gz")

    # Just for debugging purposes
    # NOTE(review): only the first listed active is featurized while this
    # truncation is in place.
    actives = actives[:1]

    num_per_job = int(math.ceil(len(actives) / float(num_jobs)))
    print("Number per job: %d" % num_per_job)

    # Read the hydrogenated receptor of the requested target rather than a
    # fixed, machine-specific location.
    protein_pdb_path = os.path.join(target_dir, "receptor_hyd.pdb")
    protein_pdbqt_path = os.path.join(target_dir, "receptor_hyd.pdbqt")

    print("About to load protein from input files")
    protein_pdb_obj = PDB()
    protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

    binana = Binana()
    feature_vectors = {}
    for compound in actives:
        compound_name = compound.split(".")[0]
        compound_pdbqt = os.path.join(
            actives_dir, compound_name + "_hyd_out.pdbqt")

        # Convert the pdbqt to pdb
        pdbqt_to_pdb(compound_pdbqt, actives_dir)
        compound_pdb = os.path.join(
            actives_dir, compound_name + "_hyd_out.pdb")

        structures = MultiStructure()
        structures.load_from_files(compound_pdb, compound_pdbqt)

        vectors = []
        # Visit the molecules in sorted key order so the vector list is
        # deterministic.
        for key in sorted(structures.molecules.keys()):
            structure = structures.molecules[key]
            print("type(structure)")
            print(type(structure))
            vectors.append(
                binana.compute_input_vector(structure, protein_pdb_obj))
        feature_vectors[compound_name] = vectors

    with gzip.open(pickle_out, "wb") as f:
        pickle.dump(feature_vectors, f)

    # NOTE(review): decoys are listed and truncated but never featurized.
    decoys = decoys[:1]
def test_carbon_charges(self):
    """
    TestPDB: Verify that carbon groups are charged correctly.
    """
    # (file stem in data_dir(), whether the single charge must be positive)
    expectations = (
        # Guanidine is positively charged at physiological pH
        ("guanidine_hyd", True),
        # sulfaguanidine contains a guanidine group that is likely to be
        # positively protonated at physiological pH
        ("sulfaguanidine_hyd", True),
        # Formic acid is a carboxylic acid, which should be negatively
        # charged.
        ("formic_acid_hyd", False),
    )
    for stem, expect_positive in expectations:
        molecule = PDB()
        pdb_path = os.path.join(data_dir(), stem + ".pdb")
        pdbqt_path = os.path.join(data_dir(), stem + ".pdbqt")
        molecule.load_from_files(pdb_path, pdbqt_path)

        found = molecule.identify_carbon_charges()
        assert len(found) == 1
        if expect_positive:
            assert found[0].positive
        else:
            assert not found[0].positive
def test_carbon_charges(self):
    """
    TestPDB: Verify that carbon groups are charged correctly.
    """

    def sole_carbon_charge(stem):
        """Load <stem>.pdb/.pdbqt from data_dir() and return its one charge."""
        compound = PDB()
        compound.load_from_files(
            os.path.join(data_dir(), stem + ".pdb"),
            os.path.join(data_dir(), stem + ".pdbqt"))
        groups = compound.identify_carbon_charges()
        # Each test compound is expected to carry exactly one charged group.
        assert len(groups) == 1
        return groups[0]

    # Guanidine is positively charged at physiological pH
    assert sole_carbon_charge("guanidine_hyd").positive

    # sulfaguanidine contains a guanidine group that is likely to be
    # positively protonated at physiological pH
    assert sole_carbon_charge("sulfaguanidine_hyd").positive

    # Formic acid is a carboxylic acid, which should be negatively charged.
    assert not sole_carbon_charge("formic_acid_hyd").positive
def featurize_pdbbind(pdbbind_dir, pickle_out):
    """Featurize all entries in pdbbind_dir and write features to pickle_out

    pdbbind_dir should be a dir, with K subdirs, one for each protein-ligand
    complex to be featurized. The ligand and receptor should each have a pdb
    and pdbqt file. The ligand files should end in '_ligand_hyd.${FILETYPE}'
    while the receptor files should end in '_protein_hyd.${FILETYPE}'

    The pickle holds a dict mapping each subdirectory name to the feature
    vector computed for that complex.

    pdbbind_dir: string
      Path to pdbbind directory.
    pickle_out: string
      Path to write pickle output.

    Raises
    ------
    AssertionError
      If pdbbind_dir is not a directory.
    ValueError
      If a subdirectory lacks one of the four input files, or a computed
      feature vector does not have the expected length.
    """
    assert os.path.isdir(pdbbind_dir)

    # Instantiate copy of binana vector
    binana = Binana()
    feature_vectors = {}

    # Extract the subdirectories in pdbbind_dir. Every complex is processed;
    # no debugging-only truncation is applied.
    subdirs = [
        d for d in os.listdir(pdbbind_dir)
        if os.path.isdir(os.path.join(pdbbind_dir, d))
    ]

    num_atoms = len(Binana.atom_types)
    # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector
    # for derivation. Floor division keeps the length an int; the product
    # num_atoms * (num_atoms + 1) is always even, so nothing is lost.
    feature_len = (3 * num_atoms * (num_atoms + 1) // 2 + num_atoms + 12 + 6 +
                   3 + 6 + 3 + 6 + 3 + 1)

    for count, d in enumerate(subdirs):
        print("\nprocessing %d-th pdb %s" % (count, d))
        subdir = os.path.join(pdbbind_dir, d)

        print("About to extract ligand and protein input files")
        ligand_pdb, ligand_pdbqt = None, None
        protein_pdb, protein_pdbqt = None, None
        for f in os.listdir(subdir):
            # Literal suffix match: the "." must be a real dot.
            if f.endswith("_ligand_hyd.pdb"):
                ligand_pdb = f
            elif f.endswith("_ligand_hyd.pdbqt"):
                ligand_pdbqt = f
            elif f.endswith("_protein_hyd.pdb"):
                protein_pdb = f
            elif f.endswith("_protein_hyd.pdbqt"):
                protein_pdbqt = f

        print("Extracted Input Files:")
        print((ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt))
        if (not ligand_pdb or not ligand_pdbqt or not protein_pdb or
                not protein_pdbqt):
            raise ValueError("Required files not present for %s" % d)
        ligand_pdb_path = os.path.join(subdir, ligand_pdb)
        ligand_pdbqt_path = os.path.join(subdir, ligand_pdbqt)
        protein_pdb_path = os.path.join(subdir, protein_pdb)
        protein_pdbqt_path = os.path.join(subdir, protein_pdbqt)

        print("About to load ligand from input files")
        ligand_pdb_obj = PDB()
        ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path)

        print("About to load protein from input files")
        protein_pdb_obj = PDB()
        protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

        print("About to generate feature vector.")
        vector = binana.compute_input_vector(ligand_pdb_obj, protein_pdb_obj)
        if len(vector) != feature_len:
            raise ValueError("Feature length incorrect on %s" % d)
        feature_vectors[d] = vector
        print("Feature vector generated correctly.")

    with open(pickle_out, "wb") as f:
        pickle.dump(feature_vectors, f)
class TestPDB(unittest.TestCase):
    """
    Test PDB class.

    Most tests run against structures loaded from "<stem>.pdb" /
    "<stem>.pdbqt" pairs in data_dir(); a few build small PDB objects by hand.
    """

    def setUp(self):
        """
        Instantiate a dummy PDB file.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.pdb = PDB()
        # mkstemp hands back an already-open descriptor; only the path is
        # used, so close it straight away instead of leaking it per test.
        handle, self.pdb_filename = tempfile.mkstemp(
            suffix=".pdb", dir=self.temp_dir)
        os.close(handle)

        self.prgr_pdb = PDB()
        prgr_pdb_path = os.path.join(data_dir(), "prgr_hyd.pdb")
        prgr_pdbqt_path = os.path.join(data_dir(), "prgr_hyd.pdbqt")
        self.prgr_pdb.load_from_files(prgr_pdb_path, prgr_pdbqt_path)

        self._1r5y_protein = PDB()
        _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
        _1r5y_protein_pdbqt = os.path.join(data_dir(),
                                           "1r5y_protein_hyd.pdbqt")
        self._1r5y_protein.load_from_files(_1r5y_protein_pdb,
                                           _1r5y_protein_pdbqt)

        self.proteins = [("prgr", self.prgr_pdb),
                         ("1r5y", self._1r5y_protein)]

    def tearDown(self):
        """
        Delete temporary directory.
        """
        shutil.rmtree(self.temp_dir)

    def test_add_new_atom(self):
        """
        TestPDB: Verifies that new atoms can be added.
        """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        empty_atom = Atom()
        self.pdb.add_new_atom(empty_atom)
        # Verify that we now have one atom
        assert len(self.pdb.all_atoms.keys()) == 1

    def test_get_residues(self):
        """
        TestPDB: Tests that all residues in PDB are identified.
        """
        residues = self.prgr_pdb.get_residues()
        # prgr.pdb has 280 unique residues
        assert len(residues.keys()) == 280
        prgr_residues = [
            "LEU", "ILE", "ASN", "LEU", "LEU", "MET", "SER", "ILE", "GLU", "PRO",
            "ASP", "VAL", "ILE", "TYR", "ALA", "GLY", "HIS", "ASP", "THR", "SER",
            "SER", "SER", "LEU", "LEU", "THR", "SER", "LEU", "ASN", "GLN", "LEU",
            "GLY", "GLU", "ARG", "GLN", "LEU", "LEU", "SER", "VAL", "VAL", "LYS",
            "TRP", "SER", "LYS", "SER", "LEU", "PRO", "GLY", "PHE", "ARG", "LEU",
            "HIS", "ILE", "ASP", "ASP", "GLN", "ILE", "THR", "LEU", "ILE", "GLN",
            "TYR", "SER", "TRP", "MET", "SER", "LEU", "MET", "VAL", "PHE", "GLY",
            "LEU", "GLY", "TRP", "ARG", "SER", "TYR", "LYS", "HIS", "VAL", "SER",
            "GLY", "GLN", "MET", "LEU", "TYR", "PHE", "ALA", "PRO", "ASP", "LEU",
            "ILE", "LEU", "ASN", "GLU", "GLN", "ARG", "MET", "LYS", "GLU", "PHE",
            "TYR", "SER", "LEU", "CYS", "LEU", "THR", "MET", "TRP", "GLN", "ILE",
            "PRO", "GLN", "GLU", "PHE", "VAL", "LYS", "LEU", "GLN", "VAL", "SER",
            "GLN", "GLU", "GLU", "PHE", "LEU", "CYS", "MET", "LYS", "VAL", "LEU",
            "LEU", "LEU", "LEU", "ASN", "THR", "ILE", "PRO", "LEU", "GLU", "GLY",
            "LEU", "PHE", "MET", "ARG", "TYR", "ILE", "GLU", "LEU", "ALA", "ILE",
            "ARG", "ARG", "PHE", "TYR", "GLN", "LEU", "THR", "LYS", "LEU", "LEU",
            "ASP", "ASN", "LEU", "HIS", "ASP", "LEU", "VAL", "LYS", "GLN", "LEU",
            "HIS", "LEU", "TYR", "CYS", "LEU", "ASN", "THR", "PHE", "ILE", "GLN",
            "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU", "PHE", "PRO", "GLU",
            "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA", "ALA", "GLN", "LEU",
            "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET", "VAL", "LYS", "PRO",
            "LEU", "LEU", "PHE", "HIS", "LYS", "ASN", "LEU", "ASP", "ASP", "ILE",
            "THR", "LEU", "ILE", "GLN", "TYR", "SER", "TRP", "MET", "THR", "ILE",
            "PRO", "LEU", "GLU", "GLY", "LEU", "ARG", "VAL", "LYS", "GLN", "LEU",
            "HIS", "LEU", "TYR", "CYS", "LEU", "ASN", "THR", "PHE", "ILE", "GLN",
            "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU", "PHE", "PRO", "GLU",
            "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA", "ALA", "GLN", "LEU",
            "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET", "VAL", "LYS", "PRO",
        ]
        # Recall the keys have format RESNAME_RESNUMBER_CHAIN
        resnames = [reskey.split("_")[0].strip() for reskey in residues]
        # Compare as sorted lists: only the residue-name counts matter, not
        # the order in which the residues are reported.
        resnames.sort()
        prgr_residues.sort()
        assert resnames == prgr_residues
        # The per-residue atom index lists should total 2788 entries.
        atom_count = sum(
            len(atom_indices) for atom_indices in residues.values())
        print(atom_count)
        assert atom_count == 2788

    def test_get_lysine_charges(self):
        """
        TestPDB: Test that lysine charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        lysine_charges = self.prgr_pdb.get_lysine_charges(res_list)
        # prgr has 14 lysines.
        print(len(lysine_charges))
        assert len(lysine_charges) == 14
        for charge in lysine_charges:
            # Lysine should be positively charged
            assert charge.positive

    def test_get_arginine_charges(self):
        """
        TestPDB: Test that arginine charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        arginine_charges = self.prgr_pdb.get_arginine_charges(res_list)
        # prgr has 10 arginines
        assert len(arginine_charges) == 10
        for charge in arginine_charges:
            # The guanidinium in arginine should be positively charged.
            assert charge.positive

    def test_get_histidine_charges(self):
        """
        TestPDB: Test that histidine charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        histidine_charges = self.prgr_pdb.get_histidine_charges(res_list)
        # prgr has 7 histidines
        assert len(histidine_charges) == 7
        for charge in histidine_charges:
            # The nitrogens pick up positive charges
            assert charge.positive

    def test_get_glutamic_acid_charges(self):
        """
        TestPDB: Test that glutamic acid charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        glutamic_acid_charges = self.prgr_pdb.get_glutamic_acid_charges(
            res_list)
        assert len(glutamic_acid_charges) == 16
        for charge in glutamic_acid_charges:
            # The carboxyls get deprotonated.
            assert not charge.positive

    def test_get_aspartic_acid_charges(self):
        """
        TestPDB: Test that aspartic acid charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        aspartic_acid_charges = self.prgr_pdb.get_aspartic_acid_charges(
            res_list)
        assert len(aspartic_acid_charges) == 9
        for charge in aspartic_acid_charges:
            # The carboxyls get deprotonated
            assert not charge.positive

    def test_assign_ligand_aromatics(self):
        """
        TestPDB: Test that non-protein aromatic rings are assigned correctly.
        """
        ### 3ao4 comes from PDBBind-CN and contains some cruft in the PDB file:
        ### atoms without residues labelled. This triggered some problems with
        ### non-protein aromatics complaining.
        # TODO(rbharath): Add a stub here.
        # For now the test only checks that loading does not raise.
        _3ao4_protein = PDB()
        _3ao4_protein_pdb = os.path.join(data_dir(), "3ao4_protein_hyd.pdb")
        _3ao4_protein_pdbqt = os.path.join(data_dir(),
                                           "3ao4_protein_hyd.pdbqt")
        _3ao4_protein.load_from_files(_3ao4_protein_pdb, _3ao4_protein_pdbqt)

    def test_remove_redundant_rings(self):
        """
        TestPDB: Test that redundant rings are removed.
        """
        # Recall that each ring is represented as a list of atom indices.
        # Test that rings of length 0 are removed
        assert remove_redundant_rings([[]]) == []
        # Test that supersets are removed
        assert (remove_redundant_rings(
            [[1, 2, 3], [1, 3, 4, 5], [1, 2, 3, 4, 5]]) ==
                [[1, 2, 3], [1, 3, 4, 5]])
        # Ensure that duplicate rings are handled correctly (that is, only one
        # copy of a duplicate ring should remain)
        assert remove_redundant_rings([[1, 2, 3], [1, 3, 2]]) == [[1, 2, 3]]

    def test_assign_protein_aromatics(self):
        """
        TestPDB: Test that aromatic rings are assigned correctly.
        """
        for name, protein in self.proteins:
            # The proteins should have aromatic rings assigned already by
            # load_from_files()
            print("Processing aromatics for %s" % name)
            for aromatic in protein.aromatic_rings:
                assert aromatic is not None

    def test_get_phenylalanine_aromatics(self):
        """
        TestPDB: Test that phenylalanine aromatic rings are retrieved.
        """
        res_list = self.prgr_pdb.get_residues()
        phenylalanine_aromatics = (
            self.prgr_pdb.get_phenylalanine_aromatics(res_list))

        # prgr has 13 phenylalanines, each of which has 1 aromatic ring.
        assert len(phenylalanine_aromatics) == 13
        for aromatic in phenylalanine_aromatics:
            # The aromatic rings in phenylalanine have 6 elements each
            assert len(aromatic.indices) == 6

    def test_get_tyrosine_aromatics(self):
        """
        TestPDB: Test that tyrosine aromatic rings are retrieved.
        """
        # prgr has 10 tyrosines, each of which has 1 aromatic ring.
        res_list = self.prgr_pdb.get_residues()
        tyrosine_aromatics = self.prgr_pdb.get_tyrosine_aromatics(res_list)
        assert len(tyrosine_aromatics) == 10
        for aromatic in tyrosine_aromatics:
            # The aromatic rings in tyrosine have 6 elements each
            assert len(aromatic.indices) == 6

    def test_get_histidine_aromatics(self):
        """
        TestPDB: Test that histidine aromatic rings are retrieved.
        """
        res_list = self.prgr_pdb.get_residues()
        histidine_aromatics = self.prgr_pdb.get_histidine_aromatics(res_list)
        # prgr has 7 histidines, each of which has 1 aromatic ring.
        assert len(histidine_aromatics) == 7
        for aromatic in histidine_aromatics:
            # The aromatic rings in histidine have 5 elements each
            print(len(aromatic.indices))
            assert len(aromatic.indices) == 5

    def test_get_tryptophan_aromatics(self):
        """
        TestPDB: Test that tryptophan aromatic rings are retrieved.
        """
        res_list = self.prgr_pdb.get_residues()
        tryptophan_aromatics = self.prgr_pdb.get_tryptophan_aromatics(
            res_list)
        # prgr has 5 tryptophans, each of which has 2 aromatic rings.
        print(len(tryptophan_aromatics))
        assert len(tryptophan_aromatics) == 10
        num_five_rings, num_six_rings = 0, 0
        for aromatic in tryptophan_aromatics:
            # One aromatic ring in tryptophan has 6 elements,
            # while the other has 5 elements.
            if len(aromatic.indices) == 6:
                num_six_rings += 1
            elif len(aromatic.indices) == 5:
                num_five_rings += 1
        assert num_six_rings == 5
        assert num_five_rings == 5

    def test_connected_atoms(self):
        """
        TestPDB: Verifies that connected atom retrieval works.
        """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        carbon_atom = Atom(element="C")
        oxygen_atom = Atom(element="O")
        hydrogen_atom = Atom(element="H")

        self.pdb.add_new_atom(carbon_atom)
        self.pdb.add_new_atom(oxygen_atom)
        self.pdb.add_new_atom(hydrogen_atom)

        # We want a carboxyl, so C connects O and H
        carbon_atom.indices_of_atoms_connecting = [2, 3]
        oxygen_atom.indices_of_atoms_connecting = [1]
        hydrogen_atom.indices_of_atoms_connecting = [1]

        connected_oxygens = self.pdb.connected_atoms(1, "O")
        assert len(connected_oxygens) == 1

        connected_hydrogens = self.pdb.connected_atoms(1, "H")
        assert len(connected_hydrogens) == 1

    def test_load_bonds_from_pdb(self):
        """
        TestPDB: Verifies that bonds can be loaded from PDB.
        """
        pdb = PDB()
        # Test that we can load CO2
        carbon_atom = Atom(element="C")
        oxygen_atom_1 = Atom(element="O")
        oxygen_atom_2 = Atom(element="O")

        pdb.add_new_atom(carbon_atom)
        pdb.add_new_atom(oxygen_atom_1)
        pdb.add_new_atom(oxygen_atom_2)
        # One CONECT record per list element, so that the join below writes
        # one record per line. Without the separating commas the adjacent
        # literals would be concatenated into a single record.
        # NOTE(review): confirm the field spacing of these records against
        # what load_bonds_from_pdb expects.
        lines = [
            "CONECT 1 2 3 ",
            "CONECT 2 ",
            "CONECT 3 ",
        ]
        with tempfile.NamedTemporaryFile() as temp:
            temp.write("\n".join(lines))
            # Flush so the records are on disk before the file is re-read
            # by name.
            temp.flush()
            pdb.load_bonds_from_pdb(temp.name)
        assert len(carbon_atom.indices_of_atoms_connecting) == 2
        assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
        assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0

    def test_connected_heavy_atoms(self):
        """
        TestPDB: Verifies retrieval of connected heavy atoms.
        """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        carbon_atom = Atom(element="C")
        oxygen_atom = Atom(element="O")
        hydrogen_atom = Atom(element="H")

        self.pdb.add_new_atom(carbon_atom)
        self.pdb.add_new_atom(oxygen_atom)
        self.pdb.add_new_atom(hydrogen_atom)

        # We want a carboxyl, so C connects O and H
        carbon_atom.indices_of_atoms_connecting = [2, 3]
        oxygen_atom.indices_of_atoms_connecting = [1]
        hydrogen_atom.indices_of_atoms_connecting = [1]

        # Only the oxygen (index 2) counts as a heavy neighbour of the carbon.
        connected_heavy_atoms = self.pdb.connected_heavy_atoms(1)
        assert len(connected_heavy_atoms) == 1
        assert connected_heavy_atoms[0] == 2

    def test_assign_non_protein_charges(self):
        """
        TestPDB: Verify that charges are properly added to ligands.
        """
        # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
        # There should be 3 charged groups, two positive, one negative
        ammonium_sulfate_pdb = PDB()
        ammonium_sulfate_pdb_path = os.path.join(data_dir(),
                                                 "ammonium_sulfate_hyd.pdb")
        ammonium_sulfate_pdbqt_path = os.path.join(
            data_dir(), "ammonium_sulfate_hyd.pdbqt")
        # Notice that load automatically identifies non-protein charges.
        ammonium_sulfate_pdb.load_from_files(ammonium_sulfate_pdb_path,
                                             ammonium_sulfate_pdbqt_path)
        assert len(ammonium_sulfate_pdb.charges) == 3
        num_pos, num_neg = 0, 0
        for charge in ammonium_sulfate_pdb.charges:
            if charge.positive:
                num_pos += 1
            else:
                num_neg += 1
        assert num_pos == 2
        assert num_neg == 1

    def test_metallic_charges(self):
        """
        TestPDB: Verify that non-protein charges are assigned properly.
        """
        # Test metallic ion charge.
        magnesium_pdb = PDB()
        magnesium_atom = Atom(
            element="MG", coordinates=Point(coords=np.array([0, 0, 0])))
        magnesium_pdb.add_new_non_protein_atom(magnesium_atom)
        metallic_charges = magnesium_pdb.identify_metallic_charges()
        assert len(metallic_charges) == 1

    def test_nitrogen_charges(self):
        """
        TestPDB: Verify that nitrogen groups are charged correctly.
        """
        # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
        # The labeling should pick up 2 charged nitrogen groups for two
        # ammoniums.
        ammonium_sulfate_pdb = PDB()
        ammonium_sulfate_pdb_path = os.path.join(data_dir(),
                                                 "ammonium_sulfate_hyd.pdb")
        ammonium_sulfate_pdbqt_path = os.path.join(
            data_dir(), "ammonium_sulfate_hyd.pdbqt")
        ammonium_sulfate_pdb.load_from_files(ammonium_sulfate_pdb_path,
                                             ammonium_sulfate_pdbqt_path)
        nitrogen_charges = ammonium_sulfate_pdb.identify_nitrogen_charges()
        assert len(nitrogen_charges) == 2
        assert nitrogen_charges[0].positive  # Should be positive
        assert nitrogen_charges[1].positive  # Should be positive

        # Test pyrrolidine (CH2)4NH. The nitrogen here should be sp3
        # hybridized, so is likely to pick up an extra proton to its nitrogen
        # at physiological pH.
        pyrrolidine_pdb = PDB()
        pyrrolidine_pdb_path = os.path.join(data_dir(), "pyrrolidine_hyd.pdb")
        pyrrolidine_pdbqt_path = os.path.join(data_dir(),
                                              "pyrrolidine_hyd.pdbqt")
        pyrrolidine_pdb.load_from_files(pyrrolidine_pdb_path,
                                        pyrrolidine_pdbqt_path)
        nitrogen_charges = pyrrolidine_pdb.identify_nitrogen_charges()
        assert len(nitrogen_charges) == 1
        assert nitrogen_charges[0].positive  # Should be positive

    def test_carbon_charges(self):
        """
        TestPDB: Verify that carbon groups are charged correctly.
        """
        # Guanidine is positively charged at physiological pH
        guanidine_pdb = PDB()
        guanidine_pdb_path = os.path.join(data_dir(), "guanidine_hyd.pdb")
        guanidine_pdbqt_path = os.path.join(data_dir(), "guanidine_hyd.pdbqt")
        guanidine_pdb.load_from_files(guanidine_pdb_path,
                                      guanidine_pdbqt_path)
        carbon_charges = guanidine_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert carbon_charges[0].positive  # Should be positive

        # sulfaguanidine contains a guanidine group that is likely to be
        # positively protonated at physiological pH
        sulfaguanidine_pdb = PDB()
        sulfaguanidine_pdb_path = os.path.join(data_dir(),
                                               "sulfaguanidine_hyd.pdb")
        sulfaguanidine_pdbqt_path = os.path.join(data_dir(),
                                                 "sulfaguanidine_hyd.pdbqt")
        sulfaguanidine_pdb.load_from_files(sulfaguanidine_pdb_path,
                                           sulfaguanidine_pdbqt_path)
        carbon_charges = sulfaguanidine_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert carbon_charges[0].positive  # Should be positive

        # Formic acid is a carboxylic acid, which should be negatively charged.
        formic_acid_pdb = PDB()
        formic_acid_pdb_path = os.path.join(data_dir(), "formic_acid_hyd.pdb")
        formic_acid_pdbqt_path = os.path.join(data_dir(),
                                              "formic_acid_hyd.pdbqt")
        formic_acid_pdb.load_from_files(formic_acid_pdb_path,
                                        formic_acid_pdbqt_path)
        carbon_charges = formic_acid_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        # Should be negatively charged.
        assert not carbon_charges[0].positive

    def test_phosphorus_charges(self):
        """
        TestPDB: Verify that Phosphorus groups are charged correctly.
        """
        # CID82671 contains a phosphate between two aromatic groups.
        phosphate_pdb = PDB()
        phosphate_pdb_path = os.path.join(data_dir(), "82671_hyd.pdb")
        # NOTE(review): this passes the .pdb file as the PDBQT input, unlike
        # every other test here; presumably "82671_hyd.pdbqt" was intended.
        # Confirm that file exists before changing it.
        phosphate_pdbqt_path = os.path.join(data_dir(), "82671_hyd.pdb")
        phosphate_pdb.load_from_files(phosphate_pdb_path,
                                      phosphate_pdbqt_path)
        phosphorus_charges = phosphate_pdb.identify_phosphorus_charges()
        assert len(phosphorus_charges) == 1
        # Should be negatively charged.
        assert not phosphorus_charges[0].positive

    def test_sulfur_charges(self):
        """
        TestPDB: Verify that sulfur groups are charged correctly.
        """
        triflic_acid_pdb = PDB()
        triflic_acid_pdb_path = os.path.join(data_dir(),
                                             "triflic_acid_hyd.pdb")
        triflic_acid_pdbqt_path = os.path.join(data_dir(),
                                               "triflic_acid_hyd.pdbqt")
        triflic_acid_pdb.load_from_files(triflic_acid_pdb_path,
                                         triflic_acid_pdbqt_path)
        sulfur_charges = triflic_acid_pdb.identify_sulfur_charges()
        assert len(sulfur_charges) == 1
        # Should be negatively charged.
        assert not sulfur_charges[0].positive

    def test_ligand_assign_aromatics(self):
        """
        TestPDB: Verify that aromatic rings in ligands are identified.
        """
        benzene_pdb = PDB()
        benzene_pdb_path = os.path.join(data_dir(), "benzene_hyd.pdb")
        benzene_pdbqt_path = os.path.join(data_dir(), "benzene_hyd.pdbqt")
        benzene_pdb.load_from_files(benzene_pdb_path, benzene_pdbqt_path)

        # A benzene should have exactly one aromatic ring.
        print(benzene_pdb.aromatic_rings)
        assert len(benzene_pdb.aromatic_rings) == 1
        # The first 6 atoms in the benzene pdb form the aromatic ring.
        assert (set(benzene_pdb.aromatic_rings[0].indices) ==
                set([1, 2, 3, 4, 5, 6]))

    def test_assign_secondary_structure(self):
        """
        TestPDB: Verify that secondary structure is assigned meaningfully.
        """
        # TODO(rbharath): This test is just a stub. Add a more realistic test
        # that checks that nontrivial secondary structure is computed correctly
        # here.
        self.prgr_pdb.assign_secondary_structure()

    def test_get_structure_dict(self):
        """
        TestPDB: Verify that dict with rudimentary structure labels is generated.

        TODO(rbharath): This is just a stub. Add some nontrivial tests here.
        """
        structures = self.prgr_pdb.get_structure_dict()
        print(structures)
        print(len(structures))
def _find_hyd_input_files(pdb_dir):
    """Return the four hydrogenated input filenames found in ``pdb_dir``.

    Returns
    -------
    tuple
      (ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt) as bare
      filenames. An entry is None when no file with that suffix exists; when
      several files share a suffix, the last one listed wins.
    """
    suffixes = ("_ligand_hyd.pdb", "_ligand_hyd.pdbqt",
                "_protein_hyd.pdb", "_protein_hyd.pdbqt")
    found = dict.fromkeys(suffixes)
    for fname in os.listdir(pdb_dir):
        for suffix in suffixes:
            # Literal suffix match: no regex metacharacters to escape, and
            # ".pdb" can never be confused with ".pdbqt".
            if fname.endswith(suffix):
                found[suffix] = fname
                break
    return tuple(found[suffix] for suffix in suffixes)


def featurize_fingerprint(pdb_directories, pickle_out):
    """Featurize all pdbs in provided directories.

    Parameters
    ----------
    pdb_directories: iterable of string
      Directories to process. Each must contain files ending in
      "_ligand_hyd.pdb", "_ligand_hyd.pdbqt", "_protein_hyd.pdb" and
      "_protein_hyd.pdbqt".
    pickle_out: string
      Path of the pickle file to write.

    The pickle holds a dict mapping each directory to a tuple
    (features, smiles, seq): the binana input vector, the ligand SMILES
    string, and the list of protein residue names. Directories whose ligand
    PDB cannot be parsed by Chem.MolFromPDBFile are skipped.

    Raises
    ------
    ValueError
      If a directory lacks one of the four input files, or the computed
      feature vector does not have binana.num_features() entries.
    """
    # Instantiate copy of binana vector
    binana = Binana()
    # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector
    # for derivation.
    feature_len = binana.num_features()
    feature_vectors = {}
    for count, pdb_dir in enumerate(pdb_directories):
        # Report the directory being processed (the builtin `dir` was being
        # formatted here before).
        print("\nprocessing %d-th pdb %s" % (count, pdb_dir))

        print("About to extract ligand and protein input files")
        input_files = _find_hyd_input_files(pdb_dir)
        ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt = input_files

        print("Extracted Input Files:")
        print((ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt))
        if not all(input_files):
            raise ValueError("Required files not present for %s" % pdb_dir)
        ligand_pdb_path = os.path.join(pdb_dir, ligand_pdb)
        ligand_pdbqt_path = os.path.join(pdb_dir, ligand_pdbqt)
        protein_pdb_path = os.path.join(pdb_dir, protein_pdb)
        protein_pdbqt_path = os.path.join(pdb_dir, protein_pdbqt)

        print("About to load ligand from input files")
        ligand_pdb_obj = PDB()
        ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path)

        print("About to load protein from input files")
        protein_pdb_obj = PDB()
        protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

        print("About to generate feature vector.")
        features = binana.compute_input_vector(ligand_pdb_obj,
                                               protein_pdb_obj)
        if len(features) != feature_len:
            raise ValueError("Feature length incorrect on %s" % pdb_dir)
        print("Feature vector generated correctly.")

        print("About to compute ligand smiles string.")
        ligand_mol = Chem.MolFromPDBFile(ligand_pdb_path)
        # TODO(rbharath): Why does this fail sometimes?
        if ligand_mol is None:
            # Best effort: leave this directory out, but say so instead of
            # dropping it silently.
            print("Skipping %s: could not parse ligand PDB" % pdb_dir)
            continue
        smiles = Chem.MolToSmiles(ligand_mol)

        print("About to compute sequence.")
        protein = md.load(protein_pdb_path)
        seq = [r.name for r in protein.top.residues]

        # Write the computed quantities
        feature_vectors[pdb_dir] = (features, smiles, seq)

    print("About to write pickle to " + pickle_out)
    with open(pickle_out, "wb") as f:
        pickle.dump(feature_vectors, f)
class TestPDB(unittest.TestCase):
    """
    Test PDB class.

    Fixtures are read from data_dir(); every fixture is a pair of files named
    "<stem>_hyd.pdb" and "<stem>_hyd.pdbqt".
    """

    @staticmethod
    def _load_hyd_pdb(stem):
        """Return a new PDB loaded from <stem>_hyd.pdb and <stem>_hyd.pdbqt."""
        pdb = PDB()
        pdb.load_from_files(
            os.path.join(data_dir(), stem + "_hyd.pdb"),
            os.path.join(data_dir(), stem + "_hyd.pdbqt"))
        return pdb

    def setUp(self):
        """
        Instantiate a dummy PDB file.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.pdb = PDB()
        handle, self.pdb_filename = tempfile.mkstemp(suffix=".pdb",
                                                     dir=self.temp_dir)
        # mkstemp returns an OS-level descriptor that the caller must close.
        os.close(handle)

        self.prgr_pdb = self._load_hyd_pdb("prgr")
        self._1r5y_protein = self._load_hyd_pdb("1r5y_protein")

        self.proteins = [("prgr", self.prgr_pdb),
                         ("1r5y", self._1r5y_protein)]

    def tearDown(self):
        """
        Delete temporary directory.
        """
        shutil.rmtree(self.temp_dir)

    def test_add_new_atom(self):
        """
        TestPDB: Verifies that new atoms can be added.
        """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        empty_atom = Atom()
        self.pdb.add_new_atom(empty_atom)
        # Verify that we now have one atom
        assert len(self.pdb.all_atoms.keys()) == 1

    def test_get_residues(self):
        """
        TestPDB: Tests that all residues in PDB are identified.
        """
        residues = self.prgr_pdb.get_residues()
        # prgr.pdb has 280 unique residues
        assert len(residues.keys()) == 280
        prgr_residues = [
            "LEU", "ILE", "ASN", "LEU", "LEU", "MET", "SER", "ILE", "GLU", "PRO",
            "ASP", "VAL", "ILE", "TYR", "ALA", "GLY", "HIS", "ASP", "THR", "SER",
            "SER", "SER", "LEU", "LEU", "THR", "SER", "LEU", "ASN", "GLN", "LEU",
            "GLY", "GLU", "ARG", "GLN", "LEU", "LEU", "SER", "VAL", "VAL", "LYS",
            "TRP", "SER", "LYS", "SER", "LEU", "PRO", "GLY", "PHE", "ARG", "LEU",
            "HIS", "ILE", "ASP", "ASP", "GLN", "ILE", "THR", "LEU", "ILE", "GLN",
            "TYR", "SER", "TRP", "MET", "SER", "LEU", "MET", "VAL", "PHE", "GLY",
            "LEU", "GLY", "TRP", "ARG", "SER", "TYR", "LYS", "HIS", "VAL", "SER",
            "GLY", "GLN", "MET", "LEU", "TYR", "PHE", "ALA", "PRO", "ASP", "LEU",
            "ILE", "LEU", "ASN", "GLU", "GLN", "ARG", "MET", "LYS", "GLU", "PHE",
            "TYR", "SER", "LEU", "CYS", "LEU", "THR", "MET", "TRP", "GLN", "ILE",
            "PRO", "GLN", "GLU", "PHE", "VAL", "LYS", "LEU", "GLN", "VAL", "SER",
            "GLN", "GLU", "GLU", "PHE", "LEU", "CYS", "MET", "LYS", "VAL", "LEU",
            "LEU", "LEU", "LEU", "ASN", "THR", "ILE", "PRO", "LEU", "GLU", "GLY",
            "LEU", "PHE", "MET", "ARG", "TYR", "ILE", "GLU", "LEU", "ALA", "ILE",
            "ARG", "ARG", "PHE", "TYR", "GLN", "LEU", "THR", "LYS", "LEU", "LEU",
            "ASP", "ASN", "LEU", "HIS", "ASP", "LEU", "VAL", "LYS", "GLN", "LEU",
            "HIS", "LEU", "TYR", "CYS", "LEU", "ASN", "THR", "PHE", "ILE", "GLN",
            "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU", "PHE", "PRO", "GLU",
            "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA", "ALA", "GLN", "LEU",
            "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET", "VAL", "LYS", "PRO",
            "LEU", "LEU", "PHE", "HIS", "LYS", "ASN", "LEU", "ASP", "ASP", "ILE",
            "THR", "LEU", "ILE", "GLN", "TYR", "SER", "TRP", "MET", "THR", "ILE",
            "PRO", "LEU", "GLU", "GLY", "LEU", "ARG", "VAL", "LYS", "GLN", "LEU",
            "HIS", "LEU", "TYR", "CYS", "LEU", "ASN", "THR", "PHE", "ILE", "GLN",
            "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU", "PHE", "PRO", "GLU",
            "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA", "ALA", "GLN", "LEU",
            "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET", "VAL", "LYS", "PRO",
        ]
        # Recall the keys have format RESNAME_RESNUMBER_CHAIN
        resnames = [reskey.split("_")[0].strip() for reskey in residues]
        # Only the multiset of residue names is checked, not their order.
        assert sorted(resnames) == sorted(prgr_residues)
        # Summed over all residues, prgr.pdb is expected to have 2788 atoms.
        atom_count = sum(
            len(atom_indices) for atom_indices in residues.values())
        print(atom_count)
        assert atom_count == 2788

    def test_get_lysine_charges(self):
        """
        TestPDB: Test that lysine charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        lysine_charges = self.prgr_pdb.get_lysine_charges(res_list)
        # prgr has 14 lysines.
        print(len(lysine_charges))
        assert len(lysine_charges) == 14
        for charge in lysine_charges:
            # Lysine should be positively charged
            assert charge.positive

    def test_get_arginine_charges(self):
        """
        TestPDB: Test that arginine charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        arginine_charges = self.prgr_pdb.get_arginine_charges(res_list)
        # prgr has 10 arginines
        assert len(arginine_charges) == 10
        for charge in arginine_charges:
            # The guanidium in arginine should be positively charged.
            assert charge.positive

    def test_get_histidine_charges(self):
        """
        TestPDB: Test that histidine charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        histidine_charges = self.prgr_pdb.get_histidine_charges(res_list)
        # prgr has 7 histidines
        assert len(histidine_charges) == 7
        for charge in histidine_charges:
            # The nitrogens pick up positive charges
            assert charge.positive

    def test_get_glutamic_acid_charges(self):
        """
        TestPDB: Test that glutamic acid charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        glutamic_acid_charges = self.prgr_pdb.get_glutamic_acid_charges(
            res_list)
        assert len(glutamic_acid_charges) == 16
        for charge in glutamic_acid_charges:
            # The carboxyls get deprotonated.
            assert not charge.positive

    def test_get_aspartic_acid_charges(self):
        """
        TestPDB: Test that aspartic acid charges are identified correctly.
        """
        res_list = self.prgr_pdb.get_residues()
        aspartic_acid_charges = self.prgr_pdb.get_aspartic_acid_charges(
            res_list)
        assert len(aspartic_acid_charges) == 9
        for charge in aspartic_acid_charges:
            # The carboxyls get deprotonated
            assert not charge.positive

    def test_assign_ligand_aromatics(self):
        """
        TestPDB: Test that non-protein aromatic rings are assigned correctly.
        """
        ### 3ao4 comes from PDBBind-CN and contains some cruft in the PDB file:
        ### atoms without residues labelled. This triggered some problems with
        ### non-protein aromatics complaining.
        # TODO(rbharath): Add a stub here.
        # For now the test only checks that loading completes without raising.
        self._load_hyd_pdb("3ao4_protein")

    def test_remove_redundant_rings(self):
        """
        TestPDB: Test that redundant rings are removed.
        """
        # Recall that each ring is represented as a list of atom indices.
        # Test that rings of length 0 are removed
        assert remove_redundant_rings([[]]) == []
        # Test that supersets are removed
        assert (remove_redundant_rings([[1, 2, 3], [1, 3, 4, 5],
                                        [1, 2, 3, 4, 5]])
                == [[1, 2, 3], [1, 3, 4, 5]])
        # Ensure that duplicate rings are handled correctly (that is, only one
        # copy of a duplicate ring should remain)
        assert remove_redundant_rings([[1, 2, 3], [1, 3, 2]]) == [[1, 2, 3]]

    def test_assign_protein_aromatics(self):
        """
        TestPDB: Test that aromatic rings are assigned correctly.
        """
        for name, protein in self.proteins:
            # The proteins should have aromatic rings assigned already by
            # load_from_files()
            print("Processing aromatics for %s" % name)
            for aromatic in protein.aromatic_rings:
                assert aromatic is not None

    def test_get_phenylalanine_aromatics(self):
        """
        TestPDB: Test that phenylalanine aromatic rings are retrieved.
        """
        res_list = self.prgr_pdb.get_residues()
        phenylalanine_aromatics = (
            self.prgr_pdb.get_phenylalanine_aromatics(res_list))

        # prgr has 13 phenylalanines, each of which has 1 aromatic ring.
        assert len(phenylalanine_aromatics) == 13
        for aromatic in phenylalanine_aromatics:
            # The aromatic rings in phenylalanine have 6 elements each
            assert len(aromatic.indices) == 6

    def test_get_tyrosine_aromatics(self):
        """
        TestPDB: Test that tyrosine aromatic rings are retrieved.
        """
        # prgr has 10 tyrosines, each of which has 1 aromatic ring.
        res_list = self.prgr_pdb.get_residues()
        tyrosine_aromatics = self.prgr_pdb.get_tyrosine_aromatics(res_list)
        assert len(tyrosine_aromatics) == 10
        for aromatic in tyrosine_aromatics:
            # The aromatic rings in tyrosine have 6 elements each
            assert len(aromatic.indices) == 6

    def test_get_histidine_aromatics(self):
        """
        TestPDB: Test that histidine aromatic rings are retrieved.
        """
        res_list = self.prgr_pdb.get_residues()
        histidine_aromatics = self.prgr_pdb.get_histidine_aromatics(res_list)
        # prgr has 7 histidines, each of which has 1 aromatic ring.
        assert len(histidine_aromatics) == 7
        for aromatic in histidine_aromatics:
            # The aromatic rings in histidine have 5 elements each
            print(len(aromatic.indices))
            assert len(aromatic.indices) == 5

    def test_get_tryptophan_aromatics(self):
        """
        TestPDB: Test that tryptophan aromatic rings are retrieved.
        """
        res_list = self.prgr_pdb.get_residues()
        tryptophan_aromatics = self.prgr_pdb.get_tryptophan_aromatics(
            res_list)
        # prgr has 5 tryptophans, each of which has 2 aromatic rings.
        print(len(tryptophan_aromatics))
        assert len(tryptophan_aromatics) == 10
        num_five_rings, num_six_rings = 0, 0
        for aromatic in tryptophan_aromatics:
            # One aromatic ring in tryptophan has 6 elements,
            # while the other has 5 elements.
            if len(aromatic.indices) == 6:
                num_six_rings += 1
            elif len(aromatic.indices) == 5:
                num_five_rings += 1
        assert num_six_rings == 5
        assert num_five_rings == 5

    def test_connected_atoms(self):
        """
        TestPDB: Verifies that connected atom retrieval works.
        """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        carbon_atom = Atom(element="C")
        oxygen_atom = Atom(element="O")
        hydrogen_atom = Atom(element="H")

        self.pdb.add_new_atom(carbon_atom)
        self.pdb.add_new_atom(oxygen_atom)
        self.pdb.add_new_atom(hydrogen_atom)

        # We want a carboxyl, so C connects O and H
        carbon_atom.indices_of_atoms_connecting = [2, 3]
        oxygen_atom.indices_of_atoms_connecting = [1]
        hydrogen_atom.indices_of_atoms_connecting = [1]

        connected_oxygens = self.pdb.connected_atoms(1, "O")
        assert len(connected_oxygens) == 1

        connected_hydrogens = self.pdb.connected_atoms(1, "H")
        assert len(connected_hydrogens) == 1

    def test_load_bonds_from_pdb(self):
        """
        TestPDB: Verifies that bonds can be loaded from PDB.
        """
        pdb = PDB()
        # Test that we can load CO2
        carbon_atom = Atom(element="C")
        oxygen_atom_1 = Atom(element="O")
        oxygen_atom_2 = Atom(element="O")

        pdb.add_new_atom(carbon_atom)
        pdb.add_new_atom(oxygen_atom_1)
        pdb.add_new_atom(oxygen_atom_2)
        # NOTE(review): the three literals below are adjacent with no commas,
        # so Python concatenates them into ONE list element and the file
        # written contains a single line. Three separate CONECT records were
        # presumably intended -- confirm against load_bonds_from_pdb's
        # parsing before adding the commas.
        lines = [
            "CONECT 1 2 3 "
            "CONECT 2 "
            "CONECT 3 "
        ]
        # Text mode so the str payload can be written as-is.
        with tempfile.NamedTemporaryFile(mode="w") as temp:
            temp.write("\n".join(lines))
            temp.flush()
            pdb.load_bonds_from_pdb(temp.name)
        assert len(carbon_atom.indices_of_atoms_connecting) == 2
        assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
        assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0

    def test_connected_heavy_atoms(self):
        """
        TestPDB: Verifies retrieval of connected heavy atoms.
        """
        # Verify that no atoms are present when we start.
        assert len(self.pdb.all_atoms.keys()) == 0
        carbon_atom = Atom(element="C")
        oxygen_atom = Atom(element="O")
        hydrogen_atom = Atom(element="H")

        self.pdb.add_new_atom(carbon_atom)
        self.pdb.add_new_atom(oxygen_atom)
        self.pdb.add_new_atom(hydrogen_atom)

        # We want a carboxyl, so C connects O and H
        carbon_atom.indices_of_atoms_connecting = [2, 3]
        oxygen_atom.indices_of_atoms_connecting = [1]
        hydrogen_atom.indices_of_atoms_connecting = [1]

        # Only the oxygen (index 2) should be reported for the carbon.
        connected_heavy_atoms = self.pdb.connected_heavy_atoms(1)
        assert len(connected_heavy_atoms) == 1
        assert connected_heavy_atoms[0] == 2

    def test_assign_non_protein_charges(self):
        """
        TestPDB: Verify that charges are properly added to ligands.
        """
        # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
        # There should be 3 charged groups, two positive, one negative
        # Notice that load automatically identifies non-protein charges.
        ammonium_sulfate_pdb = self._load_hyd_pdb("ammonium_sulfate")
        assert len(ammonium_sulfate_pdb.charges) == 3
        num_pos, num_neg = 0, 0
        for charge in ammonium_sulfate_pdb.charges:
            if charge.positive:
                num_pos += 1
            else:
                num_neg += 1
        assert num_pos == 2
        assert num_neg == 1

    def test_metallic_charges(self):
        """
        TestPDB: Verify that non-protein charges are assigned properly.
        """
        # Test metallic ion charge.
        magnesium_pdb = PDB()
        magnesium_atom = Atom(element="MG",
                              coordinates=Point(coords=np.array([0, 0, 0])))
        magnesium_pdb.add_new_non_protein_atom(magnesium_atom)
        metallic_charges = magnesium_pdb.identify_metallic_charges()
        assert len(metallic_charges) == 1

    def test_nitrogen_charges(self):
        """
        TestPDB: Verify that nitrogen groups are charged correctly.
        """
        # Test ammonium sulfate: (NH4)+(NH4)+(SO4)(2-)
        # The labeling should pick up 2 charged nitrogen groups for two
        # ammoniums.
        ammonium_sulfate_pdb = self._load_hyd_pdb("ammonium_sulfate")
        nitrogen_charges = ammonium_sulfate_pdb.identify_nitrogen_charges()
        assert len(nitrogen_charges) == 2
        assert nitrogen_charges[0].positive  # Should be positive
        assert nitrogen_charges[1].positive  # Should be positive

        # Test pyrrolidine (CH2)4NH. The nitrogen here should be sp3
        # hybridized, so is likely to pick up an extra proton to its nitrogen
        # at physiological pH.
        pyrrolidine_pdb = self._load_hyd_pdb("pyrrolidine")
        nitrogen_charges = pyrrolidine_pdb.identify_nitrogen_charges()
        assert len(nitrogen_charges) == 1
        assert nitrogen_charges[0].positive  # Should be positive

    def test_carbon_charges(self):
        """
        TestPDB: Verify that carbon groups are charged correctly.
        """
        # Guanidine is positively charged at physiological pH
        guanidine_pdb = self._load_hyd_pdb("guanidine")
        carbon_charges = guanidine_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert carbon_charges[0].positive  # Should be positive

        # sulfaguanidine contains a guanidine group that is likely to be
        # positively protonated at physiological pH
        sulfaguanidine_pdb = self._load_hyd_pdb("sulfaguanidine")
        carbon_charges = sulfaguanidine_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert carbon_charges[0].positive  # Should be positive

        # Formic acid is a carboxylic acid, which should be negatively charged.
        formic_acid_pdb = self._load_hyd_pdb("formic_acid")
        carbon_charges = formic_acid_pdb.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert not carbon_charges[0].positive  # Should be negatively charged.

    def test_phosphorus_charges(self):
        """
        TestPDB: Verify that Phosphorus groups are charged correctly.
        """
        # CID82671 contains a phosphate between two aromatic groups.
        # The pdbqt argument is now "82671_hyd.pdbqt" like every other
        # fixture; the ".pdb" file used to be passed for both arguments.
        phosphate_pdb = self._load_hyd_pdb("82671")
        phosphorus_charges = phosphate_pdb.identify_phosphorus_charges()
        assert len(phosphorus_charges) == 1
        # Should be negatively charged.
        assert not phosphorus_charges[0].positive

    def test_sulfur_charges(self):
        """
        TestPDB: Verify that sulfur groups are charged correctly.
        """
        triflic_acid_pdb = self._load_hyd_pdb("triflic_acid")
        sulfur_charges = triflic_acid_pdb.identify_sulfur_charges()
        assert len(sulfur_charges) == 1
        assert not sulfur_charges[0].positive  # Should be negatively charged.

    def test_ligand_assign_aromatics(self):
        """
        TestPDB: Verify that aromatic rings in ligands are identified.
        """
        benzene_pdb = self._load_hyd_pdb("benzene")

        # A benzene should have exactly one aromatic ring.
        print(benzene_pdb.aromatic_rings)
        assert len(benzene_pdb.aromatic_rings) == 1
        # The first 6 atoms in the benzene pdb form the aromatic ring.
        assert (set(benzene_pdb.aromatic_rings[0].indices)
                == {1, 2, 3, 4, 5, 6})

    def test_assign_secondary_structure(self):
        """
        TestPDB: Verify that secondary structure is assigned meaningfully.
        """
        # TODO(rbharath): This test is just a stub. Add a more realistic test
        # that checks that nontrivial secondary structure is computed correctly
        # here.
        self.prgr_pdb.assign_secondary_structure()

    def test_get_structure_dict(self):
        """
        TestPDB: Verify that dict with rudimentary structure labels is generated.

        TODO(rbharath): This is just a stub. Add some nontrivial tests here.
        """
        structures = self.prgr_pdb.get_structure_dict()
        print(structures)
        print(len(structures))