def test_nitrogen_charges(self):
        """
        TestPDB: Check that charged nitrogen groups are found and positive.
        """
        # Ammonium sulfate is (NH4)+ (NH4)+ (SO4)(2-): one charged nitrogen
        # group is expected for each of the two ammonium ions.
        salt = PDB()
        salt.load_from_files(
            os.path.join(data_dir(), "ammonium_sulfate_hyd.pdb"),
            os.path.join(data_dir(), "ammonium_sulfate_hyd.pdbqt"))
        salt_charges = salt.identify_nitrogen_charges()
        assert len(salt_charges) == 2
        for index in (0, 1):
            assert salt_charges[index].positive  # Should be positive

        # Pyrrolidine is (CH2)4NH. Its nitrogen should be sp3 hybridized and
        # so is likely to carry an extra proton at physiological pH.
        ring = PDB()
        ring.load_from_files(
            os.path.join(data_dir(), "pyrrolidine_hyd.pdb"),
            os.path.join(data_dir(), "pyrrolidine_hyd.pdbqt"))
        ring_charges = ring.identify_nitrogen_charges()
        assert len(ring_charges) == 1
        assert ring_charges[0].positive  # Should be positive
Exemple #2
0
def featurize_dude(dude_dir, target, pickle_dir, num_jobs, max_actives=1):
    """Featurize DUD-E docked actives for one target and pickle the result.

    For each selected entry of ``<dude_dir>/<target>/actives`` the matching
    ``<name>_hyd_out.pdbqt`` file is converted to PDB and loaded as a
    MultiStructure, and every structure in it is featurized with Binana
    against the target's receptor. The result is a dict mapping compound
    name to a list of feature vectors, written gzip-compressed to
    ``<dude_dir>/<target>/out.pkl.gz``. Decoys are not featurized.

    Parameters
    ----------
    dude_dir: string
      Path to DUD-E directory
    target: string
      Name of DUD-E target.
    pickle_dir: string
      Path to directory to output pickles.
      NOTE(review): currently unused; output goes to the target directory.
    num_jobs: int
      Only used to report how many compounds each job would receive; the
      work itself is not split.
    max_actives: int or None
      Number of entries of the actives directory to process. Defaults to 1,
      the limit that used to be hard-coded for debugging; None processes
      every entry.
      NOTE(review): every directory entry is treated as a compound file, so
      check the directory contents before raising this limit.
    """
    target_dir = os.path.join(dude_dir, target)
    actives_dir = os.path.join(target_dir, "actives")
    pickle_out = os.path.join(target_dir, "out.pkl.gz")

    # A slice bound of None keeps the whole listing.
    actives = os.listdir(actives_dir)[:max_actives]

    num_per_job = int(math.ceil(len(actives) / float(num_jobs)))
    print("Number per job: %d" % num_per_job)

    # Use the receptor that belongs to the requested target instead of an
    # absolute path tied to one machine and one target.
    protein_pdb_path = os.path.join(target_dir, "receptor_hyd.pdb")
    protein_pdbqt_path = os.path.join(target_dir, "receptor_hyd.pdbqt")

    print("About to load protein from input files")
    protein_pdb_obj = PDB()
    protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

    binana = Binana()
    feature_vectors = {}
    for compound in actives:
        # The compound name is everything before the first dot.
        compound_name = compound.split(".")[0]
        compound_pdbqt = os.path.join(actives_dir,
                                      compound_name + "_hyd_out.pdbqt")

        # Convert the pdbqt to pdb (presumably written into actives_dir
        # under the name loaded below -- confirm against pdbqt_to_pdb).
        pdbqt_to_pdb(compound_pdbqt, actives_dir)
        compound_pdb = os.path.join(actives_dir,
                                    compound_name + "_hyd_out.pdb")

        structures = MultiStructure()
        structures.load_from_files(compound_pdb, compound_pdbqt)

        vectors = []
        # Sorted keys give a deterministic order of the feature vectors.
        for key in sorted(structures.molecules.keys()):
            structure = structures.molecules[key]
            print("type(structure)")
            print(type(structure))
            vectors.append(
                binana.compute_input_vector(structure, protein_pdb_obj))
        feature_vectors[compound_name] = vectors

    with gzip.open(pickle_out, "wb") as f:
        pickle.dump(feature_vectors, f)
 def test_metallic_charges(self):
   """
   TestPDB: Check that a single metal atom yields one metallic charge.
   """
   # Structure holding only a magnesium atom placed at the origin.
   ion_pdb = PDB()
   origin = Point(coords=np.array([0,0,0]))
   ion_pdb.add_new_non_protein_atom(Atom(element="MG", coordinates=origin))
   found_charges = ion_pdb.identify_metallic_charges()
   assert len(found_charges) == 1
 def test_metallic_charges(self):
     """
     TestPDB: Check that non-protein metal ions are reported as charges.
     """
     # Add one magnesium atom, located at (0, 0, 0), as a non-protein atom
     # and expect exactly one metallic charge to be identified.
     structure = PDB()
     structure.add_new_non_protein_atom(
         Atom(element="MG", coordinates=Point(coords=np.array([0, 0, 0]))))
     assert len(structure.identify_metallic_charges()) == 1
 def test_assign_ligand_aromatics(self):
   """
   TestPDB: Test that non-protein aromatic rings are assigned correctly.

   Currently a smoke test: it only loads the 3ao4 protein files and makes
   no assertions.
   """
   ### 3ao4 is a PDBBind-CN entry whose PDB file contains some cruft (atoms
   ### with no residue label), which used to make the non-protein aromatics
   ### code complain.
   # TODO(rbharath): Add a stub here.
   protein = PDB()
   protein.load_from_files(
       os.path.join(data_dir(), "3ao4_protein_hyd.pdb"),
       os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt"))
 def test_assign_ligand_aromatics(self):
     """
     TestPDB: Test that non-protein aromatic rings are assigned correctly.

     No assertions are made yet; the test passes when loading 3ao4 does
     not raise.
     """
     ### The 3ao4 PDB file (from PDBBind-CN) has atoms without residue
     ### labels; that cruft caused complaints from the non-protein
     ### aromatics handling.
     # TODO(rbharath): Add a stub here.
     pdb_path = os.path.join(data_dir(), "3ao4_protein_hyd.pdb")
     pdbqt_path = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt")
     loaded = PDB()
     loaded.load_from_files(pdb_path, pdbqt_path)
 def test_phosphorus_charges(self):
     """
     TestPDB: Check that a phosphate is found and is negatively charged.
     """
     # CID82671 has a phosphate sitting between two aromatic groups.
     # NOTE(review): the second (pdbqt) argument is also given the ".pdb"
     # file, unlike the other tests here -- confirm whether
     # "82671_hyd.pdbqt" was intended.
     molecule = PDB()
     molecule.load_from_files(
         os.path.join(data_dir(), "82671_hyd.pdb"),
         os.path.join(data_dir(), "82671_hyd.pdb"))
     found = molecule.identify_phosphorus_charges()
     assert len(found) == 1
     # Should be negatively charged.
     assert not found[0].positive
  def test_nitrogen_charges(self):
    """
    TestPDB: Check that nitrogen groups are identified as positive charges.
    """
    # Each case is (pdb file, pdbqt file, expected number of charges).
    cases = [
        # Ammonium sulfate, (NH4)+(NH4)+(SO4)(2-): the labeling should pick
        # up one charged nitrogen group per ammonium, two in total.
        ("ammonium_sulfate_hyd.pdb", "ammonium_sulfate_hyd.pdbqt", 2),
        # Pyrrolidine, (CH2)4NH: the nitrogen should be sp3 hybridized, so
        # it likely picks up an extra proton at physiological pH.
        ("pyrrolidine_hyd.pdb", "pyrrolidine_hyd.pdbqt", 1),
    ]
    for pdb_name, pdbqt_name, expected in cases:
      molecule = PDB()
      molecule.load_from_files(
          os.path.join(data_dir(), pdb_name),
          os.path.join(data_dir(), pdbqt_name))
      charges = molecule.identify_nitrogen_charges()
      assert len(charges) == expected
      for index in range(expected):
        assert charges[index].positive  # Should be positive
    def test_load_bonds_from_pdb(self):
        """
        TestPDB: Verifies that bonds can be loaded from PDB.

        Adds one carbon and two oxygen atoms to a PDB object, writes three
        CONECT records to a temporary file, loads the bonds from that file
        and checks how many connections each atom ends up with.
        """
        pdb = PDB()
        # Test that we can load CO2
        carbon_atom = Atom(element="C")
        oxygen_atom_1 = Atom(element="O")
        oxygen_atom_2 = Atom(element="O")

        pdb.add_new_atom(carbon_atom)
        pdb.add_new_atom(oxygen_atom_1)
        pdb.add_new_atom(oxygen_atom_2)
        # One CONECT record per list entry. The separating commas matter:
        # without them Python joins adjacent string literals into a single
        # element, so all three records would land on one line of the file.
        lines = [
            "CONECT    1    2    3                                                 ",
            "CONECT    2                                                           ",
            "CONECT    3                                                           ",
        ]
        # Text mode, because str data is written; the default mode is binary.
        with tempfile.NamedTemporaryFile(mode="w") as temp:
            temp.write("\n".join(lines))
            # Flush so the data is on disk before it is re-read by name.
            temp.flush()
            pdb.load_bonds_from_pdb(temp.name)
        assert len(carbon_atom.indices_of_atoms_connecting) == 2
        assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
        assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0
 def test_sulfur_charges(self):
     """
     TestPDB: Check that a sulfur group is found and is negatively charged.
     """
     # Load triflic acid from its pdb/pdbqt pair.
     acid = PDB()
     acid.load_from_files(
         os.path.join(data_dir(), "triflic_acid_hyd.pdb"),
         os.path.join(data_dir(), "triflic_acid_hyd.pdbqt"))
     found = acid.identify_sulfur_charges()
     assert len(found) == 1
     # Should be negatively charged.
     assert not found[0].positive
Exemple #11
0
 def test_phosphorus_charges(self):
   """
   TestPDB: Check that phosphorus groups get a single negative charge.
   """
   # CID82671: a phosphate located between two aromatic groups.
   # NOTE(review): both arguments of load_from_files receive the ".pdb"
   # file here, while the other tests pass a ".pdbqt" second -- confirm
   # whether "82671_hyd.pdbqt" was intended.
   pdb_path = os.path.join(data_dir(), "82671_hyd.pdb")
   pdbqt_path = os.path.join(data_dir(), "82671_hyd.pdb")
   compound = PDB()
   compound.load_from_files(pdb_path, pdbqt_path)
   charges = compound.identify_phosphorus_charges()
   assert len(charges) == 1
   is_positive = charges[0].positive
   assert not is_positive  # Should be negatively charged.
Exemple #12
0
  def test_ligand_assign_aromatics(self):
    """
    TestPDB: Check that the aromatic ring of a benzene ligand is found.
    """
    benzene = PDB()
    benzene.load_from_files(
        os.path.join(data_dir(), "benzene_hyd.pdb"),
        os.path.join(data_dir(), "benzene_hyd.pdbqt"))

    # Benzene has exactly one aromatic ring; print what was found so a
    # failure is easier to diagnose.
    print(benzene.aromatic_rings)
    assert len(benzene.aromatic_rings) == 1
    # The ring is made of the first 6 atoms of the benzene pdb.
    ring_indices = set(benzene.aromatic_rings[0].indices)
    assert ring_indices == {1, 2, 3, 4, 5, 6}
    def test_ligand_assign_aromatics(self):
        """
        TestPDB: Check that aromatic rings in a ligand are detected.
        """
        pdb_path = os.path.join(data_dir(), "benzene_hyd.pdb")
        pdbqt_path = os.path.join(data_dir(), "benzene_hyd.pdbqt")
        ligand = PDB()
        ligand.load_from_files(pdb_path, pdbqt_path)

        # Exactly one aromatic ring is expected for benzene.
        print(ligand.aromatic_rings)
        assert len(ligand.aromatic_rings) == 1
        # That ring consists of the first 6 atoms in the benzene pdb.
        expected_indices = {1, 2, 3, 4, 5, 6}
        assert set(ligand.aromatic_rings[0].indices) == expected_indices
Exemple #14
0
 def test_sulfur_charges(self):
   """
   TestPDB: Check that sulfur groups receive a single negative charge.
   """
   # Triflic acid is loaded from its pdb/pdbqt pair in the data directory.
   pdb_path = os.path.join(data_dir(), "triflic_acid_hyd.pdb")
   pdbqt_path = os.path.join(data_dir(), "triflic_acid_hyd.pdbqt")
   acid_pdb = PDB()
   acid_pdb.load_from_files(pdb_path, pdbqt_path)
   charges = acid_pdb.identify_sulfur_charges()
   assert len(charges) == 1
   is_positive = charges[0].positive
   assert not is_positive  # Should be negatively charged.
Exemple #15
0
  def test_load_bonds_from_pdb(self):
    """
    TestPDB: Verifies that bonds can be loaded from PDB.

    Adds one carbon and two oxygen atoms to a PDB object, writes three
    CONECT records to a temporary file, loads the bonds from that file and
    checks how many connections each atom ends up with.
    """
    pdb = PDB()
    # Test that we can load CO2
    carbon_atom = Atom(element="C")
    oxygen_atom_1 = Atom(element="O")
    oxygen_atom_2 = Atom(element="O")

    pdb.add_new_atom(carbon_atom)
    pdb.add_new_atom(oxygen_atom_1)
    pdb.add_new_atom(oxygen_atom_2)
    # One CONECT record per list entry. The separating commas matter:
    # without them Python joins adjacent string literals into a single
    # element, so all three records would land on one line of the file.
    lines = [
      "CONECT    1    2    3                                                 ",
      "CONECT    2                                                           ",
      "CONECT    3                                                           ",
    ]
    # Text mode, because str data is written; the default mode is binary.
    with tempfile.NamedTemporaryFile(mode="w") as temp:
      temp.write("\n".join(lines))
      # Flush so the data is on disk before it is re-read by name.
      temp.flush()
      pdb.load_bonds_from_pdb(temp.name)
    assert len(carbon_atom.indices_of_atoms_connecting) == 2
    assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
    assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0
Exemple #16
0
    def compute_input_vector_from_files(self, ligand_pdb_filename,
                                        receptor_pdb_filename, line_header):
        """Computes feature vector for ligand-receptor pair.

        Loads the receptor and the ligand into PDB objects, assigns
        secondary structure to the receptor, and featurizes the pair with
        compute_input_vector.

        Parameters
        ----------
        ligand_pdb_filename: string
          path to ligand's pdb file.
        receptor_pdb_filename: string
          path to receptor pdb file.
        line_header: string
          line separator in PDB files
          NOTE(review): this is forwarded as the second positional argument
          of PDB.load_from_files, which other call sites use for a pdbqt
          path -- confirm that the signatures still agree.

        Returns
        -------
        Whatever self.compute_input_vector(ligand, receptor) returns.
        """
        # Load receptor and ligand from file.
        receptor = PDB()
        receptor.load_from_files(receptor_pdb_filename, line_header)
        receptor.assign_secondary_structure()
        ligand = PDB()
        ligand.load_from_files(ligand_pdb_filename, line_header)
        # Hand the result back to the caller instead of discarding it.
        return self.compute_input_vector(ligand, receptor)
Exemple #17
0
  def setUp(self):
    """
    Instantiate a dummy PDB file.

    Creates a temporary directory with a ".pdb" temp file inside it, a
    fresh PDB object, and PDB objects loaded from the prgr and 1r5y data
    files. The loaded proteins are also collected, with their names, in
    self.proteins.
    """
    self.temp_dir = tempfile.mkdtemp()
    self.pdb = PDB()

    # mkstemp returns an already-open OS-level file descriptor together
    # with the path. Only the path is used, so close the descriptor right
    # away instead of leaking one per test.
    handle, self.pdb_filename = tempfile.mkstemp(suffix=".pdb",
        dir=self.temp_dir)
    os.close(handle)

    self.prgr_pdb = PDB()
    prgr_pdb_path = os.path.join(data_dir(), "prgr_hyd.pdb")
    prgr_pdbqt_path = os.path.join(data_dir(), "prgr_hyd.pdbqt")
    self.prgr_pdb.load_from_files(prgr_pdb_path, prgr_pdbqt_path)

    self._1r5y_protein = PDB()
    _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
    _1r5y_protein_pdbqt = os.path.join(data_dir(), "1r5y_protein_hyd.pdbqt")
    self._1r5y_protein.load_from_files(_1r5y_protein_pdb, _1r5y_protein_pdbqt)

    # (name, PDB object) pairs.
    self.proteins = [("prgr", self.prgr_pdb), ("1r5y", self._1r5y_protein)]
 def test_assign_non_protein_charges(self):
     """
     TestPDB: Check the charges assigned to a ligand while it is loaded.
     """
     # Ammonium sulfate is (NH4)+(NH4)+(SO4)(2-): three charged groups are
     # expected, two of them positive and one negative.
     salt = PDB()
     # Loading identifies the non-protein charges automatically.
     salt.load_from_files(
         os.path.join(data_dir(), "ammonium_sulfate_hyd.pdb"),
         os.path.join(data_dir(), "ammonium_sulfate_hyd.pdbqt"))
     assert len(salt.charges) == 3
     # Tally the charges by sign in a single pass.
     tally = {True: 0, False: 0}
     for charge in salt.charges:
         tally[bool(charge.positive)] += 1
     assert tally[True] == 2
     assert tally[False] == 1
Exemple #19
0
 def test_assign_non_protein_charges(self):
   """
   TestPDB: Check that loading a ligand assigns its charges correctly.
   """
   # Ammonium sulfate, (NH4)+(NH4)+(SO4)(2-), should yield 3 charged
   # groups: two positive and one negative.
   pdb_path = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdb")
   pdbqt_path = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdbqt")
   ligand = PDB()
   # The load step itself identifies non-protein charges.
   ligand.load_from_files(pdb_path, pdbqt_path)
   assert len(ligand.charges) == 3
   positives = 0
   negatives = 0
   for group in ligand.charges:
     if not group.positive:
       negatives += 1
       continue
     positives += 1
   assert positives == 2
   assert negatives == 1
Exemple #20
0
  def compute_input_vector_from_files(self, ligand_pdb_filename,
      receptor_pdb_filename, line_header):
    """Computes feature vector for ligand-receptor pair.

    Loads the receptor and the ligand into PDB objects, assigns secondary
    structure to the receptor, and featurizes the pair with
    compute_input_vector.

    Parameters
    ----------
    ligand_pdb_filename: string
      path to ligand's pdb file.
    receptor_pdb_filename: string
      path to receptor pdb file.
    line_header: string
      line separator in PDB files
      NOTE(review): this is forwarded as the second positional argument of
      PDB.load_from_files, which other call sites use for a pdbqt path --
      confirm that the signatures still agree.

    Returns
    -------
    Whatever self.compute_input_vector(ligand, receptor) returns.
    """
    # Load receptor and ligand from file.
    receptor = PDB()
    receptor.load_from_files(receptor_pdb_filename, line_header)
    receptor.assign_secondary_structure()
    ligand = PDB()
    ligand.load_from_files(ligand_pdb_filename, line_header)
    # Hand the result back to the caller instead of discarding it.
    return self.compute_input_vector(ligand, receptor)
Exemple #21
0
  def _featurize_complex(self, mol_pdb, protein_pdb):
    """
    Compute Binana fingerprint for complex.

    Both inputs are written to files in a scratch directory with
    file.writelines (so each should be a string or an iterable of strings),
    passed through hydrogenate_and_compute_partial_charges, loaded into PDB
    objects and featurized with self.binana. The scratch directory is
    removed before returning, also when a step raises.

    Parameters
    ----------
    mol_pdb: PDB-format text of the ligand molecule.
    protein_pdb: PDB-format text of the protein.

    Returns
    -------
    The value returned by self.binana.compute_input_vector.
    """
    ### OPEN TEMPDIR
    tempdir = tempfile.mkdtemp()
    try:
      mol_pdb_file = os.path.join(tempdir, "mol.pdb")
      with open(mol_pdb_file, "w") as mol_f:
        mol_f.writelines(mol_pdb)
      protein_pdb_file = os.path.join(tempdir, "protein.pdb")
      with open(protein_pdb_file, "w") as protein_f:
        protein_f.writelines(protein_pdb)

      mol_hyd_file = os.path.join(tempdir, "mol_hyd.pdb")
      mol_pdbqt_file = os.path.join(tempdir, "mol_hyd.pdbqt")
      hydrogenate_and_compute_partial_charges(
          mol_pdb_file, "pdb", tempdir, mol_hyd_file, mol_pdbqt_file)

      protein_hyd_file = os.path.join(tempdir, "protein_hyd.pdb")
      protein_pdbqt_file = os.path.join(tempdir, "protein_hyd.pdbqt")
      hydrogenate_and_compute_partial_charges(
          protein_pdb_file, "pdb", tempdir, protein_hyd_file,
          protein_pdbqt_file)

      # NOTE(review): the *_hyd.pdb outputs are never read back; the PDB
      # objects are built from the input pdb files plus the generated pdbqt
      # files -- confirm that this pairing is intended.
      mol_pdb_obj = PDB()
      mol_pdb_obj.load_from_files(mol_pdb_file, mol_pdbqt_file)

      protein_pdb_obj = PDB()
      protein_pdb_obj.load_from_files(protein_pdb_file, protein_pdbqt_file)

      return self.binana.compute_input_vector(mol_pdb_obj, protein_pdb_obj)
    finally:
      ### CLOSE TEMPDIR
      # Runs on success and on failure, so a failing step no longer leaves
      # the scratch directory behind.
      shutil.rmtree(tempdir)
    def setUp(self):
        """
        Instantiate a dummy PDB file.

        Creates a temporary directory with a ".pdb" temp file inside it, a
        fresh PDB object, and PDB objects loaded from the prgr and 1r5y
        data files. The loaded proteins are also collected, with their
        names, in self.proteins.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.pdb = PDB()

        # mkstemp returns an already-open OS-level file descriptor together
        # with the path. Only the path is used, so close the descriptor
        # right away instead of leaking one per test.
        handle, self.pdb_filename = tempfile.mkstemp(suffix=".pdb",
                                                     dir=self.temp_dir)
        os.close(handle)

        self.prgr_pdb = PDB()
        prgr_pdb_path = os.path.join(data_dir(), "prgr_hyd.pdb")
        prgr_pdbqt_path = os.path.join(data_dir(), "prgr_hyd.pdbqt")
        self.prgr_pdb.load_from_files(prgr_pdb_path, prgr_pdbqt_path)

        self._1r5y_protein = PDB()
        _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
        _1r5y_protein_pdbqt = os.path.join(data_dir(),
                                           "1r5y_protein_hyd.pdbqt")
        self._1r5y_protein.load_from_files(_1r5y_protein_pdb,
                                           _1r5y_protein_pdbqt)

        # (name, PDB object) pairs.
        self.proteins = [("prgr", self.prgr_pdb), ("1r5y", self._1r5y_protein)]
def featurize_pdbbind(pdbbind_dir, pickle_out, first_subdir=900):
  """Featurize entries in pdbbind_dir and write features to pickle_out

  pdbbind_dir should be a dir, with K subdirs, one for each protein-ligand
  complex to be featurized. The ligand and receptor should each have a pdb
  and pdbqt file. The ligand files should end in '_ligand_hyd.${FILETYPE}'
  while the receptor files should end in '_protein_hyd.${FILETYPE}'

  The output is a pickled dict mapping each subdirectory name to the
  feature vector computed by Binana for that complex.

  pdbbind_dir: string
    Path to pdbbind directory.
  pickle_out: string
    Path to write pickle output.
  first_subdir: int
    Index (in os.listdir order) of the first subdirectory to featurize;
    earlier ones are skipped. Defaults to 900, the value that used to be
    hard-coded for debugging; pass 0 to featurize every entry.

  Raises ValueError when a subdirectory lacks one of the four input files
  or when a feature vector does not have the expected length.
  """
  assert os.path.isdir(pdbbind_dir)
  # Instantiate copy of binana vector
  binana = Binana()
  feature_vectors = {}

  # Extract the subdirectories in pdbbind_dir
  subdirs = [d for d in os.listdir(pdbbind_dir) if
      os.path.isdir(os.path.join(pdbbind_dir, d))]
  subdirs = subdirs[first_subdir:]

  num_atoms = len(Binana.atom_types)
  # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector
  # for derivation. Floor division keeps the expected length an int;
  # 3*n*(n+1) is always even, so nothing is lost.
  feature_len = (3*num_atoms*(num_atoms+1)//2 + num_atoms + 12 + 6 + 3 + 6 +
      3 + 6 + 3 + 1)
  for count, d in enumerate(subdirs):
    print("\nprocessing %d-th pdb %s" % (count, d))
    subdir = os.path.join(pdbbind_dir, d)

    print("About to extract ligand and protein input files")
    ligand_pdb, ligand_pdbqt = None, None
    protein_pdb, protein_pdbqt = None, None
    for f in os.listdir(subdir):
      # Literal suffix tests: a regex with an unescaped "." would also
      # accept names where some other character replaces the dot.
      if f.endswith("_ligand_hyd.pdb"):
        ligand_pdb = f
      elif f.endswith("_ligand_hyd.pdbqt"):
        ligand_pdbqt = f
      elif f.endswith("_protein_hyd.pdb"):
        protein_pdb = f
      elif f.endswith("_protein_hyd.pdbqt"):
        protein_pdbqt = f

    print("Extracted Input Files:")
    print(str((ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt)))
    if (not ligand_pdb or not ligand_pdbqt or not protein_pdb or not
        protein_pdbqt):
        raise ValueError("Required files not present for %s" % d)
    ligand_pdb_path = os.path.join(subdir, ligand_pdb)
    ligand_pdbqt_path = os.path.join(subdir, ligand_pdbqt)
    protein_pdb_path = os.path.join(subdir, protein_pdb)
    protein_pdbqt_path = os.path.join(subdir, protein_pdbqt)

    print("About to load ligand from input files")
    ligand_pdb_obj = PDB()
    ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path)

    print("About to load protein from input files")
    protein_pdb_obj = PDB()
    protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

    print("About to generate feature vector.")
    vector = binana.compute_input_vector(ligand_pdb_obj,
        protein_pdb_obj)
    feature_vectors[d] = vector
    if len(vector) != feature_len:
      raise ValueError("Feature length incorrect on %s" % d)
    print("Feature vector generated correctly.")

  with open(pickle_out, "wb") as f:
    pickle.dump(feature_vectors, f)
Exemple #24
0
  def setUp(self):
    """
    Create the Binana instance and the protein/ligand pairs used by tests.

    self.test_cases holds (name, protein, ligand) tuples for the 3bwf and
    3zp9 complexes.
    """
    self.binana = Binana()

    def load_structure(pdb_name, pdbqt_name):
      # Build a PDB object from a pdb/pdbqt file pair in the data directory.
      structure = PDB()
      structure.load_from_files(os.path.join(data_dir(), pdb_name),
                                os.path.join(data_dir(), pdbqt_name))
      return structure

    ### 3zp9 comes from PDBBind-CN; pdbbind specifies the ligand as well.
    _3zp9_protein = load_structure("3zp9_protein_hyd.pdb",
                                   "3zp9_protein_hyd.pdbqt")
    _3zp9_ligand = load_structure("3zp9_ligand_hyd.pdb",
                                  "3zp9_ligand_hyd.pdbqt")

    ### 3bwf comes from PDBBind-CN; pdbbind specifies the ligand as well.
    _3bwf_protein = load_structure("3bwf_protein_hyd.pdb",
                                   "3bwf_protein_hyd.pdbqt")
    _3bwf_ligand = load_structure("3bwf_ligand_hyd.pdb",
                                  "3bwf_ligand_hyd.pdbqt")

    self.test_cases = [
        ("3bwf", _3bwf_protein, _3bwf_ligand),
        ("3zp9", _3zp9_protein, _3zp9_ligand),
    ]
Exemple #25
0
  def setUp(self):
    """
    Instantiate local copy of Binana object.
    """
    self.binana = Binana()

    # TODO(rbharath): This load sequence is getting unwieldy. Refactor.

    #### PRGR is from the DUD-E collection
    #prgr_receptor = PDB()
    #prgr_pdb = os.path.join(data_dir(), "prgr_hyd.pdb")
    #prgr_pdbqt = os.path.join(data_dir(), "prgr_hyd.pdbqt")
    #prgr_receptor.load_from_files(prgr_pdb, prgr_pdbqt)
    ## This compound is CHEMBL1164248
    #prgr_active = PDB()
    #prgr_active_pdb = os.path.join(data_dir(), "prgr_active0_hyd.pdb")
    #prgr_active_pdbqt = os.path.join(data_dir(), "prgr_active0_hyd.pdbqt")
    #prgr_active.load_from_files(prgr_active_pdb, prgr_active_pdbqt)

    #### c-Abl is taken from the Autodock Vina examples
    #cabl_receptor = PDB()
    #cabl_receptor_pdb = os.path.join(data_dir(), "c-Abl_hyd.pdb")
    #cabl_receptor_pdbqt = os.path.join(data_dir(), "c-Abl_hyd.pdbqt")
    #cabl_receptor.load_from_files(cabl_receptor_pdb,
    #    cabl_receptor_pdbqt)
    ## This compound is imatinib
    #cabl_active = PDB()
    #cabl_active_pdb = os.path.join(data_dir(), "imatinib_hyd.pdb")
    #cabl_active_pdbqt = os.path.join(data_dir(), "imatinib_hyd.pdbqt")
    #cabl_active.load_from_files(cabl_active_pdb,
    #    cabl_active_pdbqt)

    #### 1zea comes from PDBBind-CN
    ## Python complains about variables starting with numbers, so put an
    ## underscore in front of everything.
    #_1zea_protein = PDB()
    #_1zea_protein_pdb = os.path.join(data_dir(), "1zea_protein_hyd.pdb")
    #_1zea_protein_pdbqt = os.path.join(data_dir(), "1zea_protein_hyd.pdbqt")
    #_1zea_protein.load_from_files(_1zea_protein_pdb, _1zea_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_1zea_ligand = PDB()
    #_1zea_ligand_pdb = os.path.join(data_dir(), "1zea_ligand_hyd.pdb")
    #_1zea_ligand_pdbqt = os.path.join(data_dir(), "1zea_ligand_hyd.pdbqt")
    #_1zea_ligand.load_from_files(_1zea_ligand_pdb, _1zea_ligand_pdbqt)

    #### 1r5y comes from PDBBind-CN
    #_1r5y_protein = PDB()
    #_1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
    #_1r5y_protein_pdbqt = os.path.join(data_dir(), "1r5y_protein_hyd.pdbqt")
    #_1r5y_protein.load_from_files(_1r5y_protein_pdb, _1r5y_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_1r5y_ligand = PDB()
    #_1r5y_ligand_pdb = os.path.join(data_dir(), "1r5y_ligand_hyd.pdb")
    #_1r5y_ligand_pdbqt = os.path.join(data_dir(), "1r5y_ligand_hyd.pdbqt")
    #_1r5y_ligand.load_from_files(_1r5y_ligand_pdb, _1r5y_ligand_pdbqt)

    #### 3ao4 comes from PDBBind-CN
    #_3ao4_protein = PDB()
    #_3ao4_protein_pdb = os.path.join(data_dir(), "3ao4_protein_hyd.pdb")
    #_3ao4_protein_pdbqt = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt")
    #_3ao4_protein.load_from_files(_3ao4_protein_pdb, _3ao4_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_3ao4_ligand = PDB()
    #_3ao4_ligand_pdb = os.path.join(data_dir(), "3ao4_ligand_hyd.pdb")
    #_3ao4_ligand_pdbqt = os.path.join(data_dir(), "3ao4_ligand_hyd.pdbqt")
    #_3ao4_ligand.load_from_files(_3ao4_ligand_pdb, _3ao4_ligand_pdbqt)

    #### 2jdm comes from PDBBind-CN
    #_2jdm_protein = PDB()
    #_2jdm_protein_pdb = os.path.join(data_dir(), "2jdm_protein_hyd.pdb")
    #_2jdm_protein_pdbqt = os.path.join(data_dir(), "2jdm_protein_hyd.pdbqt")
    #_2jdm_protein.load_from_files(_2jdm_protein_pdb, _2jdm_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_2jdm_ligand = PDB()
    #_2jdm_ligand_pdb = os.path.join(data_dir(), "2jdm_ligand_hyd.pdb")
    #_2jdm_ligand_pdbqt = os.path.join(data_dir(), "2jdm_ligand_hyd.pdbqt")
    #_2jdm_ligand.load_from_files(_2jdm_ligand_pdb, _2jdm_ligand_pdbqt)

    #### 3g5k comes from PDBBind-CN
    #_3g5k_protein = PDB()
    #_3g5k_protein_pdb = os.path.join(data_dir(), "3g5k_protein_hyd.pdb")
    #_3g5k_protein_pdbqt = os.path.join(data_dir(), "3g5k_protein_hyd.pdbqt")
    #_3g5k_protein.load_from_files(_3g5k_protein_pdb, _3g5k_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_3g5k_ligand = PDB()
    #_3g5k_ligand_pdb = os.path.join(data_dir(), "3g5k_ligand_hyd.pdb")
    #_3g5k_ligand_pdbqt = os.path.join(data_dir(), "3g5k_ligand_hyd.pdbqt")
    #_3g5k_ligand.load_from_files(_3g5k_ligand_pdb, _3g5k_ligand_pdbqt)

    #### 3str comes from PDBBind-CN
    #_3str_protein = PDB()
    #_3str_protein_pdb = os.path.join(data_dir(), "3str_protein_hyd.pdb")
    #_3str_protein_pdbqt = os.path.join(data_dir(), "3str_protein_hyd.pdbqt")
    #_3str_protein.load_from_files(_3str_protein_pdb, _3str_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_3str_ligand = PDB()
    #_3str_ligand_pdb = os.path.join(data_dir(), "3str_ligand_hyd.pdb")
    #_3str_ligand_pdbqt = os.path.join(data_dir(), "3str_ligand_hyd.pdbqt")
    #_3str_ligand.load_from_files(_3str_ligand_pdb, _3str_ligand_pdbqt)

    #### 1nu3 comes from PDBBind-CN
    #_1nu3_protein = PDB()
    #_1nu3_protein_pdb = os.path.join(data_dir(), "1nu3_protein_hyd.pdb")
    #_1nu3_protein_pdbqt = os.path.join(data_dir(), "1nu3_protein_hyd.pdbqt")
    #_1nu3_protein.load_from_files(_1nu3_protein_pdb, _1nu3_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_1nu3_ligand = PDB()
    #_1nu3_ligand_pdb = os.path.join(data_dir(), "1nu3_ligand_hyd.pdb")
    #_1nu3_ligand_pdbqt = os.path.join(data_dir(), "1nu3_ligand_hyd.pdbqt")
    #_1nu3_ligand.load_from_files(_1nu3_ligand_pdb, _1nu3_ligand_pdbqt)

    #### 2rio comes from PDBBind-CN
    #_2rio_protein = PDB()
    #_2rio_protein_pdb = os.path.join(data_dir(), "2rio_protein_hyd.pdb")
    #_2rio_protein_pdbqt = os.path.join(data_dir(), "2rio_protein_hyd.pdbqt")
    #_2rio_protein.load_from_files(_2rio_protein_pdb, _2rio_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_2rio_ligand = PDB()
    #_2rio_ligand_pdb = os.path.join(data_dir(), "2rio_ligand_hyd.pdb")
    #_2rio_ligand_pdbqt = os.path.join(data_dir(), "2rio_ligand_hyd.pdbqt")
    #_2rio_ligand.load_from_files(_2rio_ligand_pdb, _2rio_ligand_pdbqt)

    #### 2y2h comes from PDBBind-CN
    #_2y2h_protein = PDB()
    #_2y2h_protein_pdb = os.path.join(data_dir(), "2y2h_protein_hyd.pdb")
    #_2y2h_protein_pdbqt = os.path.join(data_dir(), "2y2h_protein_hyd.pdbqt")
    #_2y2h_protein.load_from_files(_2y2h_protein_pdb, _2y2h_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_2y2h_ligand = PDB()
    #_2y2h_ligand_pdb = os.path.join(data_dir(), "2y2h_ligand_hyd.pdb")
    #_2y2h_ligand_pdbqt = os.path.join(data_dir(), "2y2h_ligand_hyd.pdbqt")
    #_2y2h_ligand.load_from_files(_2y2h_ligand_pdb, _2y2h_ligand_pdbqt)

    #### 1pi5 comes from PDBBind-CN
    #_1pi5_protein = PDB()
    #_1pi5_protein_pdb = os.path.join(data_dir(), "1pi5_protein_hyd.pdb")
    #_1pi5_protein_pdbqt = os.path.join(data_dir(), "1pi5_protein_hyd.pdbqt")
    #_1pi5_protein.load_from_files(_1pi5_protein_pdb, _1pi5_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_1pi5_ligand = PDB()
    #_1pi5_ligand_pdb = os.path.join(data_dir(), "1pi5_ligand_hyd.pdb")
    #_1pi5_ligand_pdbqt = os.path.join(data_dir(), "1pi5_ligand_hyd.pdbqt")
    #_1pi5_ligand.load_from_files(_1pi5_ligand_pdb, _1pi5_ligand_pdbqt)

    #### 3fxz comes from PDBBind-CN
    #_3fxz_protein = PDB()
    #_3fxz_protein_pdb = os.path.join(data_dir(), "3fxz_protein_hyd.pdb")
    #_3fxz_protein_pdbqt = os.path.join(data_dir(), "3fxz_protein_hyd.pdbqt")
    #_3fxz_protein.load_from_files(_3fxz_protein_pdb, _3fxz_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_3fxz_ligand = PDB()
    #_3fxz_ligand_pdb = os.path.join(data_dir(), "3fxz_ligand_hyd.pdb")
    #_3fxz_ligand_pdbqt = os.path.join(data_dir(), "3fxz_ligand_hyd.pdbqt")
    #_3fxz_ligand.load_from_files(_3fxz_ligand_pdb, _3fxz_ligand_pdbqt)

    #### 4i60 comes from PDBBind-CN
    #_4i60_protein = PDB()
    #_4i60_protein_pdb = os.path.join(data_dir(), "4i60_protein_hyd.pdb")
    #_4i60_protein_pdbqt = os.path.join(data_dir(), "4i60_protein_hyd.pdbqt")
    #_4i60_protein.load_from_files(_4i60_protein_pdb, _4i60_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_4i60_ligand = PDB()
    #_4i60_ligand_pdb = os.path.join(data_dir(), "4i60_ligand_hyd.pdb")
    #_4i60_ligand_pdbqt = os.path.join(data_dir(), "4i60_ligand_hyd.pdbqt")
    #_4i60_ligand.load_from_files(_4i60_ligand_pdb, _4i60_ligand_pdbqt)

    #### 1hyv comes from PDBBind-CN
    #_1hyv_protein = PDB()
    #_1hyv_protein_pdb = os.path.join(data_dir(), "1hyv_protein_hyd.pdb")
    #_1hyv_protein_pdbqt = os.path.join(data_dir(), "1hyv_protein_hyd.pdbqt")
    #_1hyv_protein.load_from_files(_1hyv_protein_pdb, _1hyv_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_1hyv_ligand = PDB()
    #_1hyv_ligand_pdb = os.path.join(data_dir(), "1hyv_ligand_hyd.pdb")
    #_1hyv_ligand_pdbqt = os.path.join(data_dir(), "1hyv_ligand_hyd.pdbqt")
    #_1hyv_ligand.load_from_files(_1hyv_ligand_pdb, _1hyv_ligand_pdbqt)

    #### 3m1j comes from PDBBind-CN
    #_3m1j_protein = PDB()
    #_3m1j_protein_pdb = os.path.join(data_dir(), "3m1j_protein_hyd.pdb")
    #_3m1j_protein_pdbqt = os.path.join(data_dir(), "3m1j_protein_hyd.pdbqt")
    #_3m1j_protein.load_from_files(_3m1j_protein_pdb, _3m1j_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_3m1j_ligand = PDB()
    #_3m1j_ligand_pdb = os.path.join(data_dir(), "3m1j_ligand_hyd.pdb")
    #_3m1j_ligand_pdbqt = os.path.join(data_dir(), "3m1j_ligand_hyd.pdbqt")
    #_3m1j_ligand.load_from_files(_3m1j_ligand_pdb, _3m1j_ligand_pdbqt)

    #### 1y3g comes from PDBBind-CN
    #_1y3g_protein = PDB()
    #_1y3g_protein_pdb = os.path.join(data_dir(), "1y3g_protein_hyd.pdb")
    #_1y3g_protein_pdbqt = os.path.join(data_dir(), "1y3g_protein_hyd.pdbqt")
    #_1y3g_protein.load_from_files(_1y3g_protein_pdb, _1y3g_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_1y3g_ligand = PDB()
    #_1y3g_ligand_pdb = os.path.join(data_dir(), "1y3g_ligand_hyd.pdb")
    #_1y3g_ligand_pdbqt = os.path.join(data_dir(), "1y3g_ligand_hyd.pdbqt")
    #_1y3g_ligand.load_from_files(_1y3g_ligand_pdb, _1y3g_ligand_pdbqt)

    #### 6rsa comes from PDBBind-CN
    #_6rsa_protein = PDB()
    #_6rsa_protein_pdb = os.path.join(data_dir(), "6rsa_protein_hyd.pdb")
    #_6rsa_protein_pdbqt = os.path.join(data_dir(), "6rsa_protein_hyd.pdbqt")
    #_6rsa_protein.load_from_files(_6rsa_protein_pdb, _6rsa_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_6rsa_ligand = PDB()
    #_6rsa_ligand_pdb = os.path.join(data_dir(), "6rsa_ligand_hyd.pdb")
    #_6rsa_ligand_pdbqt = os.path.join(data_dir(), "6rsa_ligand_hyd.pdbqt")
    #_6rsa_ligand.load_from_files(_6rsa_ligand_pdb, _6rsa_ligand_pdbqt)

    #### 1lvk comes from PDBBind-CN
    #_1lvk_protein = PDB()
    #_1lvk_protein_pdb = os.path.join(data_dir(), "1lvk_protein_hyd.pdb")
    #_1lvk_protein_pdbqt = os.path.join(data_dir(), "1lvk_protein_hyd.pdbqt")
    #_1lvk_protein.load_from_files(_1lvk_protein_pdb, _1lvk_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_1lvk_ligand = PDB()
    #_1lvk_ligand_pdb = os.path.join(data_dir(), "1lvk_ligand_hyd.pdb")
    #_1lvk_ligand_pdbqt = os.path.join(data_dir(), "1lvk_ligand_hyd.pdbqt")
    #_1lvk_ligand.load_from_files(_1lvk_ligand_pdb, _1lvk_ligand_pdbqt)

    #### 3rj7 comes from PDBBind-CN
    #_3rj7_protein = PDB()
    #_3rj7_protein_pdb = os.path.join(data_dir(), "3rj7_protein_hyd.pdb")
    #_3rj7_protein_pdbqt = os.path.join(data_dir(), "3rj7_protein_hyd.pdbqt")
    #_3rj7_protein.load_from_files(_3rj7_protein_pdb, _3rj7_protein_pdbqt)
    ## The ligand is also specified by pdbbind
    #_3rj7_ligand = PDB()
    #_3rj7_ligand_pdb = os.path.join(data_dir(), "3rj7_ligand_hyd.pdb")
    #_3rj7_ligand_pdbqt = os.path.join(data_dir(), "3rj7_ligand_hyd.pdbqt")
    #_3rj7_ligand.load_from_files(_3rj7_ligand_pdb, _3rj7_ligand_pdbqt)

    ### 3zp9 comes from PDBBind-CN
    _3zp9_protein = PDB()
    _3zp9_protein_pdb = os.path.join(data_dir(), "3zp9_protein_hyd.pdb")
    _3zp9_protein_pdbqt = os.path.join(data_dir(), "3zp9_protein_hyd.pdbqt")
    _3zp9_protein.load_from_files(_3zp9_protein_pdb, _3zp9_protein_pdbqt)
    # The ligand is also specified by pdbbind
    _3zp9_ligand = PDB()
    _3zp9_ligand_pdb = os.path.join(data_dir(), "3zp9_ligand_hyd.pdb")
    _3zp9_ligand_pdbqt = os.path.join(data_dir(), "3zp9_ligand_hyd.pdbqt")
    _3zp9_ligand.load_from_files(_3zp9_ligand_pdb, _3zp9_ligand_pdbqt)

    ### 3bwf comes from PDBBind-CN
    _3bwf_protein = PDB()
    _3bwf_protein_pdb = os.path.join(data_dir(), "3bwf_protein_hyd.pdb")
    _3bwf_protein_pdbqt = os.path.join(data_dir(), "3bwf_protein_hyd.pdbqt")
    _3bwf_protein.load_from_files(_3bwf_protein_pdb, _3bwf_protein_pdbqt)
    # The ligand is also specified by pdbbind
    _3bwf_ligand = PDB()
    _3bwf_ligand_pdb = os.path.join(data_dir(), "3bwf_ligand_hyd.pdb")
    _3bwf_ligand_pdbqt = os.path.join(data_dir(), "3bwf_ligand_hyd.pdbqt")
    _3bwf_ligand.load_from_files(_3bwf_ligand_pdb, _3bwf_ligand_pdbqt)

    #self.test_cases = [("prgr", prgr_receptor, prgr_active),
    #                   ("cabl", cabl_receptor, cabl_active),
    #                   ("1zea", _1zea_protein, _1zea_ligand),
    #                   ("1r5y", _1r5y_protein, _1r5y_ligand),
    #                   ("3ao4", _3ao4_protein, _3ao4_ligand),
    #                   ("2jdm", _2jdm_protein, _2jdm_ligand),
    #                   ("3g5k", _3g5k_protein, _3g5k_ligand),
    #                   ("3str", _3str_protein, _3str_ligand),
    #                   ("2rio", _2rio_protein, _2rio_ligand)]
    self.test_cases = [("3bwf", _3bwf_protein, _3bwf_ligand)]
  def setUp(self):
    """
    Instantiate a local Binana object and load the protein/ligand fixtures.

    Each structure is read from a pair of files named "<stem>.pdb" and
    "<stem>.pdbqt" inside data_dir(). Afterwards self.test_cases holds
    (name, protein PDB, ligand PDB) tuples.
    """
    self.binana = Binana()

    def read_structure(stem):
      # Build one PDB object from its matching .pdb/.pdbqt pair.
      structure = PDB()
      pdb_path = os.path.join(data_dir(), stem + ".pdb")
      pdbqt_path = os.path.join(data_dir(), stem + ".pdbqt")
      structure.load_from_files(pdb_path, pdbqt_path)
      return structure

    # (test-case name, receptor file stem, ligand file stem), in load order.
    complexes = [
        # PRGR is from the DUD-E collection; the ligand is CHEMBL1164248.
        ("prgr", "prgr_hyd", "prgr_active0_hyd"),
        # c-Abl is taken from the Autodock Vina examples; the ligand is
        # imatinib.
        ("cabl", "c-Abl_hyd", "imatinib_hyd"),
    ]
    # Everything else comes from PDBBind-CN, which also specifies the ligand.
    for code in ("1zea", "1r5y", "3ao4", "2jdm", "3g5k", "3str", "1nu3",
                 "2rio"):
      complexes.append((code, code + "_protein_hyd", code + "_ligand_hyd"))

    loaded = []
    for name, receptor_stem, ligand_stem in complexes:
      receptor = read_structure(receptor_stem)
      ligand = read_structure(ligand_stem)
      loaded.append((name, receptor, ligand))

    # NOTE(review): 1nu3 is loaded but is not part of the test cases --
    # confirm whether that exclusion is intentional.
    self.test_cases = [case for case in loaded if case[0] != "1nu3"]
Exemple #27
0
    def setUp(self):
        """
        Instantiate a local Binana object and load the test complexes.

        Every structure comes from a "<stem>.pdb" / "<stem>.pdbqt" file pair
        in data_dir(). self.test_cases ends up as a list of
        (name, protein PDB, ligand PDB) tuples.
        """
        self.binana = Binana()

        def load_pair(receptor_stem, ligand_stem):
            # Read the receptor first, then the ligand.
            pair = []
            for stem in (receptor_stem, ligand_stem):
                structure = PDB()
                pdb_file = os.path.join(data_dir(), stem + ".pdb")
                pdbqt_file = os.path.join(data_dir(), stem + ".pdbqt")
                structure.load_from_files(pdb_file, pdbqt_file)
                pair.append(structure)
            return tuple(pair)

        cases = []

        # PRGR is from the DUD-E collection; its active is CHEMBL1164248.
        cases.append(("prgr",) + load_pair("prgr_hyd", "prgr_active0_hyd"))

        # c-Abl is taken from the Autodock Vina examples; the ligand is
        # imatinib.
        cases.append(("cabl",) + load_pair("c-Abl_hyd", "imatinib_hyd"))

        # The remaining complexes come from PDBBind-CN, which also specifies
        # the ligand.
        pdbbind_codes = ("1zea", "1r5y", "3ao4", "2jdm", "3g5k", "3str",
                         "1nu3", "2rio")
        for code in pdbbind_codes:
            pair = load_pair(code + "_protein_hyd", code + "_ligand_hyd")
            # NOTE(review): 1nu3 is loaded but never added to the test
            # cases -- confirm whether that exclusion is intentional.
            if code != "1nu3":
                cases.append((code,) + pair)

        self.test_cases = cases
def featurize_dude(dude_dir, target, pickle_dir, num_jobs):
  """Featurize DUD-E docked poses and write the features to a gzipped pickle.

  The receptor is read from "receptor_hyd.pdb" / "receptor_hyd.pdbqt" inside
  the target directory. For each processed active, the docked poses in
  "<name>_hyd_out.pdbqt" are converted to pdb, loaded as a MultiStructure,
  and one feature vector is computed per pose (poses visited in sorted key
  order). The result, a dict mapping compound name to its list of vectors,
  is written to "<dude_dir>/<target>/out.pkl.gz".

  Parameters
  ----------
  dude_dir: string
    Path to DUD-E directory
  target: string
    Name of DUD-E target.
  pickle_dir: string
    Path to directory to output pickles.
    NOTE(review): currently unused -- the pickle is written into the target
    directory instead. Confirm which location is intended.
  num_jobs: int
    Number of jobs the actives would be split across. Only used to report
    the per-job count; must be non-zero.
  """
  target_dir = os.path.join(dude_dir, target)
  actives_dir = os.path.join(target_dir, "actives")
  actives = os.listdir(actives_dir)
  pickle_out = os.path.join(target_dir, "out.pkl.gz")

  # NOTE(review): debugging cap inherited from the original code -- only the
  # first listed active is featurized. Lift it once the contents of the
  # actives directory are confirmed to be safe to iterate in full.
  actives = actives[:1]

  num_per_job = int(math.ceil(len(actives) / float(num_jobs)))
  print("Number per job: %d" % num_per_job)

  # Resolve the receptor relative to the requested target instead of a
  # hard-coded, machine-specific aa2ar path.
  protein_pdb_path = os.path.join(target_dir, "receptor_hyd.pdb")
  protein_pdbqt_path = os.path.join(target_dir, "receptor_hyd.pdbqt")

  print("About to load protein from input files")
  protein_pdb_obj = PDB()
  protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

  binana = Binana()
  feature_vectors = {}
  for compound in actives:
    compound_name = compound.split(".")[0]
    compound_pdbqt = os.path.join(actives_dir,
                                  compound_name + "_hyd_out.pdbqt")

    # Convert the pdbqt to pdb; the converted file is expected next to the
    # input inside actives_dir.
    pdbqt_to_pdb(compound_pdbqt, actives_dir)
    compound_pdb = os.path.join(actives_dir, compound_name + "_hyd_out.pdb")

    structures = MultiStructure()
    structures.load_from_files(compound_pdb, compound_pdbqt)

    # One feature vector per docked pose, in sorted key order.
    feature_vectors[compound_name] = [
        binana.compute_input_vector(structures.molecules[key],
                                    protein_pdb_obj)
        for key in sorted(structures.molecules)
    ]

  with gzip.open(pickle_out, "wb") as f:
    pickle.dump(feature_vectors, f)
Exemple #29
0
  def test_carbon_charges(self):
    """
    TestPDB: Verify that carbon groups are charged correctly.

    Each molecule is loaded from "<name>_hyd.pdb" / "<name>_hyd.pdbqt" and
    must yield exactly one charged carbon group with the expected sign.
    """
    # (molecule file stem, whether the charged group should be positive)
    expectations = [
        # Guanidine is positively charged at physiological pH.
        ("guanidine", True),
        # Sulfaguanidine contains a guanidine group that is likely to be
        # positively protonated at physiological pH.
        ("sulfaguanidine", True),
        # Formic acid is a carboxylic acid, which should be negatively
        # charged.
        ("formic_acid", False),
    ]
    for molecule, expect_positive in expectations:
      molecule_pdb = PDB()
      pdb_path = os.path.join(data_dir(), molecule + "_hyd.pdb")
      pdbqt_path = os.path.join(data_dir(), molecule + "_hyd.pdbqt")
      molecule_pdb.load_from_files(pdb_path, pdbqt_path)

      charged_groups = molecule_pdb.identify_carbon_charges()
      assert len(charged_groups) == 1
      if expect_positive:
        assert charged_groups[0].positive
      else:
        assert not charged_groups[0].positive
Exemple #30
0
    def setUp(self):
        """
        Instantiate a local Binana object and load two PDBBind-CN complexes.

        The protein and ligand of 3zp9 and 3bwf are each read from a
        "<code>_<role>_hyd.pdb" / "<code>_<role>_hyd.pdbqt" pair in
        data_dir(). self.test_cases lists 3bwf first, then 3zp9.
        """
        self.binana = Binana()

        fixtures = {}
        # Load order: 3zp9 first, then 3bwf; protein before ligand.
        for code in ("3zp9", "3bwf"):
            pair = []
            for role in ("protein", "ligand"):
                stem = "%s_%s_hyd" % (code, role)
                structure = PDB()
                pdb_path = os.path.join(data_dir(), stem + ".pdb")
                pdbqt_path = os.path.join(data_dir(), stem + ".pdbqt")
                structure.load_from_files(pdb_path, pdbqt_path)
                pair.append(structure)
            fixtures[code] = pair

        self.test_cases = [(code, fixtures[code][0], fixtures[code][1])
                           for code in ("3bwf", "3zp9")]
    def test_carbon_charges(self):
        """
        TestPDB: Verify that carbon groups are charged correctly.

        Every molecule below must produce exactly one charged carbon group
        whose sign matches the expectation.
        """

        def single_carbon_charge(stem):
            # Load "<stem>.pdb" / "<stem>.pdbqt" from data_dir() and return
            # the one charged carbon group found in it.
            molecule = PDB()
            pdb_path = os.path.join(data_dir(), stem + ".pdb")
            pdbqt_path = os.path.join(data_dir(), stem + ".pdbqt")
            molecule.load_from_files(pdb_path, pdbqt_path)
            charges = molecule.identify_carbon_charges()
            assert len(charges) == 1
            return charges[0]

        # Guanidine is positively charged at physiological pH.
        assert single_carbon_charge("guanidine_hyd").positive

        # Sulfaguanidine contains a guanidine group that is likely to be
        # positively protonated at physiological pH.
        assert single_carbon_charge("sulfaguanidine_hyd").positive

        # Formic acid is a carboxylic acid, which should be negatively
        # charged.
        assert not single_carbon_charge("formic_acid_hyd").positive
def featurize_pdbbind(pdbbind_dir, pickle_out):
    """Featurize all entries in pdbbind_dir and write features to pickle_out.

    pdbbind_dir should be a dir, with K subdirs, one for each protein-ligand
    complex to be featurized. The ligand and receptor should each have a pdb
    and pdbqt file. The ligand files should end in '_ligand_hyd.${FILETYPE}'
    while the receptor files should end in '_protein_hyd.${FILETYPE}'.

    The pickle holds a dict mapping each subdirectory name to its feature
    vector.

    Parameters
    ----------
    pdbbind_dir: string
      Path to pdbbind directory.
    pickle_out: string
      Path to write pickle output.

    Raises
    ------
    AssertionError
      If pdbbind_dir is not a directory.
    ValueError
      If a subdirectory lacks one of the four required files, or if a
      computed feature vector does not have the expected length.
    """
    assert os.path.isdir(pdbbind_dir)
    # Instantiate copy of binana vector
    binana = Binana()
    feature_vectors = {}

    # Extract the subdirectories in pdbbind_dir. Sorting makes the processing
    # order (and the logged counter) reproducible; the earlier debug-only
    # slice that skipped the first 900 entries has been removed so that every
    # complex is featurized, as documented.
    subdirs = sorted(
        d for d in os.listdir(pdbbind_dir)
        if os.path.isdir(os.path.join(pdbbind_dir, d)))

    num_atoms = len(Binana.atom_types)
    # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector
    # for derivation. Floor division keeps the expected length an integer on
    # both Python 2 and Python 3; num_atoms * (num_atoms + 1) is always even,
    # so the division is exact.
    feature_len = (3 * num_atoms * (num_atoms + 1) // 2 + num_atoms + 12 + 6 +
                   3 + 6 + 3 + 6 + 3 + 1)
    for count, d in enumerate(subdirs):
        print("\nprocessing %d-th pdb %s" % (count, d))
        subdir = os.path.join(pdbbind_dir, d)

        print("About to extract ligand and protein input files")
        ligand_pdb, ligand_pdbqt = None, None
        protein_pdb, protein_pdbqt = None, None
        for f in os.listdir(subdir):
            # Literal suffix tests: the previous regexes left the '.'
            # unescaped, so it matched any character.
            if f.endswith("_ligand_hyd.pdb"):
                ligand_pdb = f
            elif f.endswith("_ligand_hyd.pdbqt"):
                ligand_pdbqt = f
            elif f.endswith("_protein_hyd.pdb"):
                protein_pdb = f
            elif f.endswith("_protein_hyd.pdbqt"):
                protein_pdbqt = f

        print("Extracted Input Files:")
        # Double parentheses: print the tuple itself on Python 2 and 3 alike.
        print((ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt))
        if (not ligand_pdb or not ligand_pdbqt or not protein_pdb
                or not protein_pdbqt):
            raise ValueError("Required files not present for %s" % d)
        ligand_pdb_path = os.path.join(subdir, ligand_pdb)
        ligand_pdbqt_path = os.path.join(subdir, ligand_pdbqt)
        protein_pdb_path = os.path.join(subdir, protein_pdb)
        protein_pdbqt_path = os.path.join(subdir, protein_pdbqt)

        print("About to load ligand from input files")
        ligand_pdb_obj = PDB()
        ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path)

        print("About to load protein from input files")
        protein_pdb_obj = PDB()
        protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

        print("About to generate feature vector.")
        vector = binana.compute_input_vector(ligand_pdb_obj, protein_pdb_obj)
        feature_vectors[d] = vector
        if len(vector) != feature_len:
            raise ValueError("Feature length incorrect on %s" % d)
        print("Feature vector generated correctly.")

    with open(pickle_out, "wb") as f:
        pickle.dump(feature_vectors, f)
class TestPDB(unittest.TestCase):
    """
    Test PDB class.
    """
    def setUp(self):
        """
        Build the fixtures shared by the tests.

        Creates a scratch directory containing an empty temporary .pdb file,
        an empty PDB object, and the prgr and 1r5y proteins loaded from the
        test data directory.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.pdb = PDB()

        # mkstemp() hands back an already-open OS-level file descriptor along
        # with the path. Only the path is used, so close the descriptor right
        # away instead of leaking one for every test that runs.
        handle, self.pdb_filename = tempfile.mkstemp(suffix=".pdb",
                                                     dir=self.temp_dir)
        os.close(handle)

        self.prgr_pdb = PDB()
        prgr_pdb_path = os.path.join(data_dir(), "prgr_hyd.pdb")
        prgr_pdbqt_path = os.path.join(data_dir(), "prgr_hyd.pdbqt")
        self.prgr_pdb.load_from_files(prgr_pdb_path, prgr_pdbqt_path)

        self._1r5y_protein = PDB()
        _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
        _1r5y_protein_pdbqt = os.path.join(data_dir(),
                                           "1r5y_protein_hyd.pdbqt")
        self._1r5y_protein.load_from_files(_1r5y_protein_pdb,
                                           _1r5y_protein_pdbqt)

        # (name, PDB) pairs iterated by tests that run on every protein.
        self.proteins = [("prgr", self.prgr_pdb), ("1r5y", self._1r5y_protein)]

    def tearDown(self):
        """
        Remove the scratch directory that setUp() created.
        """
        scratch_dir = self.temp_dir
        shutil.rmtree(scratch_dir)

    def test_add_new_atom(self):
        """
        TestPDB: Check that an atom can be added to an empty PDB.
        """
        target = self.pdb
        # A freshly constructed PDB must not hold any atoms.
        assert len(target.all_atoms.keys()) == 0
        target.add_new_atom(Atom())
        # Exactly one atom should be registered after the insertion.
        assert len(target.all_atoms.keys()) == 1

    def test_get_residues(self):
        """
        TestPDB: Check that every residue of prgr is identified.
        """
        residues = self.prgr_pdb.get_residues()
        # prgr.pdb has 280 unique residues
        assert len(residues.keys()) == 280
        expected_resnames = [
            "LEU", "ILE", "ASN", "LEU", "LEU", "MET", "SER", "ILE", "GLU",
            "PRO", "ASP", "VAL", "ILE", "TYR", "ALA", "GLY", "HIS", "ASP",
            "THR", "SER", "SER", "SER", "LEU", "LEU", "THR", "SER", "LEU",
            "ASN", "GLN", "LEU", "GLY", "GLU", "ARG", "GLN", "LEU", "LEU",
            "SER", "VAL", "VAL", "LYS", "TRP", "SER", "LYS", "SER", "LEU",
            "PRO", "GLY", "PHE", "ARG", "LEU", "HIS", "ILE", "ASP", "ASP",
            "GLN", "ILE", "THR", "LEU", "ILE", "GLN", "TYR", "SER", "TRP",
            "MET", "SER", "LEU", "MET", "VAL", "PHE", "GLY", "LEU", "GLY",
            "TRP", "ARG", "SER", "TYR", "LYS", "HIS", "VAL", "SER", "GLY",
            "GLN", "MET", "LEU", "TYR", "PHE", "ALA", "PRO", "ASP", "LEU",
            "ILE", "LEU", "ASN", "GLU", "GLN", "ARG", "MET", "LYS", "GLU",
            "PHE", "TYR", "SER", "LEU", "CYS", "LEU", "THR", "MET", "TRP",
            "GLN", "ILE", "PRO", "GLN", "GLU", "PHE", "VAL", "LYS", "LEU",
            "GLN", "VAL", "SER", "GLN", "GLU", "GLU", "PHE", "LEU", "CYS",
            "MET", "LYS", "VAL", "LEU", "LEU", "LEU", "LEU", "ASN", "THR",
            "ILE", "PRO", "LEU", "GLU", "GLY", "LEU", "PHE", "MET", "ARG",
            "TYR", "ILE", "GLU", "LEU", "ALA", "ILE", "ARG", "ARG", "PHE",
            "TYR", "GLN", "LEU", "THR", "LYS", "LEU", "LEU", "ASP", "ASN",
            "LEU", "HIS", "ASP", "LEU", "VAL", "LYS", "GLN", "LEU", "HIS",
            "LEU", "TYR", "CYS", "LEU", "ASN", "THR", "PHE", "ILE", "GLN",
            "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU", "PHE", "PRO",
            "GLU", "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA", "ALA",
            "GLN", "LEU", "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET",
            "VAL", "LYS", "PRO", "LEU", "LEU", "PHE", "HIS", "LYS", "ASN",
            "LEU", "ASP", "ASP", "ILE", "THR", "LEU", "ILE", "GLN", "TYR",
            "SER", "TRP", "MET", "THR", "ILE", "PRO", "LEU", "GLU", "GLY",
            "LEU", "ARG", "VAL", "LYS", "GLN", "LEU", "HIS", "LEU", "TYR",
            "CYS", "LEU", "ASN", "THR", "PHE", "ILE", "GLN", "SER", "ARG",
            "ALA", "LEU", "SER", "VAL", "GLU", "PHE", "PRO", "GLU", "MET",
            "MET", "SER", "GLU", "VAL", "ILE", "ALA", "ALA", "GLN", "LEU",
            "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET", "VAL", "LYS",
            "PRO"
        ]
        # Residue keys have the format RESNAME_RESNUMBER_CHAIN; only the
        # residue name is compared, and order is ignored by sorting both sides.
        found_resnames = sorted(
            reskey.split("_")[0].strip() for reskey in residues)
        assert found_resnames == sorted(expected_resnames)
        # Summed over all residues, prgr is expected to contribute 2788 atom
        # indices (the value asserted below).
        atom_count = sum(len(atom_indices)
                         for atom_indices in residues.values())
        print(atom_count)
        assert atom_count == 2788

    def test_get_lysine_charges(self):
        """
        TestPDB: Check that lysine charges are identified correctly.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        lysine_charges = protein.get_lysine_charges(residues)
        num_charges = len(lysine_charges)
        print(num_charges)
        # prgr has 14 lysines.
        assert num_charges == 14
        # Every lysine charge is expected to be positive.
        assert all(charge.positive for charge in lysine_charges)

    def test_get_arginine_charges(self):
        """
        TestPDB: Check that arginine charges are identified correctly.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        arginine_charges = protein.get_arginine_charges(residues)
        # prgr has 10 arginines
        assert len(arginine_charges) == 10
        # The guanidinium group in arginine should be positively charged.
        assert all(charge.positive for charge in arginine_charges)

    def test_get_histidine_charges(self):
        """
        TestPDB: Check that histidine charges are identified correctly.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        histidine_charges = protein.get_histidine_charges(residues)
        # prgr has 7 histidines
        assert len(histidine_charges) == 7
        # The ring nitrogens pick up positive charges.
        assert all(charge.positive for charge in histidine_charges)

    def test_get_glutamic_acid_charges(self):
        """
        TestPDB: Check that glutamic acid charges are identified correctly.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        glutamic_acid_charges = protein.get_glutamic_acid_charges(residues)
        assert len(glutamic_acid_charges) == 16
        # The carboxyl groups are deprotonated, so no charge may be positive.
        assert all(not charge.positive for charge in glutamic_acid_charges)

    def test_get_aspartic_acid_charges(self):
        """
        TestPDB: Check that aspartic acid charges are identified correctly.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        aspartic_acid_charges = protein.get_aspartic_acid_charges(residues)
        assert len(aspartic_acid_charges) == 9
        # The carboxyl groups are deprotonated, so no charge may be positive.
        assert all(not charge.positive for charge in aspartic_acid_charges)

    def test_assign_ligand_aromatics(self):
        """
        TestPDB: Check that loading a PDB with unlabelled atoms succeeds.
        """
        # 3ao4 comes from PDBBind-CN and its PDB file contains atoms without a
        # residue label, which used to make the non-protein aromatic
        # assignment complain. The test only checks that loading completes.
        # TODO(rbharath): Add real assertions here.
        data_root = data_dir()
        pdb_path = os.path.join(data_root, "3ao4_protein_hyd.pdb")
        pdbqt_path = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt")
        PDB().load_from_files(pdb_path, pdbqt_path)

    def test_remove_redundant_rings(self):
        """
        TestPDB: Check that redundant rings are removed.
        """
        # Each ring is represented as a list of atom indices.
        # An empty ring is dropped altogether.
        empty_only = [[]]
        assert remove_redundant_rings(empty_only) == []
        # A ring that is a superset of another ring is dropped.
        with_superset = [[1, 2, 3], [1, 3, 4, 5], [1, 2, 3, 4, 5]]
        kept = remove_redundant_rings(with_superset)
        assert kept == [[1, 2, 3], [1, 3, 4, 5]]
        # Of two rings over the same atoms only one copy may remain.
        with_duplicate = [[1, 2, 3], [1, 3, 2]]
        assert remove_redundant_rings(with_duplicate) == [[1, 2, 3]]

    def test_assign_protein_aromatics(self):
        """
        TestPDB: Check that the fixture proteins carry aromatic rings.
        """
        for label, structure in self.proteins:
            # load_from_files() is expected to have assigned the aromatic
            # rings already; none of them may be None.
            print("Processing aromatics for %s" % label)
            assert all(ring is not None for ring in structure.aromatic_rings)

    def test_get_phenylalanine_aromatics(self):
        """
        TestPDB: Check retrieval of phenylalanine aromatic rings.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        rings = protein.get_phenylalanine_aromatics(residues)

        # prgr has 13 phenylalanines, each of which has 1 aromatic ring.
        assert len(rings) == 13
        # Each phenylalanine ring consists of 6 atoms.
        assert all(len(ring.indices) == 6 for ring in rings)

    def test_get_tyrosine_aromatics(self):
        """
        TestPDB: Check retrieval of tyrosine aromatic rings.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        rings = protein.get_tyrosine_aromatics(residues)
        # prgr has 10 tyrosines, each of which has 1 aromatic ring.
        assert len(rings) == 10
        # Each tyrosine ring consists of 6 atoms.
        assert all(len(ring.indices) == 6 for ring in rings)

    def test_get_histidine_aromatics(self):
        """
        TestPDB: Check retrieval of histidine aromatic rings.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        rings = protein.get_histidine_aromatics(residues)
        # prgr has 7 histidines, each of which has 1 aromatic ring.
        assert len(rings) == 7
        for ring in rings:
            ring_size = len(ring.indices)
            print(ring_size)
            # The aromatic ring of histidine has 5 atoms.
            assert ring_size == 5

    def test_get_tryptophan_aromatics(self):
        """
        TestPDB: Check retrieval of tryptophan aromatic rings.
        """
        protein = self.prgr_pdb
        residues = protein.get_residues()
        rings = protein.get_tryptophan_aromatics(residues)
        # prgr has 5 tryptophans, each of which has 2 aromatic rings.
        num_rings = len(rings)
        print(num_rings)
        assert num_rings == 10
        # In each tryptophan one ring has 6 atoms and the other has 5, so the
        # two sizes must occur equally often.
        ring_sizes = [len(ring.indices) for ring in rings]
        assert ring_sizes.count(6) == 5
        assert ring_sizes.count(5) == 5

    def test_connected_atoms(self):
        """
        TestPDB: Check lookup of bonded atoms filtered by element.
        """
        structure = self.pdb
        # The PDB must be empty before the atoms are added.
        assert len(structure.all_atoms.keys()) == 0
        carbon = Atom(element="C")
        oxygen = Atom(element="O")
        hydrogen = Atom(element="H")
        for atom in (carbon, oxygen, hydrogen):
            structure.add_new_atom(atom)

        # Bond the carbon to the oxygen and the hydrogen. The indices used
        # here assume atoms are numbered 1, 2, 3 in insertion order.
        carbon.indices_of_atoms_connecting = [2, 3]
        oxygen.indices_of_atoms_connecting = [1]
        hydrogen.indices_of_atoms_connecting = [1]

        # The carbon (atom 1) has one oxygen and one hydrogen neighbour.
        oxygen_neighbours = structure.connected_atoms(1, "O")
        assert len(oxygen_neighbours) == 1

        hydrogen_neighbours = structure.connected_atoms(1, "H")
        assert len(hydrogen_neighbours) == 1

    def test_load_bonds_from_pdb(self):
        """
        TestPDB: Verifies that bonds can be loaded from PDB.
        """
        pdb = PDB()
        # Test that we can load CO2
        carbon_atom = Atom(element="C")
        oxygen_atom_1 = Atom(element="O")
        oxygen_atom_2 = Atom(element="O")

        pdb.add_new_atom(carbon_atom)
        pdb.add_new_atom(oxygen_atom_1)
        pdb.add_new_atom(oxygen_atom_2)
        # One CONECT record per list element. The commas matter: without them
        # Python concatenates the adjacent literals into a single string and
        # the file would contain all three records on one line.
        lines = [
            "CONECT    1    2    3                                                 ",
            "CONECT    2                                                           ",
            "CONECT    3                                                           ",
        ]
        with tempfile.NamedTemporaryFile() as temp:
            temp.write("\n".join(lines))
            # Flush so the records are on disk before the file is re-read by
            # name while the handle is still open.
            temp.flush()
            pdb.load_bonds_from_pdb(temp.name)
        # Only the carbon's record lists partners, so only the carbon is
        # expected to have bonds recorded.
        assert len(carbon_atom.indices_of_atoms_connecting) == 2
        assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
        assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0

    def test_connected_heavy_atoms(self):
        """
        TestPDB: Check lookup of bonded heavy (non-hydrogen) atoms.
        """
        structure = self.pdb
        # The PDB must be empty before the atoms are added.
        assert len(structure.all_atoms.keys()) == 0
        carbon = Atom(element="C")
        oxygen = Atom(element="O")
        hydrogen = Atom(element="H")
        for atom in (carbon, oxygen, hydrogen):
            structure.add_new_atom(atom)

        # Bond the carbon to the oxygen and the hydrogen. The indices used
        # here assume atoms are numbered 1, 2, 3 in insertion order.
        carbon.indices_of_atoms_connecting = [2, 3]
        oxygen.indices_of_atoms_connecting = [1]
        hydrogen.indices_of_atoms_connecting = [1]

        # Of the carbon's two neighbours only the oxygen (atom 2) is heavy.
        heavy_neighbours = structure.connected_heavy_atoms(1)
        assert len(heavy_neighbours) == 1
        assert heavy_neighbours[0] == 2

    def test_assign_non_protein_charges(self):
        """
        TestPDB: Check that charges are assigned to ligands on load.
        """
        # Ammonium sulfate, (NH4)+(NH4)+(SO4)(2-), has three charged groups:
        # two positive ammoniums and one negative sulfate.
        salt = PDB()
        pdb_path = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdb")
        pdbqt_path = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdbqt")
        # Loading identifies the non-protein charges automatically.
        salt.load_from_files(pdb_path, pdbqt_path)
        assert len(salt.charges) == 3
        signs = [bool(charge.positive) for charge in salt.charges]
        assert signs.count(True) == 2
        assert signs.count(False) == 1

    def test_metallic_charges(self):
        """
        TestPDB: Check that a metal ion is reported as a charge.
        """
        # A lone magnesium atom placed at the origin.
        structure = PDB()
        origin = Point(coords=np.array([0, 0, 0]))
        magnesium = Atom(element="MG", coordinates=origin)
        structure.add_new_non_protein_atom(magnesium)
        charges = structure.identify_metallic_charges()
        assert len(charges) == 1

    def test_nitrogen_charges(self):
        """
        TestPDB: Check that nitrogen groups are charged correctly.
        """

        def load_molecule(pdb_name, pdbqt_name):
            # Load one molecule from the test data directory.
            molecule = PDB()
            pdb_path = os.path.join(data_dir(), pdb_name)
            pdbqt_path = os.path.join(data_dir(), pdbqt_name)
            molecule.load_from_files(pdb_path, pdbqt_path)
            return molecule

        # Ammonium sulfate, (NH4)+(NH4)+(SO4)(2-): the two ammoniums should be
        # picked up as two positively charged nitrogen groups.
        salt = load_molecule("ammonium_sulfate_hyd.pdb",
                             "ammonium_sulfate_hyd.pdbqt")
        nitrogen_charges = salt.identify_nitrogen_charges()
        assert len(nitrogen_charges) == 2
        for position in (0, 1):
            assert nitrogen_charges[position].positive

        # Pyrrolidine, (CH2)4NH: the nitrogen should be sp3 hybridized and so
        # is likely to pick up an extra proton at physiological pH.
        pyrrolidine = load_molecule("pyrrolidine_hyd.pdb",
                                    "pyrrolidine_hyd.pdbqt")
        nitrogen_charges = pyrrolidine.identify_nitrogen_charges()
        assert len(nitrogen_charges) == 1
        assert nitrogen_charges[0].positive

    def test_carbon_charges(self):
        """
        TestPDB: Check that carbon groups are charged correctly.
        """

        def load_molecule(pdb_name, pdbqt_name):
            # Load one molecule from the test data directory.
            molecule = PDB()
            pdb_path = os.path.join(data_dir(), pdb_name)
            pdbqt_path = os.path.join(data_dir(), pdbqt_name)
            molecule.load_from_files(pdb_path, pdbqt_path)
            return molecule

        # Guanidine is positively charged at physiological pH.
        guanidine = load_molecule("guanidine_hyd.pdb", "guanidine_hyd.pdbqt")
        carbon_charges = guanidine.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert carbon_charges[0].positive

        # Sulfaguanidine contains a guanidine group that is likely to be
        # protonated, and hence positive, at physiological pH.
        sulfaguanidine = load_molecule("sulfaguanidine_hyd.pdb",
                                       "sulfaguanidine_hyd.pdbqt")
        carbon_charges = sulfaguanidine.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert carbon_charges[0].positive

        # Formic acid is a carboxylic acid and should be negatively charged.
        formic_acid = load_molecule("formic_acid_hyd.pdb",
                                    "formic_acid_hyd.pdbqt")
        carbon_charges = formic_acid.identify_carbon_charges()
        assert len(carbon_charges) == 1
        assert not carbon_charges[0].positive

    def test_phosphorus_charges(self):
        """
        TestPDB: Check that phosphorus groups are charged correctly.
        """
        # CID82671 contains a phosphate between two aromatic groups.
        molecule = PDB()
        pdb_path = os.path.join(data_dir(), "82671_hyd.pdb")
        # NOTE(review): the pdbqt argument points at the .pdb file, whereas
        # every other test passes a .pdbqt file. Looks like a copy-paste slip;
        # confirm that 82671_hyd.pdbqt exists before changing it.
        pdbqt_path = os.path.join(data_dir(), "82671_hyd.pdb")
        molecule.load_from_files(pdb_path, pdbqt_path)
        charges = molecule.identify_phosphorus_charges()
        assert len(charges) == 1
        # The phosphate should be negatively charged.
        first_charge = charges[0]
        assert not first_charge.positive

    def test_sulfur_charges(self):
        """
        TestPDB: Check that sulfur groups are charged correctly.
        """
        molecule = PDB()
        pdb_path = os.path.join(data_dir(), "triflic_acid_hyd.pdb")
        pdbqt_path = os.path.join(data_dir(), "triflic_acid_hyd.pdbqt")
        molecule.load_from_files(pdb_path, pdbqt_path)
        charges = molecule.identify_sulfur_charges()
        assert len(charges) == 1
        # The single sulfur group of triflic acid should be negatively charged.
        assert not charges[0].positive

    def test_ligand_assign_aromatics(self):
        """
        TestPDB: Check that aromatic rings in ligands are identified.
        """
        benzene = PDB()
        pdb_path = os.path.join(data_dir(), "benzene_hyd.pdb")
        pdbqt_path = os.path.join(data_dir(), "benzene_hyd.pdbqt")
        benzene.load_from_files(pdb_path, pdbqt_path)

        # Benzene should have exactly one aromatic ring.
        print(benzene.aromatic_rings)
        assert len(benzene.aromatic_rings) == 1
        # The first 6 atoms in the benzene pdb form that ring.
        ring_atoms = set(benzene.aromatic_rings[0].indices)
        assert ring_atoms == set([1, 2, 3, 4, 5, 6])

    def test_assign_secondary_structure(self):
        """
        TestPDB: Check that secondary structure assignment runs on prgr.
        """
        # TODO(rbharath): This is only a smoke test. Add a realistic test that
        # checks nontrivial secondary structure is computed correctly.
        protein = self.prgr_pdb
        protein.assign_secondary_structure()

    def test_get_structure_dict(self):
        """
        TestPDB: Check that the dict of rudimentary structure labels is built.

        TODO(rbharath): This is only a smoke test. Add nontrivial checks here.
        """
        protein = self.prgr_pdb
        structures = protein.get_structure_dict()
        print(structures)
        print(len(structures))
def featurize_fingerprint(pdb_directories, pickle_out):
  """Featurize all pdbs in provided directories and pickle the results.

  For each directory the hydrogenated ligand and protein files are located,
  the Binana input vector, the ligand SMILES string and the protein residue
  names are computed, and the triple ``(features, smiles, seq)`` is stored
  under the directory path. The resulting dict is written to ``pickle_out``.
  Directories whose ligand cannot be parsed by RDKit are skipped.

  Parameters
  ----------
  pdb_directories: iterable of string
    Paths of directories that each contain files ending in
    _ligand_hyd.pdb, _ligand_hyd.pdbqt, _protein_hyd.pdb and
    _protein_hyd.pdbqt.
  pickle_out: string
    Path of the pickle file to write.

  Raises
  ------
  ValueError
    If a directory lacks one of the four required files, or if a computed
    feature vector does not have length ``binana.num_features()``.
  """
  # Instantiate copy of binana vector
  binana = Binana()
  # See features/tests/nnscore_test.py:TestBinana.testComputeInputVector
  # for derivation.
  feature_len = binana.num_features()
  feature_vectors = {}
  for count, pdb_dir in enumerate(pdb_directories):
    # Report the directory being processed (the original formatted the
    # ``dir`` builtin here by mistake).
    print("\nprocessing %d-th pdb %s" % (count, pdb_dir))

    print("About to extract ligand and protein input files")
    ligand_pdb, ligand_pdbqt = None, None
    protein_pdb, protein_pdbqt = None, None
    for f in os.listdir(pdb_dir):
      # Plain suffix tests: the "." is meant literally, not as a regex
      # wildcard.
      if f.endswith("_ligand_hyd.pdb"):
        ligand_pdb = f
      elif f.endswith("_ligand_hyd.pdbqt"):
        ligand_pdbqt = f
      elif f.endswith("_protein_hyd.pdb"):
        protein_pdb = f
      elif f.endswith("_protein_hyd.pdbqt"):
        protein_pdbqt = f

    print("Extracted Input Files:")
    # Printed as a single tuple, matching the Python 2 print statement.
    print((ligand_pdb, ligand_pdbqt, protein_pdb, protein_pdbqt))
    if (not ligand_pdb or not ligand_pdbqt or not protein_pdb or not
        protein_pdbqt):
      raise ValueError("Required files not present for %s" % pdb_dir)

    ligand_pdb_path = os.path.join(pdb_dir, ligand_pdb)
    ligand_pdbqt_path = os.path.join(pdb_dir, ligand_pdbqt)
    protein_pdb_path = os.path.join(pdb_dir, protein_pdb)
    protein_pdbqt_path = os.path.join(pdb_dir, protein_pdbqt)

    print("About to load ligand from input files")
    ligand_pdb_obj = PDB()
    ligand_pdb_obj.load_from_files(ligand_pdb_path, ligand_pdbqt_path)

    print("About to load protein from input files")
    protein_pdb_obj = PDB()
    protein_pdb_obj.load_from_files(protein_pdb_path, protein_pdbqt_path)

    print("About to generate feature vector.")
    features = binana.compute_input_vector(ligand_pdb_obj,
        protein_pdb_obj)
    if len(features) != feature_len:
      raise ValueError("Feature length incorrect on %s" % pdb_dir)
    print("Feature vector generated correctly.")

    print("About to compute ligand smiles string.")
    ligand_mol = Chem.MolFromPDBFile(ligand_pdb_path)
    # TODO(rbharath): Why does this fail sometimes?
    if ligand_mol is None:
      # Best effort: leave this complex out of the output, but say so
      # instead of dropping it silently.
      print("Could not parse ligand %s; skipping." % ligand_pdb_path)
      continue
    smiles = Chem.MolToSmiles(ligand_mol)

    print("About to compute sequence.")
    protein = md.load(protein_pdb_path)
    seq = [r.name for r in protein.top.residues]

    # Write the computed quantities
    feature_vectors[pdb_dir] = (features, smiles, seq)
  print("About to write pickle to " + pickle_out)
  with open(pickle_out, "wb") as f:
    pickle.dump(feature_vectors, f)
Exemple #35
0
class TestPDB(unittest.TestCase):
  """
  Test PDB class.
  """

  def setUp(self):
    """
    Build the fixtures shared by the tests.

    Creates a scratch directory containing an empty temporary .pdb file, an
    empty PDB object, and the prgr and 1r5y proteins loaded from the test
    data directory.
    """
    self.temp_dir = tempfile.mkdtemp()
    self.pdb = PDB()

    # mkstemp() hands back an already-open OS-level file descriptor along
    # with the path. Only the path is used, so close the descriptor right
    # away instead of leaking one for every test that runs.
    handle, self.pdb_filename = tempfile.mkstemp(suffix=".pdb",
        dir=self.temp_dir)
    os.close(handle)

    self.prgr_pdb = PDB()
    prgr_pdb_path = os.path.join(data_dir(), "prgr_hyd.pdb")
    prgr_pdbqt_path = os.path.join(data_dir(), "prgr_hyd.pdbqt")
    self.prgr_pdb.load_from_files(prgr_pdb_path, prgr_pdbqt_path)

    self._1r5y_protein = PDB()
    _1r5y_protein_pdb = os.path.join(data_dir(), "1r5y_protein_hyd.pdb")
    _1r5y_protein_pdbqt = os.path.join(data_dir(), "1r5y_protein_hyd.pdbqt")
    self._1r5y_protein.load_from_files(_1r5y_protein_pdb, _1r5y_protein_pdbqt)

    # (name, PDB) pairs iterated by tests that run on every protein.
    self.proteins = [("prgr", self.prgr_pdb), ("1r5y", self._1r5y_protein)]



  def tearDown(self):
    """
    Remove the scratch directory that setUp() created.
    """
    scratch_dir = self.temp_dir
    shutil.rmtree(scratch_dir)

  def test_add_new_atom(self):
    """
    TestPDB: Check that an atom can be added to an empty PDB.
    """
    target = self.pdb
    # A freshly constructed PDB must not hold any atoms.
    assert len(target.all_atoms.keys()) == 0
    target.add_new_atom(Atom())
    # Exactly one atom should be registered after the insertion.
    assert len(target.all_atoms.keys()) == 1

  def test_get_residues(self):
    """
    TestPDB: Check that every residue of prgr is identified.
    """
    residues = self.prgr_pdb.get_residues()
    # prgr.pdb has 280 unique residues
    assert len(residues.keys()) == 280
    expected_resnames = ["LEU", "ILE", "ASN", "LEU", "LEU", "MET", "SER",
        "ILE", "GLU", "PRO", "ASP", "VAL", "ILE", "TYR", "ALA", "GLY", "HIS",
        "ASP", "THR", "SER", "SER", "SER", "LEU", "LEU", "THR", "SER", "LEU",
        "ASN", "GLN", "LEU", "GLY", "GLU", "ARG", "GLN", "LEU", "LEU", "SER",
        "VAL", "VAL", "LYS", "TRP", "SER", "LYS", "SER", "LEU", "PRO", "GLY",
        "PHE", "ARG", "LEU", "HIS", "ILE", "ASP", "ASP", "GLN", "ILE", "THR",
        "LEU", "ILE", "GLN", "TYR", "SER", "TRP", "MET", "SER", "LEU", "MET",
        "VAL", "PHE", "GLY", "LEU", "GLY", "TRP", "ARG", "SER", "TYR", "LYS",
        "HIS", "VAL", "SER", "GLY", "GLN", "MET", "LEU", "TYR", "PHE", "ALA",
        "PRO", "ASP", "LEU", "ILE", "LEU", "ASN", "GLU", "GLN", "ARG", "MET",
        "LYS", "GLU", "PHE", "TYR", "SER", "LEU", "CYS", "LEU", "THR", "MET",
        "TRP", "GLN", "ILE", "PRO", "GLN", "GLU", "PHE", "VAL", "LYS", "LEU",
        "GLN", "VAL", "SER", "GLN", "GLU", "GLU", "PHE", "LEU", "CYS", "MET",
        "LYS", "VAL", "LEU", "LEU", "LEU", "LEU", "ASN", "THR", "ILE", "PRO",
        "LEU", "GLU", "GLY", "LEU", "PHE", "MET", "ARG", "TYR", "ILE", "GLU",
        "LEU", "ALA", "ILE", "ARG", "ARG", "PHE", "TYR", "GLN", "LEU", "THR",
        "LYS", "LEU", "LEU", "ASP", "ASN", "LEU", "HIS", "ASP", "LEU", "VAL",
        "LYS", "GLN", "LEU", "HIS", "LEU", "TYR", "CYS", "LEU", "ASN", "THR",
        "PHE", "ILE", "GLN", "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU",
        "PHE", "PRO", "GLU", "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA",
        "ALA", "GLN", "LEU", "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET",
        "VAL", "LYS", "PRO", "LEU", "LEU", "PHE", "HIS", "LYS", "ASN", "LEU",
        "ASP", "ASP", "ILE", "THR", "LEU", "ILE", "GLN", "TYR", "SER", "TRP",
        "MET", "THR", "ILE", "PRO", "LEU", "GLU", "GLY", "LEU", "ARG", "VAL",
        "LYS", "GLN", "LEU", "HIS", "LEU", "TYR", "CYS", "LEU", "ASN", "THR",
        "PHE", "ILE", "GLN", "SER", "ARG", "ALA", "LEU", "SER", "VAL", "GLU",
        "PHE", "PRO", "GLU", "MET", "MET", "SER", "GLU", "VAL", "ILE", "ALA",
        "ALA", "GLN", "LEU", "PRO", "LYS", "ILE", "LEU", "ALA", "GLY", "MET",
        "VAL", "LYS", "PRO"]
    # Residue keys have the format RESNAME_RESNUMBER_CHAIN; only the residue
    # name is compared, and order is ignored by sorting both sides.
    found_resnames = sorted(
        reskey.split("_")[0].strip() for reskey in residues)
    assert found_resnames == sorted(expected_resnames)
    # Summed over all residues, prgr is expected to contribute 2788 atom
    # indices (the value asserted below).
    atom_count = sum(len(atom_indices) for atom_indices in residues.values())
    print(atom_count)
    assert atom_count == 2788

  def test_get_lysine_charges(self):
    """
    TestPDB: Check that lysine charges are identified correctly.
    """
    protein = self.prgr_pdb
    residues = protein.get_residues()
    lysine_charges = protein.get_lysine_charges(residues)
    num_charges = len(lysine_charges)
    print(num_charges)
    # prgr has 14 lysines.
    assert num_charges == 14
    # Every lysine charge is expected to be positive.
    assert all(charge.positive for charge in lysine_charges)

  def test_get_arginine_charges(self):
    """
    TestPDB: Check that arginine charges are identified correctly.
    """
    protein = self.prgr_pdb
    residues = protein.get_residues()
    arginine_charges = protein.get_arginine_charges(residues)
    # prgr has 10 arginines
    assert len(arginine_charges) == 10
    # The guanidinium group in arginine should be positively charged.
    assert all(charge.positive for charge in arginine_charges)

  def test_get_histidine_charges(self):
    """
    TestPDB: Check that histidine charges are identified correctly.
    """
    protein = self.prgr_pdb
    residues = protein.get_residues()
    histidine_charges = protein.get_histidine_charges(residues)
    # prgr has 7 histidines
    assert len(histidine_charges) == 7
    # The ring nitrogens pick up positive charges.
    assert all(charge.positive for charge in histidine_charges)

  def test_get_glutamic_acid_charges(self):
    """
    TestPDB: Check that glutamic acid charges are identified correctly.
    """
    protein = self.prgr_pdb
    residues = protein.get_residues()
    glutamic_acid_charges = protein.get_glutamic_acid_charges(residues)
    assert len(glutamic_acid_charges) == 16
    # The carboxyl groups are deprotonated, so no charge may be positive.
    assert all(not charge.positive for charge in glutamic_acid_charges)

  def test_get_aspartic_acid_charges(self):
    """
    TestPDB: Check that aspartic acid charges are identified correctly.
    """
    protein = self.prgr_pdb
    residues = protein.get_residues()
    aspartic_acid_charges = protein.get_aspartic_acid_charges(residues)
    assert len(aspartic_acid_charges) == 9
    # The carboxyl groups are deprotonated, so no charge may be positive.
    assert all(not charge.positive for charge in aspartic_acid_charges)

  def test_assign_ligand_aromatics(self):
    """
    TestPDB: Check that loading a PDB with unlabelled atoms succeeds.
    """
    # 3ao4 comes from PDBBind-CN and its PDB file contains atoms without a
    # residue label, which used to make the non-protein aromatic assignment
    # complain. The test only checks that loading completes.
    # TODO(rbharath): Add real assertions here.
    data_root = data_dir()
    pdb_path = os.path.join(data_root, "3ao4_protein_hyd.pdb")
    pdbqt_path = os.path.join(data_dir(), "3ao4_protein_hyd.pdbqt")
    PDB().load_from_files(pdb_path, pdbqt_path)

  def test_remove_redundant_rings(self):
    """
    TestPDB: Check that redundant rings are removed.
    """
    # Each ring is represented as a list of atom indices.
    # An empty ring is dropped altogether.
    empty_only = [[]]
    assert remove_redundant_rings(empty_only) == []
    # A ring that is a superset of another ring is dropped.
    with_superset = [[1, 2, 3], [1, 3, 4, 5], [1, 2, 3, 4, 5]]
    kept = remove_redundant_rings(with_superset)
    assert kept == [[1, 2, 3], [1, 3, 4, 5]]
    # Of two rings over the same atoms only one copy may remain.
    with_duplicate = [[1, 2, 3], [1, 3, 2]]
    assert remove_redundant_rings(with_duplicate) == [[1, 2, 3]]

  def test_assign_protein_aromatics(self):
    """
    TestPDB: Test that aromatic rings are assigned correctly.

    Walks every (name, protein) pair in self.proteins and checks that no
    entry of protein.aromatic_rings is None.
    """
    for name, protein in self.proteins:
      # The proteins should have aromatic rings assigned already by
      # load_from_files()
      # Single-argument print(...) behaves identically on Python 2 and is
      # also valid syntax on Python 3, unlike the bare print statement.
      print("Processing aromatics for %s" % name)
      for aromatic in protein.aromatic_rings:
        assert aromatic is not None

  def test_get_phenylalanine_aromatics(self):
    """
    TestPDB: Check the aromatic rings reported for phenylalanine residues.

    Expects 13 rings from self.prgr_pdb, each with 6 atom indices.
    """
    residues = self.prgr_pdb.get_residues()
    rings = self.prgr_pdb.get_phenylalanine_aromatics(residues)

    # prgr has 13 phenylalanines with one aromatic ring apiece.
    assert len(rings) == 13
    # Every phenylalanine ring is six-membered.
    assert all(len(ring.indices) == 6 for ring in rings)

  def test_get_tyrosine_aromatics(self):
    """
    TestPDB: Check the aromatic rings reported for tyrosine residues.

    Expects 10 rings from self.prgr_pdb, each with 6 atom indices.
    """
    residues = self.prgr_pdb.get_residues()
    rings = self.prgr_pdb.get_tyrosine_aromatics(residues)
    # prgr has 10 tyrosines with one aromatic ring apiece.
    assert len(rings) == 10
    # Collect the ring sizes; every tyrosine ring is six-membered.
    ring_sizes = [len(ring.indices) for ring in rings]
    assert ring_sizes == [6] * len(ring_sizes)

  def test_get_histidine_aromatics(self):
    """
    TestPDB: Test that histidine aromatic rings are retrieved.

    Expects 7 rings from self.prgr_pdb, each with 5 atom indices.
    """
    res_list = self.prgr_pdb.get_residues()
    histidine_aromatics = self.prgr_pdb.get_histidine_aromatics(res_list)
    # prgr has 7 histidines, each of which has 1 aromatic ring.
    assert len(histidine_aromatics) == 7
    for aromatic in histidine_aromatics:
      # The aromatic rings in histidine have 5 elements each.
      # Single-argument print(...) behaves identically on Python 2 and is
      # also valid syntax on Python 3, unlike the bare print statement.
      print(len(aromatic.indices))
      assert len(aromatic.indices) == 5

  def test_get_tryptophan_aromatics(self):
    """
    TestPDB: Test that tryptophan aromatic rings are retrieved.

    Expects 10 rings from self.prgr_pdb: 5 with 6 atom indices and 5 with
    5 atom indices.
    """
    res_list = self.prgr_pdb.get_residues()
    tryptophan_aromatics = self.prgr_pdb.get_tryptophan_aromatics(res_list)
    # prgr has 5 tryptophans, each of which has 2 aromatic rings.
    # Single-argument print(...) behaves identically on Python 2 and is
    # also valid syntax on Python 3, unlike the bare print statement.
    print(len(tryptophan_aromatics))
    assert len(tryptophan_aromatics) == 10
    num_five_rings, num_six_rings = 0, 0
    for aromatic in tryptophan_aromatics:
      # One aromatic ring in each tryptophan has 6 elements,
      # while the other has 5 elements.
      if len(aromatic.indices) == 6:
        num_six_rings += 1
      elif len(aromatic.indices) == 5:
        num_five_rings += 1
    assert num_six_rings == 5
    assert num_five_rings == 5

  def test_connected_atoms(self):
    """
    TestPDB: Check element-filtered neighbour lookup via connected_atoms.

    Adds a C, an O and an H atom to self.pdb, bonds the carbon to the other
    two, and expects exactly one "O" and one "H" neighbour for atom 1.
    """
    # self.pdb must start out empty.
    assert not self.pdb.all_atoms.keys()

    carbon = Atom(element="C")
    oxygen = Atom(element="O")
    hydrogen = Atom(element="H")
    # Insertion order matters: the connectivity below refers to the atoms as
    # 1 (C), 2 (O) and 3 (H) -- presumably assigned in order by
    # add_new_atom.
    for atom in (carbon, oxygen, hydrogen):
      self.pdb.add_new_atom(atom)

    # Carbon is bonded to both the oxygen and the hydrogen.
    carbon.indices_of_atoms_connecting = [2, 3]
    oxygen.indices_of_atoms_connecting = [1]
    hydrogen.indices_of_atoms_connecting = [1]

    oxygen_neighbours = self.pdb.connected_atoms(1, "O")
    assert len(oxygen_neighbours) == 1

    hydrogen_neighbours = self.pdb.connected_atoms(1, "H")
    assert len(hydrogen_neighbours) == 1

  def test_load_bonds_from_pdb(self):
    """
    TestPDB: Verifies that bonds can be loaded from PDB.

    Builds a three-atom CO2 structure, writes three CONECT records to a
    temporary file and checks the connectivity that load_bonds_from_pdb()
    stores on each atom.
    """
    pdb = PDB()
    # Test that we can load CO2
    carbon_atom = Atom(element="C")
    oxygen_atom_1 = Atom(element="O")
    oxygen_atom_2 = Atom(element="O")

    pdb.add_new_atom(carbon_atom)
    pdb.add_new_atom(oxygen_atom_1)
    pdb.add_new_atom(oxygen_atom_2)
    # One CONECT record per list entry. The separating commas matter:
    # without them Python concatenates the adjacent string literals into a
    # single string, so the file would hold one long line instead of three
    # records.
    lines = [
      "CONECT    1    2    3                                                 ",
      "CONECT    2                                                           ",
      "CONECT    3                                                           ",
    ]
    # Text mode allows the str payload to be written on Python 3 as well;
    # the default "w+b" mode only accepts bytes there.
    with tempfile.NamedTemporaryFile(mode="w") as temp:
      temp.write("\n".join(lines))
      # Flush so the records are on disk before the file is read by name.
      temp.flush()
      pdb.load_bonds_from_pdb(temp.name)
    # Only the carbon's record lists bonded partners.
    assert len(carbon_atom.indices_of_atoms_connecting) == 2
    assert len(oxygen_atom_1.indices_of_atoms_connecting) == 0
    assert len(oxygen_atom_2.indices_of_atoms_connecting) == 0


  def test_connected_heavy_atoms(self):
    """
    TestPDB: Check neighbour lookup via connected_heavy_atoms.

    Adds a C, an O and an H atom to self.pdb, bonds the carbon to the other
    two, and expects connected_heavy_atoms(1) to return only index 2 (the
    oxygen).
    """
    # self.pdb must start out empty.
    assert not self.pdb.all_atoms.keys()

    carbon = Atom(element="C")
    oxygen = Atom(element="O")
    hydrogen = Atom(element="H")
    # Insertion order matters: the connectivity below refers to the atoms as
    # 1 (C), 2 (O) and 3 (H) -- presumably assigned in order by
    # add_new_atom.
    for atom in (carbon, oxygen, hydrogen):
      self.pdb.add_new_atom(atom)

    # Carbon is bonded to both the oxygen and the hydrogen.
    carbon.indices_of_atoms_connecting = [2, 3]
    oxygen.indices_of_atoms_connecting = [1]
    hydrogen.indices_of_atoms_connecting = [1]

    heavy_neighbours = self.pdb.connected_heavy_atoms(1)
    assert len(heavy_neighbours) == 1
    # The single neighbour reported must be the oxygen, not the hydrogen.
    assert heavy_neighbours[0] == 2

  def test_assign_non_protein_charges(self):
    """
    TestPDB: Check the charges assigned to a ligand on load.

    Uses ammonium sulfate, (NH4)+(NH4)+(SO4)(2-), and expects three charged
    groups: two positive and one negative.
    """
    salt = PDB()
    pdb_file = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdb")
    pdbqt_file = os.path.join(data_dir(), "ammonium_sulfate_hyd.pdbqt")
    # Per the original test, loading identifies non-protein charges
    # automatically, so no separate assignment call is made.
    salt.load_from_files(pdb_file, pdbqt_file)
    assert len(salt.charges) == 3

    # Tally the sign of every charge in one pass.
    signs = [bool(charge.positive) for charge in salt.charges]
    assert signs.count(True) == 2
    assert signs.count(False) == 1

  def test_metallic_charges(self):
    """
    TestPDB: Check that a lone magnesium atom yields one metallic charge.
    """
    ion_pdb = PDB()
    # Place a single MG atom at the origin as a non-protein atom.
    origin = Point(coords=np.array([0,0,0]))
    ion_pdb.add_new_non_protein_atom(
        Atom(element="MG", coordinates=origin))
    assert len(ion_pdb.identify_metallic_charges()) == 1

  def test_nitrogen_charges(self):
    """
    TestPDB: Check charged nitrogen groups in two small molecules.

    Ammonium sulfate should give two positive nitrogen charges and
    pyrrolidine should give one.
    """
    # Ammonium sulfate, (NH4)+(NH4)+(SO4)(2-): one charged nitrogen group
    # per ammonium, so two in total.
    sulfate = PDB()
    sulfate_pdb_file = os.path.join(
        data_dir(), "ammonium_sulfate_hyd.pdb")
    sulfate_pdbqt_file = os.path.join(
        data_dir(), "ammonium_sulfate_hyd.pdbqt")
    sulfate.load_from_files(sulfate_pdb_file, sulfate_pdbqt_file)
    sulfate_charges = sulfate.identify_nitrogen_charges()
    assert len(sulfate_charges) == 2
    # Both ammonium groups should be positive.
    assert sulfate_charges[0].positive
    assert sulfate_charges[1].positive

    # Pyrrolidine, (CH2)4NH: the nitrogen should be sp3 hybridized, so it is
    # likely to pick up an extra proton at physiological pH.
    ring = PDB()
    ring_pdb_file = os.path.join(data_dir(), "pyrrolidine_hyd.pdb")
    ring_pdbqt_file = os.path.join(data_dir(), "pyrrolidine_hyd.pdbqt")
    ring.load_from_files(ring_pdb_file, ring_pdbqt_file)
    ring_charges = ring.identify_nitrogen_charges()
    assert len(ring_charges) == 1
    # The protonated nitrogen should be positive.
    assert ring_charges[0].positive

  def test_carbon_charges(self):
    """
    TestPDB: Check charged carbon groups in three small molecules.

    Each molecule is expected to yield exactly one carbon charge with the
    sign listed in the table below.
    """
    # (pdb file, pdbqt file, expected to be positive?)
    cases = [
        # Guanidine is positively charged at physiological pH.
        ("guanidine_hyd.pdb", "guanidine_hyd.pdbqt", True),
        # Sulfaguanidine contains a guanidine group that is likely to be
        # positively protonated at physiological pH.
        ("sulfaguanidine_hyd.pdb", "sulfaguanidine_hyd.pdbqt", True),
        # Formic acid is a carboxylic acid, which should be negatively
        # charged.
        ("formic_acid_hyd.pdb", "formic_acid_hyd.pdbqt", False),
    ]
    for pdb_name, pdbqt_name, expect_positive in cases:
      molecule = PDB()
      pdb_file = os.path.join(data_dir(), pdb_name)
      pdbqt_file = os.path.join(data_dir(), pdbqt_name)
      molecule.load_from_files(pdb_file, pdbqt_file)
      charges = molecule.identify_carbon_charges()
      assert len(charges) == 1
      if expect_positive:
        assert charges[0].positive
      else:
        assert not charges[0].positive

  def test_phosphorus_charges(self):
    """
    TestPDB: Verify that Phosphorus groups are charged correctly.

    Loads CID82671 and expects exactly one phosphorus charge, which must not
    be positive.
    """
    # CID82671 contains a phosphate between two aromatic groups.
    phosphate_pdb = PDB()
    phosphate_pdb_path = os.path.join(data_dir(),
      "82671_hyd.pdb")
    # Pass the .pdbqt file as the second argument, as every other
    # load_from_files() call in these tests does; the .pdb file was being
    # supplied for both arguments.
    # NOTE(review): confirm 82671_hyd.pdbqt is present in data_dir().
    phosphate_pdbqt_path = os.path.join(data_dir(),
      "82671_hyd.pdbqt")
    phosphate_pdb.load_from_files(
        phosphate_pdb_path, phosphate_pdbqt_path)
    phosphorus_charges = phosphate_pdb.identify_phosphorus_charges()
    assert len(phosphorus_charges) == 1
    assert not phosphorus_charges[0].positive  # Should be negatively charged.


  def test_sulfur_charges(self):
    """
    TestPDB: Check the charged sulfur group found in triflic acid.

    Expects exactly one sulfur charge, which must not be positive.
    """
    acid = PDB()
    pdb_file = os.path.join(data_dir(), "triflic_acid_hyd.pdb")
    pdbqt_file = os.path.join(data_dir(), "triflic_acid_hyd.pdbqt")
    acid.load_from_files(pdb_file, pdbqt_file)
    charges = acid.identify_sulfur_charges()
    assert len(charges) == 1
    # The lone sulfur group should be negatively charged.
    assert not charges[0].positive


  def test_ligand_assign_aromatics(self):
    """
    TestPDB: Verify that aromatic rings in ligands are identified.

    Loads benzene and expects a single aromatic ring made up of atom indices
    1 through 6.
    """
    benzene_pdb = PDB()
    benzene_pdb_path = os.path.join(data_dir(), "benzene_hyd.pdb")
    benzene_pdbqt_path = os.path.join(data_dir(), "benzene_hyd.pdbqt")
    benzene_pdb.load_from_files(benzene_pdb_path, benzene_pdbqt_path)

    # A benzene should have exactly one aromatic ring.
    # Single-argument print(...) behaves identically on Python 2 and is
    # also valid syntax on Python 3, unlike the bare print statement.
    print(benzene_pdb.aromatic_rings)
    assert len(benzene_pdb.aromatic_rings) == 1
    # The first 6 atoms in the benzene pdb form the aromatic ring.
    # Compared as sets, so the order of the indices does not matter.
    assert (set(benzene_pdb.aromatic_rings[0].indices)
         == set([1,2,3,4,5,6]))

  def test_assign_secondary_structure(self):
    """
    TestPDB: Smoke-test secondary structure assignment.

    Only checks that assign_secondary_structure() can be called on
    self.prgr_pdb; nothing is asserted about its result.
    """
    # TODO(rbharath): This test is just a stub. Add a more realistic test
    # that checks that nontrivial secondary structure is computed correctly
    # here.
    prgr = self.prgr_pdb
    prgr.assign_secondary_structure()
    

  def test_get_structure_dict(self):
    """
    TestPDB: Verify that dict with rudimentary structure labels is generated.

    Calls get_structure_dict() on self.prgr_pdb and prints the result and
    its length.

    TODO(rbharath): This is just a stub. Add some nontrivial tests here.
    """
    structures = self.prgr_pdb.get_structure_dict()
    # Single-argument print(...) behaves identically on Python 2 and is
    # also valid syntax on Python 3, unlike the bare print statement.
    print(structures)
    print(len(structures))