Beispiel #1
0
class PDBTransformer:
  '''
  Builds ligand-centered protein-ligand systems, optionally augments them with
  rotations and reflections, crops each one to an axis-aligned box, and saves
  the results as PDB and pickle files.

  Molecules are expected to expose an ``all_atoms`` mapping of index -> atom, where
  each atom has ``coordinates.coords`` (a length-3 numpy array), plus the
  ``load_from_files``, ``save_pdb`` and ``add_new_atom`` methods used below.
  '''

  def __init__(self):
    # Populated by transform(); None until then.
    self.protein = None
    self.ligand = None
    self.system = None
    self.box = None
    # Full edge lengths of the cropping box along x, y and z. The large defaults
    # mean generate_box() only drops atoms 50000 or more units from the origin.
    self.box_x = 100000.0
    self.box_y = 100000.0
    self.box_z = 100000.0


  def transform(self, protein_pdb, protein_pdbqt, ligand_pdb, ligand_pdbqt, 
                save_dir, box_x, box_y, box_z, nb_rotations = 0, nb_reflections=0):
    '''Center a protein-ligand complex on the ligand, augment it, crop it to a box, and save everything.

    Steps:
    1. Load the protein and the ligand from their pdb/pdbqt files.
    2. Compute the ligand centroid and subtract it from every protein and ligand atom.
    3. Merge protein and ligand into one system.
    4. For each of nb_rotations rotations, store a rotated copy of the system under key (i+1, 0);
       for each of nb_reflections reflections of that rotated copy, store it under key (i+1, j+1).
       The untransformed system is stored under key (0, 0). Reflections are only generated inside
       the rotation loop, so nb_reflections has no effect when nb_rotations is 0.
    5. Crop every stored system with generate_box() using the box_x/box_y/box_z dimensions.
    6. Save every system and every box as "<save_dir>/<protein_name>_<i>_<j>_system.pdb|.pickle"
       and "<save_dir>/<protein_name>_<i>_<j>_box.pdb|.pickle".

    protein_name is the second-to-last "/"-separated component of protein_pdb (the name of the
    directory holding the file); a path containing no "/" falls back to the file name itself.

    Arguments:
      protein_pdb, protein_pdbqt: paths (strings) to the protein structure files
      ligand_pdb, ligand_pdbqt: paths (strings) to the ligand structure files
      save_dir: directory into which all output files are written
      box_x, box_y, box_z: full box edge lengths; converted with float() and stored on self
      nb_rotations: number of rotated copies to generate
      nb_reflections: number of reflected copies to generate per rotated copy

    Returns:
      dict mapping (rotation_index, reflection_index) -> cropped molecule (box)
    '''
    self.protein = PDB()
    self.protein.load_from_files(protein_pdb, protein_pdbqt)

    self.ligand = PDB()
    self.ligand.load_from_files(ligand_pdb, ligand_pdbqt)

    self.box_x = float(box_x)
    self.box_y = float(box_y)
    self.box_z = float(box_z)

    # Index len-2 (rather than -2) so a bare file name without "/" yields the
    # file name instead of raising IndexError.
    path_parts = str(protein_pdb).split("/")
    protein_name = path_parts[len(path_parts) - 2]

    # Both molecules are shifted by the *ligand* centroid so the ligand sits at the origin.
    ligand_centroid = compute_centroid(self.ligand)
    self.ligand = self.subtract_centroid(self.ligand, ligand_centroid)
    self.protein = self.subtract_centroid(self.protein, ligand_centroid)

    self.system = self.merge_molecules(self.protein, self.ligand)
    original_system = deepcopy(self.system)
    self.box = self.generate_box(original_system)

    transformed_systems = {(0, 0): original_system}
    for i in range(int(nb_rotations)):
      rotated_system = self.rotate_molecule(original_system)
      transformed_systems[(i + 1, 0)] = rotated_system
      for j in range(int(nb_reflections)):
        transformed_systems[(i + 1, j + 1)] = self.reflect_molecule(rotated_system)

    transformed_boxes = {}
    for key, transformed_system in transformed_systems.items():
      transformed_boxes[key] = self.generate_box(transformed_system)

    for key, transformed_system in transformed_systems.items():
      system_pdb_path = "%s/%s_%d_%d_system.pdb" %(save_dir, protein_name, key[0], key[1])
      system_pickle_path = "%s/%s_%d_%d_system.pickle" %(save_dir, protein_name, key[0], key[1])
      print(key)
      print(system_pdb_path)
      transformed_system.save_pdb(system_pdb_path)
      # Context manager guarantees the pickle file is flushed and closed.
      with open(system_pickle_path, "wb") as pickle_file:
        pickle.dump(transformed_system, pickle_file)

    for key, transformed_box in transformed_boxes.items():
      box_pdb_path = "%s/%s_%d_%d_box.pdb" %(save_dir, protein_name, key[0], key[1])
      box_pickle_path = "%s/%s_%d_%d_box.pickle" %(save_dir, protein_name, key[0], key[1])
      transformed_box.save_pdb(box_pdb_path)
      with open(box_pickle_path, "wb") as pickle_file:
        pickle.dump(transformed_box, pickle_file)

    return(transformed_boxes)


  def generate_box(self, mol):
    '''
    generate_box takes as input a molecule and returns a deep copy of it from which every
    atom lying on or outside the box has been removed. An atom is removed when the absolute
    value of any coordinate is >= half of the corresponding box dimension (self.box_x/y/z),
    i.e. the box is centered on the origin. The input molecule is not modified.
    '''
    molecule = deepcopy(mol)
    half_x = self.box_x / 2.
    half_y = self.box_y / 2.
    half_z = self.box_z / 2.

    # Collect first, delete afterwards: the mapping must not change size while iterated.
    atoms_to_remove = []
    for index, atom in molecule.all_atoms.items():
      coords = np.abs(atom.coordinates.coords)
      if coords[0] >= half_x or coords[1] >= half_y or coords[2] >= half_z:
        atoms_to_remove.append((index, atom))

    for index, atom in atoms_to_remove:
      # remove_atom returns None when it refuses a removal; keep the current
      # molecule in that case instead of rebinding it to None.
      updated = self.remove_atom(molecule, index, atom)
      if updated is not None:
        molecule = updated

    return(molecule)

  def remove_atom(self, molecule, atom_index, atom):
    '''
    remove_atom works by simply deleting the entry corresponding to that atom from the 
    molecule.all_atoms dictionary (in place).

    As a safety check the entry stored under atom_index must compare equal to the given
    atom; otherwise a warning is printed, nothing is deleted and None is returned.
    On success the (mutated) molecule is returned.
    '''
    if molecule.all_atoms[atom_index] != atom:
      print("Removing atoms from dictionary not safe!!")
      return None

    del molecule.all_atoms[atom_index]
    return(molecule)

  def subtract_centroid(self, molecule, centroid):
    '''
    subtracts the centroid, a numpy array of dim 3, from all coordinates of all atoms in the molecule.
    The subtraction is done in place; the same molecule object is returned.
    '''
    for atom in molecule.all_atoms.values():
      atom.coordinates.coords -= centroid
    return(molecule)

  def rotate_molecule(self, mol):
    '''
    Returns a rotated deep copy of mol; mol itself is not modified.

    Pseudocode:
    1. Obtain a rotation matrix R from generate_random_rotation_matrix()
       (cf. that function for how the rotation is sampled).
    2. Apply R to all atomic coordinates. 
    3. Return rotated molecule
    '''
    molecule = deepcopy(mol)
    # Snapshot the keys once so stacking and write-back use the same order.
    indices = list(molecule.all_atoms)
    if not indices:
      # Nothing to rotate; np.column_stack would raise on an empty list.
      return(molecule)

    R = generate_random_rotation_matrix()
    # One column per atom, so a single matrix product rotates every atom.
    all_coordinates = np.column_stack([molecule.all_atoms[index].coordinates.coords for index in indices])
    rotated_coordinates = np.dot(R, all_coordinates)
    for j, index in enumerate(indices):
      molecule.all_atoms[index].coordinates.coords = rotated_coordinates[:,j]

    return(molecule)

  def reflect_molecule(self, mol):
    '''
    Returns a reflected deep copy of mol; mol itself is not modified.

    Pseudocode:
    1. Obtain a vector a from generate_random_unit_vector() (a direction in 3-D space)
    2. For each atom, project its coordinates onto a,
      and subtract twice the projection from the original coordinates to obtain its reflection 
      through the plane through the origin that is perpendicular to a
    '''
    molecule = deepcopy(mol)
    a = generate_random_unit_vector()
    # Dividing by a.a keeps the formula valid even if a is not exactly unit length.
    a_dot_a = np.dot(a,a)
    for atom in molecule.all_atoms.values():
      v = atom.coordinates.coords 
      atom.coordinates.coords = v - 2. * (np.dot(a,v) / a_dot_a * a)
    return(molecule)

  def merge_molecules(self, protein, ligand):
    '''
    Takes as input protein and ligand objects and adds ligand atoms to a deep copy of the protein,
    and returns that new instance, called system, that contains both sets of atoms.
    The protein argument is not modified. Ligand atoms are handed to add_new_atom() as-is, so
    indexing (and whether the atoms are copied) is decided by that method.
    '''
    system = deepcopy(protein)
    for ligand_atom in ligand.all_atoms.values():
      system.add_new_atom(ligand_atom)
    return(system)
Beispiel #2
0
  def transform(self, protein_pdb, protein_pdbqt, ligand_pdb, ligand_pdbqt, 
                save_dir, box_x, box_y, box_z, nb_rotations = 0, nb_reflections=0):
    '''Center a protein-ligand complex on the ligand, augment it, crop it to a box, and save everything.

    Steps:
    1. Load the protein and the ligand from their pdb/pdbqt files.
    2. Compute the ligand centroid and subtract it from every protein and ligand atom.
    3. Merge protein and ligand into one system.
    4. For each of nb_rotations rotations, store a rotated copy of the system under key (i+1, 0);
       for each of nb_reflections reflections of that rotated copy, store it under key (i+1, j+1).
       The untransformed system is stored under key (0, 0). Reflections are only generated inside
       the rotation loop, so nb_reflections has no effect when nb_rotations is 0.
    5. Crop every stored system with generate_box() using the box_x/box_y/box_z dimensions.
    6. Save every system and every box as "<save_dir>/<protein_name>_<i>_<j>_system.pdb|.pickle"
       and "<save_dir>/<protein_name>_<i>_<j>_box.pdb|.pickle".

    protein_name is the second-to-last "/"-separated component of protein_pdb (the name of the
    directory holding the file); a path containing no "/" falls back to the file name itself.

    Arguments:
      protein_pdb, protein_pdbqt: paths (strings) to the protein structure files
      ligand_pdb, ligand_pdbqt: paths (strings) to the ligand structure files
      save_dir: directory into which all output files are written
      box_x, box_y, box_z: full box edge lengths; converted with float() and stored on self
      nb_rotations: number of rotated copies to generate
      nb_reflections: number of reflected copies to generate per rotated copy

    Returns:
      dict mapping (rotation_index, reflection_index) -> cropped molecule (box)
    '''
    self.protein = PDB()
    self.protein.load_from_files(protein_pdb, protein_pdbqt)

    self.ligand = PDB()
    self.ligand.load_from_files(ligand_pdb, ligand_pdbqt)

    self.box_x = float(box_x)
    self.box_y = float(box_y)
    self.box_z = float(box_z)

    # Index len-2 (rather than -2) so a bare file name without "/" yields the
    # file name instead of raising IndexError.
    path_parts = str(protein_pdb).split("/")
    protein_name = path_parts[len(path_parts) - 2]

    # Both molecules are shifted by the *ligand* centroid so the ligand sits at the origin.
    ligand_centroid = compute_centroid(self.ligand)
    self.ligand = self.subtract_centroid(self.ligand, ligand_centroid)
    self.protein = self.subtract_centroid(self.protein, ligand_centroid)

    self.system = self.merge_molecules(self.protein, self.ligand)
    original_system = deepcopy(self.system)
    self.box = self.generate_box(original_system)

    transformed_systems = {(0, 0): original_system}
    for i in range(int(nb_rotations)):
      rotated_system = self.rotate_molecule(original_system)
      transformed_systems[(i + 1, 0)] = rotated_system
      for j in range(int(nb_reflections)):
        transformed_systems[(i + 1, j + 1)] = self.reflect_molecule(rotated_system)

    transformed_boxes = {}
    for key, transformed_system in transformed_systems.items():
      transformed_boxes[key] = self.generate_box(transformed_system)

    for key, transformed_system in transformed_systems.items():
      system_pdb_path = "%s/%s_%d_%d_system.pdb" %(save_dir, protein_name, key[0], key[1])
      system_pickle_path = "%s/%s_%d_%d_system.pickle" %(save_dir, protein_name, key[0], key[1])
      print(key)
      print(system_pdb_path)
      transformed_system.save_pdb(system_pdb_path)
      # Context manager guarantees the pickle file is flushed and closed.
      with open(system_pickle_path, "wb") as pickle_file:
        pickle.dump(transformed_system, pickle_file)

    for key, transformed_box in transformed_boxes.items():
      box_pdb_path = "%s/%s_%d_%d_box.pdb" %(save_dir, protein_name, key[0], key[1])
      box_pickle_path = "%s/%s_%d_%d_box.pickle" %(save_dir, protein_name, key[0], key[1])
      transformed_box.save_pdb(box_pdb_path)
      with open(box_pickle_path, "wb") as pickle_file:
        pickle.dump(transformed_box, pickle_file)

    return(transformed_boxes)