def align_structs(id1, chain1, id2, chain2):
    """Download two entries, superimpose the second on the first, report RMSD.

    Both entries are fetched with ``PDBList``, parsed as mmCIF and reduced to
    atom lists by ``create_atoms_list``.  When the two lists differ in length
    the pairing is delegated to ``bonus_9_2``.  The RMSD of the fit is printed
    and both structures are passed to ``saving_file``.

    :param id1: the first file id
    :param chain1: the first protein's chain
    :param id2: the second file id
    :param chain2: the second protein's chain
    """
    # Fetch and parse both entries.
    downloader = pdb.PDBList()
    path1 = downloader.retrieve_pdb_file(id1)
    path2 = downloader.retrieve_pdb_file(id2)
    cif_parser = pdb.MMCIFParser()
    struct1 = cif_parser.get_structure("p1", path1)
    struct2 = cif_parser.get_structure("p2", path2)

    # Atom lists used for the fit; they must end up the same length.
    atoms1 = create_atoms_list(struct1, chain1)
    atoms2 = create_atoms_list(struct2, chain2)
    if len(atoms1) != len(atoms2):
        atoms1, atoms2 = bonus_9_2(chain1, chain2, struct1, struct2)

    # First list is the fixed set, second the moving one; the transform is
    # then applied to every atom of the first model of structure 2.
    aligner = pdb.Superimposer()
    aligner.set_atoms(atoms1, atoms2)
    aligner.apply(struct2[0].get_atoms())
    print(aligner.rms)

    for entry_id, structure in ((id1, struct1), (id2, struct2)):
        saving_file(entry_id, structure)
class Protein:
    """Instance is one protein"""

    # Module-length table, read once while the class body executes.
    # Each line of the file is "<module file name> <length>".
    modules_path = './data/modules-length'
    with open(modules_path, 'r') as f:
        modules_length = dict()
        for line in f:
            words = line.split()
            modules_length[words[0]] = int(words[1])

    # Shared Biopython helpers, reused by every instance.
    parser = PDB.PDBParser()
    sup = PDB.Superimposer()

    @staticmethod
    def kabsch(q, p, modules_range=None):
        """Return the superposition RMSD between (parts of) two proteins.

        :param q: First instance of Protein.
        :param p: Second instance of Protein.
        :param modules_range: Indices of the modules to compare one by one.
                              When omitted the whole chains are compared.
        :return: A single RMSD when ``modules_range`` is None, otherwise a
                 list with one RMSD per requested module.
        """
        assert len(q.residue_chain) == len(p.residue_chain)
        aligner = Protein.sup
        if modules_range is None:
            aligner.set_atoms(q.residue_chain, p.residue_chain)
            return aligner.rms

        per_module = []
        for index in modules_range:
            # A module's atoms start after the atoms of all previous modules.
            first = sum(q.modules_sections[:index])
            last = first + q.modules_sections[index]
            aligner.set_atoms(q.residue_chain[first:last],
                              p.residue_chain[first:last])
            per_module.append(aligner.rms)
        return per_module

    def __init__(self, structure_name, pdb_path, json_path, strict=True):
        """Parse the structure and the JSON list of modules it is built from.

        :param structure_name: Name given to the parsed structure.
        :param pdb_path: Path of the PDB file.
        :param json_path: Path of a JSON file whose 'nodes' entry lists the
                          module names.
        :param strict: When true, assert that the module lengths add up to
                       the number of CA atoms found.
        """
        self.name = structure_name
        self.path = pdb_path
        self.structure = Protein.parser.get_structure(self.name, self.path)
        with open(json_path, 'r') as f:
            self.modules_chain = json.load(f)['nodes']
        self.modules_sections = [
            Protein.modules_length[x + '.pdb'] for x in self.modules_chain
        ]
        # One CA atom per residue, in structure order.
        self.residue_chain = [
            residue['CA'] for residue in self.structure.get_residues()
        ]
        if strict:
            assert sum(self.modules_sections) == len(self.residue_chain)
def __call__(self):
    """Return a new Superimposer fitted on ``self.ref_atoms`` / ``self.alt_atoms``."""
    fitted = PDB.Superimposer()
    # First list is the fixed set, second the one that would be moved.
    fitted.set_atoms(self.ref_atoms, self.alt_atoms)
    return fitted
def superimpose_and_rotate(eq_chain1, eq_chain2, moving_chain, curr_struct, rec_level_complex):
    """Superimpose two equivalent chains and try to add a third, moved chain.

    The transformation that fits ``eq_chain2`` onto ``eq_chain1`` is applied
    to ``moving_chain``.  The moved chain is added to the first model of
    ``curr_struct`` only when it does not clash and the fit RMSD is <= 3.0.

    Keyword arguments:
    eq_chain1 -- common chain in the current structure (curr_struct)
    eq_chain2 -- common chain in the structure the moving chain comes from
    moving_chain -- chain that may be added to the current complex
    curr_struct -- structure being built; its first model receives the chain
    rec_level_complex -- recursion level of building the complex

    Returns a tuple ``(curr_struct, added, clash, clashing_chains,
    moving_chain)`` where ``added`` is 1 when the chain was added, else 0.
    When the two common atom lists differ in length nothing is moved and
    ``(curr_struct, 0, False, set(), moving_chain)`` is returned.
    """
    # All residues of the common chain, from both structures.
    res_chain1 = list(eq_chain1.get_residues())
    res_chain2 = list(eq_chain2.get_residues())

    # Only atoms of residues common to both chains can be paired for the fit.
    common_res_s1 = get_list_of_common_res(res_chain1, res_chain2)
    common_res_s2 = get_list_of_common_res(res_chain2, res_chain1)
    common_atoms_s1 = get_atom_list_from_res_list(common_res_s1)
    common_atoms_s2 = get_atom_list_from_res_list(common_res_s2)

    # The superimposer needs two lists of equal length.
    if len(common_atoms_s1) != len(common_atoms_s2):
        return curr_struct, 0, False, set(), moving_chain

    # First argument is fixed, second is moving; both are lists of atoms.
    sup = pdb.Superimposer()
    sup.set_atoms(common_atoms_s1, common_atoms_s2)
    rms = sup.rms

    # Move the candidate chain with the transformation just computed.
    sup.apply(list(moving_chain.get_atoms()))

    added = 0
    clash, clashing_chains = is_steric_clash(curr_struct, moving_chain)

    # Add only when there is no clash and the RMSD is low enough to consider
    # the two common chains the same.
    if not clash and rms <= 3.0:
        # tuple() accepts both string and tuple ids; the previous
        # ``my_id + rand`` raised TypeError for a plain string id.
        base_id = tuple(moving_chain.id)
        chain_names = [x.id for x in curr_struct[0].get_chains()]
        while added == 0:
            # Random suffix plus the recursion level at which the chain is added.
            candidate_id = base_id + (create_random_chars_id(6),
                                      str(rec_level_complex))
            if candidate_id not in chain_names:
                moving_chain.id = candidate_id
                curr_struct[0].add(moving_chain)
                added = 1

    return curr_struct, added, clash, clashing_chains, moving_chain
def Superimpose_Chain(reference, interaction, target, pairs, collisions_accepted, radius, percent):
    """Move the partner of a common chain onto the model and check collisions.

    Input:
    -reference = chain object used as reference
    -interaction = string with the two chain names of an interacting pair
    -target = chain name of the common chain
    -pairs = dictionary: interaction pair -> {chain name -> chain object}
    -collisions_accepted = number of atom collisions allowed in each join
    -radius = radius passed on to the collision check
    -percent = similarity percentage passed on to the similarity check

    Output:
    -a transformed deep copy of the partner chain, or None when the number
     of collisions exceeds ``collisions_accepted``
    """
    partner_chains = pairs[interaction]
    fixed_atoms, mobile_atoms = Check_Similarity(reference,
                                                 partner_chains[target],
                                                 percent)

    # Fit the common chain of the pair onto the reference chain.
    fitter = pdb.Superimposer()
    fitter.set_atoms(fixed_atoms, mobile_atoms)
    rotation, translation = fitter.rotran

    # The mobile chain is whatever remains of the pair name without the target.
    mobile_chain_name = interaction.replace(target, "", 1)
    mobile_chain = cp.deepcopy(partner_chains[mobile_chain_name])
    mobile_chain.transform(rotation, translation)

    model_atom_list = [
        atom for chain in reference.get_parent() for atom in chain.get_atoms()
    ]
    addition_atom_list = list(mobile_chain.get_atoms())
    collisions = Collision_Check(model_atom_list, addition_atom_list, radius)
    collision_count = len(collisions)

    if collision_count > 0:
        s.argprint("Number of collisions:", s.options.verbose, s.options.quiet, 2)
        s.argprint(collision_count, s.options.verbose, s.options.quiet, 2)

    if collision_count > collisions_accepted:
        e = s.CollisionAppears(target, mobile_chain_name, collisions)
        s.argprint(e, s.options.verbose, s.options.quiet, 2)
        s.argprint(e.Get_Collisions(), s.options.verbose, s.options.quiet, 3)
        return None

    return mobile_chain
def superimpose(self, hetid=None, within=8.0):
    """
    The superimposition method. If optional `hetid` is supplied, the selected
    residue indices are first narrowed to the binding site of that ligand.

    Only non-hydrogen atoms of selected residues are used for the fit; the
    resulting transformation is applied to every atom of ``self.other_struc``
    and the RMSD is stored in ``self.rms``.

    :param hetid: Ligand identifier
    :type hetid: str
    :param within: binding site cutoff distance
    :type within: float

    .. code-block:: python

        # Example usage
        from pdb_superimposer import ChainSuperimposer, Helper

        ref = Helper.protein_from_file("2VTA", "2VTA.pdb")
        ref_chain = [c for c in ref[0]][0]

        other = Helper.protein_from_file("6YLK", "6YLK.pdb")
        other_chain = [c for c in other[0]][0]

        cs = ChainSuperimposer(reference=ref_chain, other=other_chain, other_struc=other)
        cs.superimpose()

    """
    if hetid:
        # Restrict the selection to residues around the ligand.
        pocket = self.binding_site(chain=self.other, hetid=hetid, within=within)
        self.selected_index = self.selected_index.intersection(pocket)

    def heavy_atoms(residues):
        """Collect non-hydrogen atoms of residues whose number is selected."""
        picked = []
        for resi in residues:
            for atm in resi:
                if resi.get_id()[1] in self.selected_index and atm.element != "H":
                    picked.append(atm)
        return picked

    fitter = PDB.Superimposer()
    reference_atms = heavy_atoms(self.reference_seq.values())
    other_atms = heavy_atoms(self.other_seq.values())
    fitter.set_atoms(reference_atms, other_atms)

    # Move the whole "other" structure, not only the atoms used for the fit.
    fitter.apply(self.other_struc.get_atoms())
    self.rms = fitter.rms
def superimpose(first, second):
    """Fit ``second`` onto ``first``, move the second model and save it.

    Uses the names ``second_model`` and ``second_structure`` from the
    enclosing scope: the fitted transformation is applied to every atom of
    ``second_model`` and ``second_structure`` is written with ``PDBIO``.
    The RMSD is printed and the output file name is read from standard input.

    :param first: fixed atom list
    :param second: moving atom list
    :return: the file name entered by the user
    """
    fitter = PDB.Superimposer()
    fitter.set_atoms(first, second)
    fitter.apply(second_model.get_atoms())

    print("The RMSD is: ")
    print(fitter.rms)

    structure = second_structure
    writer = PDB.PDBIO()
    writer.set_structure(structure)
    saved_file = input("Name of output file (.pdb)?")
    writer.save(saved_file)
    return saved_file
def superimpose(fixed_vector, moving_vector, moving_atom_list):
    """
    Fit the moving vector onto the fixed vector and apply the resulting
    rotation and translation to a list of atoms.

    :param fixed_vector: atoms used as reference (e.g. a bond of the core).
    :param moving_vector: atoms that are fitted onto the reference (e.g. a
        bond of the fragment).
    :param moving_atom_list: atoms that receive the transformation.
    :return: whatever ``Superimposer.apply`` returns.
        NOTE(review): the atoms appear to be modified in place, so callers
        should probably keep using ``moving_atom_list`` - confirm.
    """
    fitter = bio.Superimposer()
    # First argument is the fixed set, second the moving one.
    fitter.set_atoms(fixed_vector, moving_vector)
    outcome = fitter.apply(moving_atom_list)
    return outcome
def align(ref_structure, mobile_structure):
    """Return a Superimposer fitted on the atom pairs from ``common_set``.

    ``common_set`` supplies the reference and mobile atom lists; nothing is
    moved here, the caller decides what to apply the transformation to.
    """
    ref_atoms, alt_atoms = common_set(ref_structure, mobile_structure)
    fitted = PDB.Superimposer()
    fitted.set_atoms(ref_atoms, alt_atoms)
    return fitted
def residuelist_rmsd(c1_list, c2_list, sidechains=False, superimpose=True):
    """Compute the RMSD between two equally long lists of residues.

    Atoms are paired by name, residue by residue; a name missing from either
    residue of a pair is skipped.

    :param c1_list: residues of the first chain
    :param c2_list: residues of the second chain, same length as ``c1_list``
    :param sidechains: also pair the side-chain atoms listed for the residue
                       name of the first residue
    :param superimpose: fit the two atom sets before measuring; otherwise the
                        RMSD of the coordinates as given is returned
    :return: ``(number of paired atoms, rmsd, rotran or None, dev_per_res)``
             where ``dev_per_res`` maps each residue of ``c1_list`` to the
             pointwise deviations of its paired atoms
    :raises Exception: when the two lists differ in length
    """
    import forgi.threedee.model.similarity as ftms

    if len(c1_list) != len(c2_list):
        raise Exception(
            "Chains of different length. (Maybe an RNA-DNA hybrid?)")

    to_residues = []
    crds1 = []
    crds2 = []
    all_atoms1 = []
    all_atoms2 = []

    for r1, r2 in zip(c1_list, c2_list):
        if sidechains:
            anames = nonsidechain_atoms + \
                side_chain_atoms[r1.resname.strip()]
        else:
            anames = nonsidechain_atoms

        for a in anames:
            try:
                at1 = r1[a]
                at2 = r2[a]
            except KeyError:
                # Atom absent from one of the residues: not comparable.
                # Only the lookup failure is swallowed; a bare except here
                # would also hide interrupts and programming errors.
                continue
            all_atoms1.append(at1)
            all_atoms2.append(at2)
            crds1.append(at1.coord)
            crds2.append(at2.coord)
            to_residues.append(r1)

    diff_vecs = ftms._pointwise_deviation(crds1, crds2)
    dev_per_res = defaultdict(list)
    for i, res in enumerate(to_residues):
        dev_per_res[res].append(diff_vecs[i])

    if superimpose:
        sup = bpdb.Superimposer()
        sup.set_atoms(all_atoms1, all_atoms2)
        return (len(all_atoms1), sup.rms, sup.rotran, dev_per_res)

    return (len(all_atoms1), ftuv._vector_set_rmsd(crds1, crds2), None,
            dev_per_res)
def superimpose_chains_test(chain_one_real, chain_two_real):
    """
    Fit a Superimposer on deep copies of two chains and return it.

    The atoms of each copy are sorted and both lists are cut to the length
    of the shorter one before fitting.  The inputs themselves are untouched
    and no transformation is applied.
    """
    copies = (copy.deepcopy(chain_one_real), copy.deepcopy(chain_two_real))
    super_imposer = pdb.Superimposer()
    atoms_one, atoms_two = (sorted(chain.get_atoms()) for chain in copies)

    # The superimposer needs two lists of equal length.
    shared = min(len(atoms_one), len(atoms_two))
    super_imposer.set_atoms(atoms_one[:shared], atoms_two[:shared])
    return super_imposer
def superimpose_chains(chain_structure_one, chain_structure_two):
    """
    Superimpose two chains or structures returning a superimposer object.

    Works on deep copies; the sorted atom lists are truncated to a common
    length before the fit.  Nothing is moved.
    """
    first_copy = copy.deepcopy(chain_structure_one)
    second_copy = copy.deepcopy(chain_structure_two)
    fitter = pdb.Superimposer()

    fixed = sorted(first_copy.get_atoms())
    moving = sorted(second_copy.get_atoms())

    # Drop the surplus atoms of the longer list.
    if len(fixed) < len(moving):
        moving = moving[:len(fixed)]
    else:
        fixed = fixed[:len(moving)]

    fitter.set_atoms(fixed, moving)
    return fitter
def superimpose(structure_one_real, structure_two_real):
    """
    Superimpose a copy of structure two onto a copy of structure one.

    The atom lists (in structure order) are cut to the length of the shorter
    one, fitted, and the transformation is applied to the first model of the
    copy of structure two.

    :return: ``(moved copy of structure two, rmsd of the fit)``
    """
    fixed_copy = copy.deepcopy(structure_one_real)
    moved_copy = copy.deepcopy(structure_two_real)
    fitter = pdb.Superimposer()

    fixed_atoms = list(fixed_copy.get_atoms())
    moving_atoms = list(moved_copy.get_atoms())

    # The superimposer needs two lists of equal length.
    shared = min(len(fixed_atoms), len(moving_atoms))
    fitter.set_atoms(fixed_atoms[:shared], moving_atoms[:shared])

    fitter.apply(list(moved_copy[0].get_atoms()))
    return (moved_copy, fitter.rms)
def align_structures(residues, structures, bead_name=atom_name, quiet=False):
    """Superimpose all models in structures by the desired beads in the
    nominated residues and return the target.

    The target is the first model of the first structure in the list; every
    model of every structure (the target included) is fitted onto it.

    :param residues: residues whose beads are used for the fit
    :param structures: indexable collection of structures
    :param bead_name: name of the bead taken from each residue
    :param quiet: suppress the progress output
    :return: the target model
    """
    # Beads of the target model; the fixed set for every superposition.
    target_model = structures[0][0]
    target_beads = list(
        beads_from_model(residues, target_model, bead_name).values())

    longest_line_len = 0
    for structure in structures:
        if not quiet:
            print_line = "Aligning " + str(structure) + "..."
            # "\r" keeps the progress on one terminal line.
            print(print_line, end="\r")
            longest_line_len = max(longest_line_len, len(print_line))

        for model in structure:
            mobile_beads = list(
                beads_from_model(residues, model, bead_name).values())
            sup = PDB.Superimposer()
            sup.set_atoms(target_beads, mobile_beads)
            # Move the whole model, not only the beads used for the fit.
            sup.apply(model.get_atoms())

    # The counters that named per-structure output files were removed: they
    # only fed a save call that is no longer part of this function.

    if not quiet:
        final_print_str = ("Performed " + str(len(structures)) +
                           " structure alignments.")
        # Pad so the summary overwrites the longest progress line.
        num_spaces = max(0, longest_line_len - len(final_print_str) + 12)
        print(final_print_str + " " * num_spaces)

    return target_model
def generateResults(self):
    """Build a ResultSet with RMSD, INF and DI for every solution.

    For each solution:
    - ``rmsd`` is the RMSD after fitting its common atoms onto those of the
      real solution (left untouched when the fit raises ZeroDivisionError);
    - ``inf`` is common / (solution atoms + real-solution atoms - common);
    - ``di`` is ``rmsd / inf`` (left untouched when that cannot be computed).

    :return: the populated ResultSet
    """
    self._calculateCommonAtoms()
    resultSet = ResultSet(self.problemId, self.currentMainSolution)

    # prepare results
    for solution in self.solutions:
        resultSet.results.append(Result(resultSet, solution))

    # rmsd
    for i in resultSet.results:
        solution = i.solution
        try:
            sup = PDB.Superimposer()
            sup.set_atoms(solution.commonAtomRealSolution,
                          solution.commonAtomThisSolution)
            sup.apply(solution.commonAtomThisSolution)
            i.rmsd = sup.rms
        except ZeroDivisionError:
            pass

    # INF
    # The atom count of the real solution is the same for every result, so
    # it is computed once (and only when there is something to score).
    if resultSet.results:
        realSolutionCount = len(
            list(self.currentMainSolution.structure.get_atoms()))
    for i in resultSet.results:
        solution = i.solution
        solutionCount = len(list(solution.structure.get_atoms()))
        commonCount = len(solution.commonAtomRealSolution)
        i.inf = (commonCount) / (solutionCount + realSolutionCount -
                                 commonCount)

    # DI
    for i in resultSet.results:
        try:
            i.di = i.rmsd / i.inf
        except (ZeroDivisionError, TypeError):
            pass

    return resultSet
def superimpose_models(self, reference_model=0):
    """Superimpose two or more structures.

    Superimpose every model of ``self.structure`` onto a reference model by
    using the Bio.PDB.Superimposer class.  Only residues that contain a CA
    atom take part; from those, either all atoms (when no atom selection is
    set) or the selected atom names are paired.

    Args:
        reference_model is an int with the reference model (default=0).
        When ``self.reference`` is set, its first model is used instead.

    Raises:
        KeyError: a selected atom name is missing from a paired residue.
    """
    ref_model = self.structure[reference_model]
    if self.reference is not None:
        ref_model = self.reference[0]

    for alt_model in self.structure:
        ref_atoms = []
        alt_atoms = []

        # Pair chains and residues positionally.
        for (ref_chain, alt_chain) in zip(ref_model, alt_model):
            for ref_res, alt_res in zip(ref_chain, alt_chain):
                # CA = alpha carbon
                if not ref_res.has_id('CA'):
                    continue
                if self.__atom == []:
                    # Iterating a residue yields its atoms.  This replaces
                    # Residue.get_atom(), which is not available in recent
                    # Biopython releases.
                    ref_atoms.extend(ref_res)
                    alt_atoms.extend(alt_res)
                else:
                    for atoms in self.__atom:
                        try:
                            ref_atoms.append(ref_res[atoms])
                            alt_atoms.append(alt_res[atoms])
                        except KeyError:
                            raise KeyError(('Your input data is '
                                            'misssing information for '
                                            '{:s} atoms. Input more '
                                            'complete data or select a'
                                            ' smaller set of '
                                            'atoms').format(atoms))

        # Align these paired atom lists:
        super_imposer = pdb.Superimposer()
        super_imposer.set_atoms(ref_atoms, alt_atoms)

        if ref_model.get_full_id() == alt_model.get_full_id():
            # Self/self fit: expect zero RMS, zero translation and an
            # identity rotation.  The deviation from identity is taken
            # element-wise before the absolute value; the previous
            # abs(rot) - I could not detect sign-flipped entries.
            assert np.abs(super_imposer.rms) < 0.0000001
            assert np.max(np.abs(super_imposer.rotran[1])) < 0.000001
            assert np.max(
                np.abs(super_imposer.rotran[0] - np.identity(3))) < 0.000001
        else:
            # Update the structure by moving all the atoms in
            # this model (not just the ones used for the alignment)
            super_imposer.apply(alt_model.get_atoms())
def _parse_selections(spec):
    """Split 'a,b,c:d,e,f' into a list of tuples of strings."""
    return [tuple(entry.split(',')) for entry in spec.split(':')]


def _select_atoms(model, selections):
    """Collect the named atom of every residue inside each selected range.

    Each selection is ``(atom name, chain id, first residue, last residue)``,
    all strings; both range ends are inclusive.
    """
    atoms = []
    for chain in model:
        for sel in selections:
            if chain.get_id() != sel[1]:
                continue
            first, last = int(sel[2]), int(sel[3])
            for res in chain:
                if first <= res.get_id()[1] <= last:
                    atoms.append(res[sel[0]])
    return atoms


def align_pdbs(referencePath,
               fitPath,
               optionsrefatomsA,
               optionsfitatomsA,
               optionsoutA,
               optionsaddatomsA=""):
    """Fit one structure onto a reference and save the moved model.

    :param referencePath: path of the reference PDB/CIF file
    :param fitPath: path of the PDB/CIF file that is fitted
    :param optionsrefatomsA: reference atoms, 'atom,chain,first,last'
        entries separated by ':'
    :param optionsfitatomsA: atoms of the fitted structure, same format
    :param optionsoutA: output file name, appended to the directory
        returned by ``_read_structure`` for the fitted file
    :param optionsaddatomsA: optional 'chain,first,last:chain,first,last'
        (reference entry, then fit entry); the reference residues in the
        range are spliced into the fitted chain before saving

    Exits with status 1 when an input path does not exist or when the
    add-atoms option does not have exactly two entries.
    """
    if not os.path.exists(referencePath):
        print("Error: File path for reference PDB or CIF file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if not os.path.exists(fitPath):
        print("Error: File path for PDB or CIF file to fit to reference does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    ref_al = _parse_selections(optionsrefatomsA)
    fit_al = _parse_selections(optionsfitatomsA)

    ref_structure, dir_path, _ = _read_structure(referencePath, 'reference',
                                                 'reference')
    fit_structure, dir_path, _ = _read_structure(fitPath, 'fit', 'fit')

    # Use the first model in the files for alignment.
    # Change the number 0 if you want to align to another model.
    ref_model = ref_structure[0]
    fit_model = fit_structure[0]

    # Atoms to align: the requested atom of each residue in the given ranges.
    ref_atoms = _select_atoms(ref_model, ref_al)
    fit_atoms = _select_atoms(fit_model, fit_al)

    super_imposer = struct.Superimposer()
    super_imposer.set_atoms(ref_atoms, fit_atoms)
    super_imposer.apply(fit_model.get_atoms())

    if optionsaddatomsA != "":
        # A region is a chain plus the first and last residue number of the
        # segment, e.g. B,3,10; both ends are included.
        add_region = _parse_selections(optionsaddatomsA)
        if len(add_region) != 2:
            print(
                "ERROR: Only two entries in the addatom option. One for reference, and one for fit."
            )
            sys.exit(1)
        print("Adding residues from (" + referencePath + "," +
              add_region[0][0] + "," + add_region[0][1] + "," +
              add_region[0][2] + ") ")
        print(" to (" + fitPath + "," + add_region[1][0] + "," +
              add_region[1][1] + "," + add_region[1][2] + ")")

        ref_chain_id = add_region[0][0]
        fit_chain_id = add_region[1][0]
        # NOTE: the segment limits of the *reference* entry are used for both
        # structures, as before; see the TODO below.
        seg_first = int(add_region[0][1])
        seg_last = int(add_region[0][2])

        add_res = []
        # Residues before the missing segment, from the incomplete chain (fit)
        for chain in fit_model:
            if chain.get_id() == fit_chain_id:
                for res in chain.get_residues():
                    if res.get_id()[1] < seg_first:
                        add_res.append(res)
        # Residues of the missing segment, from the complete chain (reference)
        for chain in ref_model:
            if chain.get_id() == ref_chain_id:
                for res in chain.get_residues():
                    if seg_first <= res.get_id()[1] <= seg_last:
                        add_res.append(res)
        # Residues after the missing segment, from the incomplete chain (fit)
        for chain in fit_model:
            if chain.get_id() == fit_chain_id:
                for res in chain.get_residues():
                    if res.get_id()[1] > seg_last:
                        add_res.append(res)

        newChain = struct.Chain.Chain(fit_chain_id)
        for res in add_res:
            newChain.add(res)

        # Rebuild the model in the original chain order, with the completed
        # chain taking the place of the incomplete one.
        chain_order = list(fit_model)
        for chain in chain_order:
            fit_model.detach_child(chain.get_id())
        for chain in chain_order:
            if chain.get_id() == fit_chain_id:
                fit_model.add(newChain)
            else:
                fit_model.add(chain)

        # TODO it is possible that a section with different residue ids is
        # added to a fitted section. In that case the residues of the chain
        # just added need to be renumbered so that the residue numbers of the
        # completed chain increase monotonically.

    # Print RMSD.  Each file is reported with its own atom selection (the two
    # selections used to be swapped in this message).
    print("Fiting " + fitPath + " by " + optionsfitatomsA + " to " +
          referencePath + " by " + optionsrefatomsA + " RMSD=" +
          str(super_imposer.rms))

    # Save the aligned version
    _save_structure(fit_model, dir_path + optionsoutA)
def get_simrna_ready(self, renumber_residues=True):
    """Get simrna_ready ..

    - take only first model,
    - renumber residues if renumber_residues=True
    - normalise residue names to A/C/G/U,
    - add a phosphate group to the first residue of a chain when it has no P,
    - keep only the expected atoms of each nucleotide, in a fixed order.

    The structure is round-tripped through ``/tmp/out.pdb`` and
    ``/tmp/outout.pdb``; ``self.lines`` is replaced by the lines of the
    rebuilt file.  Missing atoms are reported on standard output.

    .. warning:: requires: Biopython"""
    try:
        from Bio import PDB
        from Bio.PDB import PDBIO
    except ImportError:
        sys.exit(
            'Error: Install biopython to use this function (pip biopython)'
        )

    import warnings
    warnings.filterwarnings(
        'ignore',
        '.*Invalid or missing.*',
    )
    warnings.filterwarnings(
        'ignore',
        '.*with given element *',
    )

    G_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split()
    A_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split()
    U_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split()
    C_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split()
    atoms_by_base = {'G': G_ATOMS, 'A': A_ATOMS, 'C': C_ATOMS, 'U': U_ATOMS}

    # hack for amber/qrna: RC3, C3, RC5, C5 and RC all mean C (same for U/G/A)
    resname_aliases = {}
    for base in 'CUGA':
        for pattern in ('R%s3', '%s3', 'R%s5', '%s5', 'R%s'):
            resname_aliases[pattern % base] = base

    ftmp = '/tmp/out.pdb'
    self.write(ftmp, v=False)

    parser = PDB.PDBParser()
    struct = parser.get_structure('', ftmp)
    model = struct[0]

    s2 = PDB.Structure.Structure(struct.id)
    m2 = PDB.Model.Model(model.id)

    chains2 = []
    missing = []

    for chain in model.get_list():
        c2 = PDB.Chain.Chain(chain.id)
        c = 1  # new chain, goes from 1 if renumber True

        # Iterate over a snapshot of the residues of the chain.
        for r in list(chain):
            r.resname = r.resname.strip()
            r.resname = resname_aliases.get(r.resname, r.resname)

            r2 = PDB.Residue.Residue(r.id, r.resname.strip(), r.segid)
            if renumber_residues:
                r2.id = (r2.id[0], c, r2.id[2])  # renumber residues

            if c == 1 and not any(a.id == 'P' for a in r):
                # First residue without phosphate: graft one from a template
                # residue shipped next to this module.
                currfn = __file__
                if currfn == '':
                    path = '.'
                else:
                    path = os.path.dirname(currfn)
                if os.path.islink(currfn):
                    path = os.path.dirname(
                        os.readlink(path + os.sep +
                                    os.path.basename(currfn)))

                po3_struc = PDB.PDBParser().get_structure(
                    '', path + '/data/PO3_inner.pdb')
                po3 = [
                    po3_res for po3_res in po3_struc[0].get_residues()
                ][0]

                # Fit the template onto the sugar atoms of the residue and
                # move all template atoms accordingly.
                r_atoms = [r["O4'"], r["C4'"], r["C3'"]]
                po3_atoms = [po3["O4'"], po3["C4'"], po3["C3'"]]
                sup = PDB.Superimposer()
                sup.set_atoms(r_atoms, po3_atoms)
                sup.apply(po3_struc.get_atoms())

                r.add(po3['P'])
                r.add(po3['OP1'])
                r.add(po3['OP2'])
                # The template O5' replaces an existing one.
                if r.has_id("O5'"):
                    del r["O5'"]
                r.add(po3["O5'"])

            names = atoms_by_base.get(str(r.get_resname()).strip())
            if names is not None:
                for an in names:
                    # ignore_op3 is not defined in this method; presumably a
                    # module-level flag - TODO confirm.
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        r2.add(r[an])
                    except KeyError:
                        missing.append([an, chain.id, r, c])
                c2.add(r2)

            c += 1
        chains2.append(c2)

    io = PDBIO()
    s2.add(m2)
    for chain2 in chains2:
        m2.add(chain2)
    io.set_structure(s2)
    fout = '/tmp/outout.pdb'  # hack
    io.save(fout)

    if missing:
        print('REMARK 000 Missing atoms:')
        for i in missing:
            # Joined by hand so the output is identical under Python 2 and 3.
            print(' '.join(
                str(part) for part in ('REMARK 000 +', i[0], i[1], i[2],
                                       'residue #', i[3])))

    s = StrucFile(fout)
    self.lines = s.lines
def mp_superpose_opm(reference_chain,
                     target,
                     filename,
                     target_chain='A',
                     ref_model_id=0,
                     target_model_id=0,
                     ref_align_atoms=[],
                     target_align_atoms=[],
                     write_opm=False):
    """Superpose a target structure onto a reference obtained from OPM.

    :param reference_chain: '<reference id>_<chain id>' of the reference
    :param target: target structure, passed to ``PDBParser.get_structure``
    :param filename: where the moved target model is saved
    :param target_chain: chain id of the target used for the fit
    :param ref_model_id: model of the reference used for the fit
    :param target_model_id: model of the target that is fitted and saved
    :param ref_align_atoms: residue numbers of the reference to use;
        an empty list means every residue of the chain
    :param target_align_atoms: same, for the target
    :param write_opm: also write the reference text to 'ref_opm.pdb' next to
        ``filename``

    The list defaults are never modified inside this function.
    """
    # Adapted from https://gist.github.com/andersx/6354971
    # Copyright (c) 2010-2016 Anders S. Christensen

    def get_ref_struc(keyword):
        """Return the reference text from OPM, or None when it fails."""
        try:
            ref_struc = extract_from_opm(keyword)
        except Exception:
            logger.error("no OPM - id found - add a workaround, e.g. PDBTM")
            return None
        else:
            logger.info("Obtain structure from OPM: successful")
            return ref_struc

    def collect_ca_atoms(model, chain_id, residue_numbers):
        """CA atoms of the wanted residues of one chain of ``model``."""
        atoms = []
        for chain in model.get_chains():
            if chain.id != chain_id:
                continue
            for res in chain.get_residues():
                if residue_numbers == [] or res.get_id()[1] in residue_numbers:
                    atoms.extend(
                        atom for atom in res if atom.get_name() == 'CA')
        return atoms

    reference = reference_chain.split('_')[0]
    ref_chain = reference_chain.split('_')[1]
    ref_struc = get_ref_struc(reference)

    # Parse reference (from string) and target structure (from file)
    parser = PDB.PDBParser(QUIET=True)
    bio_ref_struc_raw = parser.get_structure("reference",
                                             io.StringIO(ref_struc))
    bio_target_struc_raw = parser.get_structure("target", target)

    # Select the model number - normally always the first
    bio_ref_struc = bio_ref_struc_raw[ref_model_id]
    bio_target_struc = bio_target_struc_raw[target_model_id]

    # Atoms to align.  The reference atoms come from the selected model, like
    # the target ones; walking the raw structure collected the chain from
    # every model and ignored ``ref_model_id``.
    align_ref_atoms = collect_ca_atoms(bio_ref_struc, ref_chain,
                                       ref_align_atoms)
    align_target_atoms = collect_ca_atoms(bio_target_struc, target_chain,
                                          target_align_atoms)

    # Superposer
    super_imposer = PDB.Superimposer()
    super_imposer.set_atoms(align_ref_atoms, align_target_atoms)
    super_imposer.apply(bio_target_struc)
    logger.info(f"RMSD of superimposed structures: {super_imposer.rms}")

    bioio = PDB.PDBIO()
    bioio.set_structure(bio_target_struc)
    bioio.save(filename)
    logger.info(f"Aligned structure saved")

    if write_opm:
        with open(os.path.join(os.path.dirname(filename), 'ref_opm.pdb'),
                  'w') as fp:
            fp.write(ref_struc)
        logger.info(f"write_opm set to true - OPM structure saved")
def align_structures(ref_structure, mobile_structure, mobile_xmap):
    """Resample the mobile map into the frame of the reference structure.

    Residues of the two structures are paired positionally (model by model,
    chain by chain); pairs in which either residue lacks a CA atom are
    skipped.  The transformation fitted on the CA pairs is used to
    rotate/translate ``mobile_xmap.xmap`` into a new map with the same
    spacegroup, cell and grid sampling.

    :param ref_structure: reference structure
    :param mobile_structure: structure belonging to ``mobile_xmap``
    :param mobile_xmap: object exposing the map as ``.xmap``
    :return: the transformed map exported as a numpy array
    """
    ref_atoms = []
    alt_atoms = []
    for ref_model, alt_model in zip(ref_structure, mobile_structure):
        for ref_chain, alt_chain in zip(ref_model, alt_model):
            for ref_res, alt_res in zip(ref_chain, alt_chain):
                # CA = alpha carbon.  Look both atoms up before storing
                # either, so the two lists always stay the same length.
                try:
                    ref_ca = ref_res['CA']
                    alt_ca = alt_res['CA']
                except KeyError:
                    continue
                ref_atoms.append(ref_ca)
                alt_atoms.append(alt_ca)

    super_imposer = PDB.Superimposer()
    super_imposer.set_atoms(
        ref_atoms,
        alt_atoms,
    )

    translation_mobile_to_ref = super_imposer.rotran[1]
    rotation_mobile_to_ref = super_imposer.rotran[0]

    # The rotation is handed to clipper transposed.
    rtop = clipper_python.RTop_orth(
        clipper_python.Mat33_double(np.transpose(rotation_mobile_to_ref)),
        clipper_python.Vec3_double(translation_mobile_to_ref),
    )

    xmap_new = clipper_python.Xmap_float(
        mobile_xmap.xmap.spacegroup,
        mobile_xmap.xmap.cell,
        mobile_xmap.xmap.grid_sampling,
    )

    clipper_python.rotate_translate(
        mobile_xmap.xmap,
        xmap_new,
        rtop,
    )

    return xmap_new.export_numpy()
def pdb_rmsd(c1, c2, sidechains=False, superimpose=True, apply_sup=False):
    '''
    Calculate the rmsd between the matching atoms of two RNA chains.

    Both chains are first reduced to residues whose name is one of the
    accepted nucleotide names; the remaining residues are paired by position.
    Within a residue pair only atoms present in *both* residues are used.

    :param c1: A Bio.PDB.Chain
    :param c2: Another Bio.PDB.Chain
    :param sidechains: If True, the sugar and base atoms listed for the
                       residue type are compared in addition to the
                       module-level `backbone_atoms`; otherwise only
                       `backbone_atoms` are compared.
    :param superimpose: If True, fit the atoms of c2 onto those of c1 with a
                        Bio.PDB Superimposer and report its rms. If False,
                        the rmsd is computed by `ftuv.vector_set_rmsd` on the
                        coordinates as they are.
    :param apply_sup: If True (and superimpose is True), apply the fitted
                      transformation to all atoms of c2.
    :return: A tuple `(number_of_atom_pairs, rmsd, rotran)`. `rotran` is
             None when `superimpose` is False.
    :raises Exception: If the filtered chains differ in length.
    :raises KeyError: If `sidechains` is True and a residue name has no atom
                      table (only A, C, G and U have one).
    '''
    # Both the old ('*') and the new (') spelling of the sugar atoms are
    # listed; names a residue does not carry are skipped during the lookup.
    a_5_names = ['P', 'O5*', 'C5*', 'C4*', 'O4*', 'O2*']
    a_5_names += ['P', "O5'", "C5'", "C4'", "O4'", "O2'"]
    a_3_names = ["C1*", "C2*", "C3*", "O3*"]
    a_3_names += ["C1'", "C2'", "C3'", "O3'"]

    base_names = {
        'U': ['N1', 'C2', 'O2', 'N3', 'C4', 'O4', 'C5', 'C6'],
        'C': ['N1', 'C2', 'O2', 'N3', 'C4', 'N4', 'C5', 'C6'],
        'A': ['N1', 'C2', 'N3', 'C4', 'C5', 'C6', 'N6', 'N7', 'C8', 'N9'],
        'G': ['N1', 'C2', 'N2', 'N3', 'C4', 'C5', 'C6', 'O6', 'N7', 'C8',
              'N9'],
    }
    a_names = dict()
    for res_type, base in base_names.items():
        a_names[res_type] = a_5_names + base + a_3_names

    all_atoms1 = []
    all_atoms2 = []

    acceptable_residues = ['A', 'C', 'G', 'U', 'rA', 'rC', 'rG', 'rU', 'DG']
    c1_list = [
        cr for cr in c1.get_list()
        if cr.resname.strip() in acceptable_residues
    ]
    c2_list = [
        cr for cr in c2.get_list()
        if cr.resname.strip() in acceptable_residues
    ]

    if len(c1_list) != len(c2_list):
        raise Exception("Chains of different length.")

    for r1, r2 in zip(c1_list, c2_list):
        if sidechains:
            # Use the residue of the current pair; the previous code indexed
            # the chain with an undefined variable and always failed here.
            anames = backbone_atoms + a_names[r1.resname.strip()]
        else:
            anames = backbone_atoms

        for a in anames:
            try:
                at1 = r1[a]
                at2 = r2[a]
            except KeyError:
                # Atom missing in one of the two residues: skip the pair.
                continue
            all_atoms1.append(at1)
            all_atoms2.append(at2)

    if superimpose:
        sup = bpdb.Superimposer()
        sup.set_atoms(all_atoms1, all_atoms2)

        if apply_sup:
            sup.apply(c2.get_atoms())

        return (len(all_atoms1), sup.rms, sup.rotran)
    else:
        crvs1 = np.array([a.get_vector().get_array() for a in all_atoms1])
        crvs2 = np.array([a.get_vector().get_array() for a in all_atoms2])

        return (len(all_atoms1), ftuv.vector_set_rmsd(crvs1, crvs2), None)
def pdb_rmsd(c1, c2, sidechains=False, superimpose=True, apply_sup=False):
    '''
    Calculate the rmsd between the matching atoms of two RNA chains and
    collect the per-residue deviations.

    Both chains are first reduced to residues whose name is one of the
    accepted nucleotide names; the remaining residues are paired by position.
    Within a residue pair only atoms present in *both* residues are used.

    :param c1: A Bio.PDB.Chain
    :param c2: Another Bio.PDB.Chain
    :param sidechains: If True, the sugar and base atoms listed for the
                       residue type are compared in addition to the
                       module-level `backbone_atoms`; otherwise only
                       `backbone_atoms` are compared.
    :param superimpose: If True, fit the atoms of c2 onto those of c1 with a
                        Bio.PDB Superimposer and report its rms. If False,
                        the rmsd is computed by `ftuv.vector_set_rmsd` on the
                        coordinates as they are.
    :param apply_sup: If True (and superimpose is True), apply the fitted
                      transformation to all atoms of c2.
    :return: A tuple `(number_of_atom_pairs, rmsd, rotran, dev_per_res)`.
             `rotran` is None when `superimpose` is False. `dev_per_res`
             maps each residue of c1 to the list of values that
             `_pointwise_deviation` returned for its atoms; these are
             computed from the coordinates before any superposition is
             applied.
    :raises Exception: If the filtered chains differ in length.
    :raises KeyError: If `sidechains` is True and a residue name has no atom
                      table (only A, C, G and U have one).
    '''
    import forgi.threedee.model.similarity as ftms

    # Both the old ('*') and the new (') spelling of the sugar atoms are
    # listed; names a residue does not carry are skipped during the lookup.
    a_5_names = ['P', 'O5*', 'C5*', 'C4*', 'O4*', 'O2*']
    a_5_names += ['P', "O5'", "C5'", "C4'", "O4'", "O2'"]
    a_3_names = ["C1*", "C2*", "C3*", "O3*"]
    a_3_names += ["C1'", "C2'", "C3'", "O3'"]

    base_names = {
        'U': ['N1', 'C2', 'O2', 'N3', 'C4', 'O4', 'C5', 'C6'],
        'C': ['N1', 'C2', 'O2', 'N3', 'C4', 'N4', 'C5', 'C6'],
        'A': ['N1', 'C2', 'N3', 'C4', 'C5', 'C6', 'N6', 'N7', 'C8', 'N9'],
        'G': ['N1', 'C2', 'N2', 'N3', 'C4', 'C5', 'C6', 'O6', 'N7', 'C8',
              'N9'],
    }
    a_names = dict()
    for res_type, base in base_names.items():
        a_names[res_type] = a_5_names + base + a_3_names

    all_atoms1 = []
    all_atoms2 = []

    acceptable_residues = ['A', 'C', 'G', 'U', 'rA', 'rC', 'rG', 'rU', 'DG']
    c1_list = [
        cr for cr in c1.get_list()
        if cr.resname.strip() in acceptable_residues
    ]
    c2_list = [
        cr for cr in c2.get_list()
        if cr.resname.strip() in acceptable_residues
    ]

    if len(c1_list) != len(c2_list):
        raise Exception(
            "Chains of different length. (Maybe an RNA-DNA hybrid?)")

    to_residues = []
    crds1 = []
    crds2 = []
    for r1, r2 in zip(c1_list, c2_list):
        if sidechains:
            # Use the residue of the current pair; the previous code indexed
            # the chain with a variable that was not bound yet and always
            # failed here.
            anames = backbone_atoms + a_names[r1.resname.strip()]
        else:
            anames = backbone_atoms

        for a in anames:
            try:
                at1 = r1[a]
                at2 = r2[a]
            except KeyError:
                # Atom missing in one of the two residues: skip the pair.
                continue
            else:
                all_atoms1.append(at1)
                all_atoms2.append(at2)
                crds1.append(at1.coord)
                crds2.append(at2.coord)
                to_residues.append(r1)

    # One deviation per atom pair, grouped by the c1 residue it belongs to.
    diff_vecs = ftms._pointwise_deviation(crds1, crds2)
    dev_per_res = defaultdict(list)
    for idx, res in enumerate(to_residues):
        dev_per_res[res].append(diff_vecs[idx])

    if superimpose:
        sup = bpdb.Superimposer()
        sup.set_atoms(all_atoms1, all_atoms2)

        if apply_sup:
            sup.apply(c2.get_atoms())

        return (len(all_atoms1), sup.rms, sup.rotran, dev_per_res)
    else:
        crvs1 = np.array([a.get_vector().get_array() for a in all_atoms1])
        crvs2 = np.array([a.get_vector().get_array() for a in all_atoms2])

        return (len(all_atoms1), ftuv.vector_set_rmsd(crvs1, crvs2), None,
                dev_per_res)
def structure_in_created_structures(structure, created_structures) -> bool:
    """Ask if structure is already in created_structures (a list of structures).

    Returns a boolean.

    Considerations: Return True if all of the chains in structure are in one
    of the structures in created_structures and RMSD <= 3.0, meaning they are
    the same structure.

    Outline of the check, per candidate in created_structures:
      1. the sorted chain ids of both structures must be equal;
      2. the first chain of ``structure`` is superimposed on each candidate
         chain with the same id until one fits with rms <= 3.0;
      3. that transformation is applied to the working copy of
         ``structure`` and every candidate chain must then find a distinct
         same-id chain in ``structure`` with a Kabsch rmsd <= 3.0.
    """
    # Work on copies so the caller's objects are not modified.
    # NOTE(review): if created_structures is a plain list, .copy() is a
    # shallow copy - the structures inside it are shared, not duplicated.
    structure = structure.copy()
    created_structures = created_structures.copy()

    # Get the ids of the chains in structure.
    # NOTE(review): chain ids are indexed with [1], so they are presumably
    # two-element ids rather than single letters - confirm.
    chain_ids_structure = tuple(
        sorted([x.id[1] for x in structure.get_chains()]))

    # loop through each of the contents of created_structures:
    for created_structure in created_structures:

        # get the chains of created_structure
        chain_ids_created_structure = tuple(
            sorted([x.id[1] for x in created_structure.get_chains()]))

        # ask if the number of each and ids of the chains are the same:
        if chain_ids_structure == chain_ids_created_structure:

            # pick one chain in structure to compare with the chains in created
            chain_str = list(structure.get_chains())[0]
            id_str = chain_str.id[1]

            # try to find a partner in created_structure:
            for chain_created_str in created_structure.get_chains():

                id_created_str = chain_created_str.id[1]

                # if they have the same id they are potential partners.
                # Superimpose these next.
                if id_str == id_created_str:

                    # get list of residues:
                    res_chain1 = list(chain_str.get_residues())
                    res_chain2 = list(chain_created_str.get_residues())

                    # Only atoms belonging to residues common to both chains
                    # can be superimposed, so first we obtain a list of the
                    # common residues (once from each chain's point of view)
                    common_res_s1 = get_list_of_common_res(
                        res_chain1, res_chain2)
                    common_res_s2 = get_list_of_common_res(
                        res_chain2, res_chain1)

                    # then we obtain a list of atom objects to use it later
                    common_atoms_s1 = get_atom_list_from_res_list(
                        common_res_s1)
                    common_atoms_s2 = get_atom_list_from_res_list(
                        common_res_s2)

                    # continue only if there are common atoms to fit
                    if len(common_atoms_s1) > 0:

                        # use the Superimposer
                        sup = pdb.Superimposer()

                        # first argument is fixed, second is moving. both are
                        # lists of Atom objects
                        sup.set_atoms(common_atoms_s2, common_atoms_s1)

                        # if I have superimposed same ID but different
                        # structure, try another chain
                        if sup.rms > 3.0:
                            continue

                        # Apply the transformation to every atom of the
                        # working copy of structure. The copy is moved in
                        # place, so later candidates see the moved atoms.
                        sup.apply(list(structure.get_atoms()))

                        # if the previous chain_str and chain_created_str are
                        # real partners, every chain of created_structure
                        # should now find its own partner chain in structure
                        partners = set()

                        for searching_partner in created_structure.get_chains(
                        ):

                            partner_found = False

                            for possible_partner in structure.get_chains():

                                # stop scanning once this chain has a partner
                                if partner_found is True:
                                    break

                                # same id, and not already used as a partner
                                if possible_partner.id[
                                        1] == searching_partner.id[
                                            1] and possible_partner not in partners:

                                    # get list of residues:
                                    res_partner1 = list(
                                        searching_partner.get_residues())
                                    res_partner2 = list(
                                        possible_partner.get_residues())

                                    # again restrict to the residues common
                                    # to both chains
                                    common_res_p1 = get_list_of_common_res(
                                        res_partner1, res_partner2)
                                    common_res_p2 = get_list_of_common_res(
                                        res_partner2, res_partner1)

                                    # then we obtain a list of coordinates
                                    common_coords_p1 = np.array([
                                        list(x.get_coord()) for x in
                                        get_atom_list_from_res_list(
                                            common_res_p1)
                                    ])
                                    common_coords_p2 = np.array([
                                        list(x.get_coord()) for x in
                                        get_atom_list_from_res_list(
                                            common_res_p2)
                                    ])

                                    rms = rmsd.kabsch_rmsd(
                                        common_coords_p2, common_coords_p1)

                                    if rms <= 3.0:
                                        partners.add(possible_partner)
                                        partner_found = True

                        # all chains have a partner, which means that the
                        # structure is in the created_structures
                        if len(partners) == len(
                                list(created_structure.get_chains())):
                            return True

    # if you didn't find any match return false:
    return False
def complex_builder(interaction_dict, pdb_models, num_models, max_chains,
                    verbose):
    """Build ``num_models`` macrocomplex models by chaining pairwise interactions.

    Every model starts from the pair returned by ``starting_model`` and is
    grown by superimposing, for each chain already in the model, the
    equivalent chain of a stored interaction and adding the interaction's
    other chain when it does not clash with the model.

    Arguments:
    interaction_dict -- maps a chain id to a list of tuples; element 0 of a
                        tuple is the model the interaction comes from,
                        element 1 the chain equivalent to the key and
                        element 2 the chain that interacts with it.
                        The lists are shuffled and pruned in place.
    pdb_models -- passed to ``starting_model`` to pick the starting pair.
    num_models -- number of models to build.
    max_chains -- a model stops growing once it holds this many chains.
    verbose -- when true, progress is reported on stderr/stdout.

    Returns the list of built models.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; the placement of the two debug dumps and of the final prints
    should be confirmed against the original file.
    NOTE(review): ``interaction_dict`` is never copied, so interactions
    consumed by one model are no longer available to the following ones.
    """
    output_objects = []
    for i in range(1, num_models + 1):
        if verbose:
            sys.stderr.write("Building Macrocomplex " + str(i) + " ...\n")
        macrocomplex = starting_model(pdb_models, verbose).copy()
        if verbose:
            sys.stderr.write(
                "Building it from model {}, which contains chains {} and {} \n\n"
                .format(macrocomplex.id,
                        [chain.id for chain in macrocomplex.get_chains()][0],
                        [chain.id for chain in macrocomplex.get_chains()][1]))
        # Debug dump of the pending interactions per chain id.
        # NOTE(review): `tuple` shadows the builtin throughout this function.
        for key, values in interaction_dict.items():
            print("{}:{} tuples".format(key, len(values)))
            for tuple in values:
                print("{}:{}".format(key, [element.id for element in tuple]))
        print("\n")
        # Per-chain-id counts of the model (updated as chains are added).
        model_stech = generate_model_profile(macrocomplex)
        macrocomplex.id = "Model_" + str(i)
        run = True  # While this variable is true, the program will keep trying to add chains to the macrocomplex
        num_of_chains = 2  # The model starts with 2 chains already
        # Number of times a chain was found with all its interactions depleted.
        # NOTE(review): never reset between passes, so it accumulates.
        num_empty_chains = 0
        while run:
            # Iterates the macrocomplex chains; chains added below are
            # appended to the very object being iterated.
            for chain in macrocomplex:
                if num_of_chains < max_chains:  # If the number of chains still hasn't reached the maximum allowed
                    # If this chain still has pending interactions. Chain id
                    # is the key of the dictionary
                    if len(interaction_dict[chain.id]) != 0:
                        if verbose:
                            sys.stderr.write(
                                "*** Adding interactions from chain {} ***\n\n"
                                .format(chain.id))
                        # Shuffle the interactions list (to avoid repetitive behaviour)
                        random.shuffle(interaction_dict[chain.id])
                        # NOTE(review): entries are deleted from this list
                        # inside the loop, which can make the iteration skip
                        # the entry that follows a deleted one.
                        for tuple in interaction_dict[chain.id]:
                            fix = tuple[1]  # Get the chain instance that corresponds to the same chain in macrocomplex
                            to_move = tuple[2]  # Get the chain instance that interacts with the other
                            sup = PDB.Superimposer()  # Generates a superimposer instance
                            # Get common atoms between the macrocomplex chain
                            # and the one in the interaction dictionary
                            chain_atoms, fix_atoms = chain.get_common_atoms(
                                fix)
                            sup.set_atoms(chain_atoms, fix_atoms)  # Generate the superposition
                            move = to_move.copy()  # Make a copy of the chain to move
                            sup.apply(move)  # Apply superposition matrix
                            move_atoms = sorted(move.get_atoms())
                            if not has_clashes(move_atoms, macrocomplex):
                                if verbose:
                                    sys.stderr.write(
                                        "-> Succesful superposition between " +
                                        str(chain.id) +
                                        " from macrocomplex and chain " +
                                        fix.id + " from model " +
                                        tuple[0].id + ".")
                                    sys.stderr.write(" Chain " +
                                                     str(num_of_chains) +
                                                     " added: Chain " +
                                                     move.id + " from model " +
                                                     tuple[0].id + ".\n\n")
                                move.parent = None  # Sets the parent to none to evade biopython's strict id policy
                                macrocomplex.add(move)  # Adds the target chain to the model
                                model_stech.setdefault(move.id, 0)
                                model_stech[move.id] += 1
                                num_of_chains += 1
                                index = interaction_dict[chain.id].index(tuple)
                                # Remove the tuple of the model that was just
                                # added. The tuple holding the same object
                                # would still need to be removed.
                                del interaction_dict[chain.id][index]
                                # Drop the entries of the added chain's own
                                # list that come from the same source model.
                                for redundant_tuple in interaction_dict[
                                        move.id]:
                                    if redundant_tuple[0].id == tuple[0].id:
                                        index = interaction_dict[
                                            move.id].index(redundant_tuple)
                                        del interaction_dict[move.id][index]
                            else:
                                if verbose:
                                    sys.stderr.write(
                                        "-> Unsuccesful superposition between "
                                        + str(chain.id) +
                                        " from macrocomplex and chain " +
                                        fix.id + " from model " +
                                        tuple[0].id + ".")
                                    sys.stderr.write(" Chain " + move.id +
                                                     " from model " +
                                                     tuple[0].id +
                                                     " NOT ADDED.\n\n")
                            # To remove the other tuple we may need to
                            # identify it by the model id and then work out
                            # how to delete it.
                            # Debug dump of the interaction lists after this
                            # attempt.
                            for key, values in interaction_dict.items():
                                print("{}:{} tuples".format(key, len(values)))
                                print("----------")
                                for tuple in values:
                                    print("{}:{}".format(
                                        key,
                                        [element.id for element in tuple]))
                            print("\n")
                    else:
                        if verbose:
                            print("Chain " + chain.id + " empty")
                        num_empty_chains += 1
                else:
                    run = False  # When the maximum chain treshold is reached stop running
                    break
            if num_empty_chains >= len(macrocomplex):  # If all chains are empty of interactions stop running
                run = False
        if verbose:
            stechometry_string = ""
            # Print the model's stechometry
            for key in sorted(model_stech.keys()):
                stechometry_string += key + ":" + str(model_stech[key]) + ","
            # strip the trailing comma
            stechometry_string = stechometry_string[:-1]
            print("Macrocomplex's" + str(i) + " Stoichiometry is: " +
                  stechometry_string)
            print("Macrocomplex " + str(i) + " finished")
        output_objects.append(macrocomplex)  # Add model to the models list
    return output_objects
def complex_builder(interaction_dict, pdb_models, num_models, max_chains,
                    stoich_dict, clash_dist, verbose):
    """Function that iteratively builds a macrocomplex model from the pairs of interactions.

    First, it selects the starting pair of interacting chains of the
    macrocomplex. From that, the chains of the macrocomplex are being
    iterated looking for possible interactions according to the interaction
    dictionary. If it does not encounter clashes, the two chains that are
    equivalent superimpose, resulting in the addition of the other
    interacting chain to the complex. That specific model of interacting
    chains is removed from a list of pending interactions, since it is
    already added. If clashes are found, the model is not added, waiting.
    Returns a list of resulting macrocomplex models.

    Keyword arguments:
    interaction_dict -- dictionary of tuples with the possible interactions
                        a given chain can perform. Its lists are shuffled
                        and pruned in place.
    pdb_models -- PDB models containing chains with unified IDs according
                  to sequence similarity
    num_models -- maximum number of models the function is going to produce
    max_chains -- maximum number of chains allowed in the models
    stoich_dict -- dictionary with the stoichiometry specified by the user;
                   missing chain ids are inserted with a count of 0
    clash_dist -- minimum clash distance between 2 atoms, passed on to
                  has_clashes (no default is set in this signature)
    verbose -- boolean, prints to stderr the progress of the program

    Considerations:
    Through all the process, the current stoichiometry of the building
    complex is analyzed. If a specific stoichiometry is provided, that is
    compared with the one of the macrocomplex. If the stoichiometry is
    fulfilled, the process stops. If there are no possible interactions
    left, the process stops. If the maximum number of chains is overpassed,
    the process stops. If no chains are added to complex during three
    iterations through the chains, the process stops.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; in particular the position of the stoichiometry check after the
    per-chain if/else should be confirmed against the original file.
    """
    output_objects = []
    for i in range(1, int(num_models) + 1):
        if verbose:
            sys.stderr.write("Building Macrocomplex " + str(i) + " ...\n")
        macrocomplex = starting_model(pdb_models, verbose)
        # Drop every pending interaction that comes from the starting model.
        # NOTE(review): entries are deleted from the list being iterated,
        # which can skip the entry following a deleted one; `tuple` also
        # shadows the builtin.
        for key, tuple in interaction_dict.items():
            for tuple in interaction_dict[key]:
                if tuple[0].id == str(macrocomplex.id):
                    index = interaction_dict[key].index(tuple)
                    del interaction_dict[key][index]
        if verbose:
            sys.stderr.write(
                "Building it from model {}, which contains chains {} and {}.\n\n"
                .format(macrocomplex.id,
                        [chain.id for chain in macrocomplex.get_chains()][0],
                        [chain.id for chain in macrocomplex.get_chains()][1]))
        # Per-chain-id counts of the model (updated as chains are added).
        model_stoich = get_model_stoichiometry(macrocomplex)
        macrocomplex.id = "Model_" + str(i)
        run = True  # While this variable is true, the program will keep trying to add chains to the macrocomplex
        u = 1  # pass counter used by the stall check at the end of the loop
        num_of_chains = 2
        # NOTE(review): never reset between passes, so it accumulates.
        num_empty_chains = 0
        while run:
            for chain in macrocomplex:  # Iterating through each chain of the macrocomplex
                if num_of_chains < int(max_chains):  # If the number of chains still hasn't reached the maximum allowed
                    # NOTE(review): if interaction_dict is a plain dict this
                    # is a shallow copy - both dicts share the same list
                    # objects, so in-place changes show up in both.
                    interaction_copy = interaction_dict.copy()
                    if len(interaction_dict[chain.id]) != 0:  # Check if the chain has possible interactions left to make
                        # Shuffle the interactions list (avoiding repetitive behaviour)
                        random.shuffle(interaction_dict[chain.id])
                        # NOTE(review): entries are deleted from this list
                        # inside the loop (see below).
                        for tuple in interaction_dict[chain.id]:
                            fix = tuple[1]  # Chain instance that corresponds to the same chain in macrocomplex
                            move = tuple[2]  # Chain instance that interacts with the other
                            if stoich_dict:
                                move_chain_id = move.id
                                model_stoich.setdefault(move_chain_id, 0)
                                model_number_chain = model_stoich[
                                    move_chain_id]
                                stoich_dict.setdefault(move_chain_id, 0)
                                # Don't add the chain if surpasses the stoichiometry given
                                if stoich_dict[
                                        move_chain_id] <= model_number_chain:
                                    if verbose:
                                        sys.stderr.write(
                                            " The current chain already fulfills the stoichiometry of the complex.\n"
                                        )
                                        sys.stderr.write(" Chain " + move.id +
                                                         " from model " +
                                                         tuple[0].id +
                                                         " NOT ADDED.\n\n")
                                    continue  # go to the next interaction tuple
                            sup = PDB.Superimposer()
                            chain_atoms, fix_atoms = chain.get_common_atoms(
                                fix)
                            sup.set_atoms(chain_atoms, fix_atoms)  # Generate the superposition
                            # The stored chain itself is transformed here;
                            # no copy is made first.
                            sup.apply(move)
                            move_atoms = sorted(move.get_atoms())
                            if not has_clashes(move_atoms, macrocomplex,
                                               clash_dist):
                                if verbose:
                                    sys.stderr.write(
                                        " Succesful superposition between " +
                                        str(chain.id) +
                                        " from macrocomplex and chain " +
                                        fix.id + " from model " +
                                        tuple[0].id + ".\n")
                                    sys.stderr.write(" Chain " +
                                                     str(num_of_chains) +
                                                     " added: Chain " +
                                                     move.id + " from model " +
                                                     tuple[0].id + ".\n\n")
                                # Detach from its former parent before adding
                                # it to the model.
                                move.parent = None
                                macrocomplex.add(move)
                                model_stoich.setdefault(move.id, 0)
                                model_stoich[move.id] += 1
                                num_of_chains += 1
                                index = interaction_dict[chain.id].index(tuple)
                                # Deleting the model that has just been added from the interaction_dict
                                del interaction_dict[chain.id][index]
                                # Also drop the entries of the added chain's
                                # own list that come from the same model.
                                for redundant_tuple in interaction_dict[
                                        move.id]:
                                    if redundant_tuple[0].id == tuple[0].id:
                                        index = interaction_dict[
                                            move.id].index(redundant_tuple)
                                        del interaction_dict[move.id][index]
                            else:
                                if verbose:
                                    sys.stderr.write(
                                        " Unsuccesful superposition between " +
                                        str(chain.id) +
                                        " from macrocomplex and chain " +
                                        fix.id + " from model " +
                                        tuple[0].id + ".\n")
                                    sys.stderr.write(" Chain " + move.id +
                                                     " from model " +
                                                     tuple[0].id +
                                                     " NOT ADDED.\n\n")
                    else:
                        if verbose:
                            sys.stderr.write(
                                "No possible interactions with chain " +
                                chain.id + " left.\n")
                        num_empty_chains += 1
                    # Stop as soon as the requested stoichiometry is matched
                    # exactly.
                    if stoich_dict:
                        if stoich_dict == model_stoich:
                            run = False
                            break
                else:
                    run = False  # When the maximum chain treshold is reached, stop running
                    break
            if num_empty_chains >= len(macrocomplex):  # If all chains of the macrocomplex have no possible interactions to make, stop running
                run = False
            # Intended stall check: stop after repeated passes in which no
            # chain could be added.
            # NOTE(review): interaction_copy shares its lists with
            # interaction_dict (shallow copy), so this comparison looks like
            # it is always true and `u` advances on every pass; it is also
            # unbound if the chain limit is hit before the first assignment.
            # Confirm the intended behaviour.
            if interaction_dict == interaction_copy:
                u += 1
                if u == 3:
                    run = False
                    break
        # Report the final chain counts as "id:count,id:count".
        stoichiometry_string = ""
        for key in sorted(model_stoich.keys()):
            stoichiometry_string += key + ":" + str(model_stoich[key]) + ","
        # strip the trailing comma
        stoichiometry_string = stoichiometry_string[:-1]
        sys.stderr.write("\nStoichiometry of macrocomplex " + str(i) +
                         " is: " + stoichiometry_string + ".\n")
        sys.stderr.write("Macrocomplex " + str(i) + " finished.\n")
        output_objects.append(macrocomplex)
    return output_objects
def main():
    """Compare a list of PDB files to a reference structure by CA rmsd.

    Reads the PDB file names listed in the ``-p`` text file (one per line),
    superimposes the CA atoms of each on the CA atoms of the ``-r``
    reference structure and writes two CSV files into the output directory:

    * ``all_<name>.csv``  -- every file with its rms
    * ``good_<name>.csv`` -- only the files whose rms is <= 10

    ``-p`` and ``-r`` are mandatory; argparse reports a usage error when one
    of them is missing. Each structure must contain the same number of CA
    atoms as the reference, otherwise the superimposer raises.
    """
    import os

    rms_cutoff = 10

    arg_parser = argparse.ArgumentParser(
        description='Compares a list of pdb '
        'files to a reference pdb structure')
    arg_parser.add_argument('-p',
                            '--pdb_file_list',
                            action='store',
                            nargs=1,
                            dest='pdb',
                            required=True,
                            help='List of pdb files (.txt) - provide '
                            'full pathname if not in current directory')
    arg_parser.add_argument('-r',
                            '--reference_pdb',
                            action='store',
                            nargs=1,
                            dest='ref',
                            required=True,
                            help='Full path to reference .pdb file')
    # An empty suffix is the default; the former './' default produced the
    # unusable file name "good_./.csv".
    arg_parser.add_argument('-n',
                            '--name',
                            action='store',
                            nargs=1,
                            dest='name',
                            default=[''],
                            help='Output file name '
                            'suffix to be used')
    arg_parser.add_argument('-i',
                            '--input_directory',
                            action='store',
                            nargs=1,
                            dest='input',
                            default=['./'],
                            help='Directory where '
                            'input pdb files are stored')
    arg_parser.add_argument('-o',
                            '--output_directory',
                            action='store',
                            nargs=1,
                            dest='output',
                            default=['./'],
                            help='Directory where '
                            'output log should be written')
    args = vars(arg_parser.parse_args())

    input_dir = args['input'][0]
    output_dir = args['output'][0]
    suffix = args['name'][0]

    # Get list of all pdb files after filtering ZDOCK output
    with open(args['pdb'][0], 'r') as pdb_file_list:
        pdb_files = pdb_file_list.read().splitlines()

    pdb_parser = PDB.PDBParser()

    def ca_atoms_of(structure):
        # Alpha carbons of a structure, in file order.
        return [
            atom for atom in structure.get_atoms() if atom.get_id() == 'CA'
        ]

    # Get a list of the atom objects from each structure. os.path.join keeps
    # this working when the input directory has no trailing separator.
    structure_atoms = [
        ca_atoms_of(
            pdb_parser.get_structure(pdb_file,
                                     os.path.join(input_dir, pdb_file)))
        for pdb_file in pdb_files
    ]
    ref_atoms = ca_atoms_of(pdb_parser.get_structure('ref', args['ref'][0]))

    good_pdb = []
    good_rms = []
    all_pdb = []
    all_rms = []
    sup = PDB.Superimposer()
    for pdb_file, ca_atoms in zip(pdb_files, structure_atoms):
        sup.set_atoms(ref_atoms, ca_atoms)
        all_pdb.append(pdb_file)
        all_rms.append(sup.rms)
        if sup.rms <= rms_cutoff:
            good_pdb.append(pdb_file)
            good_rms.append(sup.rms)

    good_struct = pd.DataFrame()
    good_struct['pdb'] = good_pdb
    good_struct['rms'] = good_rms
    good_struct.to_csv(os.path.join(output_dir, 'good_' + suffix + '.csv'),
                       index=False)

    all_struct = pd.DataFrame()
    all_struct['pdb'] = all_pdb
    all_struct['rms'] = all_rms
    all_struct.to_csv(os.path.join(output_dir, 'all_' + suffix + '.csv'),
                      index=False)
import os

from Bio import PDB
from Bio.PDB import PDBIO

# Superimpose three CA atoms of chain A onto the corresponding CA atoms of
# chain B of the same structure, then move all of chain A with the fitted
# transformation and save the result.
parser = PDB.MMCIFParser()
# Build the path with os.path.join so the script does not depend on the
# Windows path separator.
structure = parser.get_structure("2DN1", os.path.join("dn", "2dn1.cif"))

atom1 = structure[0]["A"][10]["CA"]
atom2 = structure[0]["A"][20]["CA"]
atom3 = structure[0]["A"][30]["CA"]

atom4 = structure[0]["B"][10]["CA"]
atom5 = structure[0]["B"][20]["CA"]
atom6 = structure[0]["B"][30]["CA"]

moving = [atom1, atom2, atom3]
fixed = [atom4, atom5, atom6]

# set_atoms takes the fixed atoms first and the moving atoms second.
sup = PDB.Superimposer()
sup.set_atoms(fixed, moving)

print(sup.rotran)
print('RMS:', sup.rms)

# rotran is (rotation, translation); apply it to the whole of chain A.
chainA = structure[0]['A']
chainA.transform(sup.rotran[0], sup.rotran[1])

io = PDBIO()
io.set_structure(structure)
io.save('my_structure_temp.pdb')