def get_all_interaction_pairs(pdb_filename, print_files=True):
    """
    Break a pdb structure into every pair of neighbouring chains.

    Pairs are produced for each chain and each of the neighbours reported
    for it by ``get_neighbor_chains``; no check for redundant content is
    made. This simulates the user input.

    :param pdb_filename: pdb file with the structure we want to break into
        interactions
    :param print_files: when true, every pair is written as a pdb file into
        the directory '<structure id>_all_interactions' (created if missing,
        otherwise emptied of its regular files first). When false, the
        pairs are built in memory instead.
    :return: nothing when ``print_files`` is true; otherwise a two-element
        list whose first element is the list of pair structures and whose
        second element is the string 's<structure id>_all_interactions'.
    """
    parser = PDBParser(PERMISSIVE=1)
    structure_id = get_structure_name(pdb_filename)
    structure = parser.get_structure(structure_id, pdb_filename)
    neighbor_chains = get_neighbor_chains(structure)

    if not print_files:
        # In-memory mode: one fresh structure (single model 0) per pair,
        # named '<structure id>_<running index>'.
        interactions = []
        for chain, partners in neighbor_chains.items():
            for partner in partners:
                pair = Structure.Structure(
                    '%s_%s' % (structure_id, len(interactions)))
                pair.add(Model.Model(0))
                pair[0].add(chain)
                pair[0].add(partner)
                interactions.append(pair)
        return [interactions, 's%s_all_interactions' % structure_id]

    # File mode: make sure the output directory exists and holds no
    # leftover files from a previous run.
    out_dir = '%s_all_interactions' % structure_id
    if os.path.exists(out_dir):
        for entry in os.listdir(out_dir):
            entry_path = os.path.join(out_dir, entry)
            if os.path.isfile(entry_path):
                os.unlink(entry_path)
    else:
        os.makedirs(out_dir)

    writer = PDBIO()
    writer.set_structure(structure)
    for chain, partners in neighbor_chains.items():
        for partner in partners:
            writer.save(
                '%s_all_interactions/%s_%s%s.pdb' %
                (structure_id, structure_id, chain.get_id(),
                 partner.get_id()),
                ChainSelect(chain, partner))
def set_new_positions(self, positions):
    """Record ``positions`` and append a model carrying them.

    A copy of the first chain of the first model is placed in a newly
    created model, which is appended to ``self.structure``. The atoms of
    that copy are then given the coordinates in ``positions``, in
    residue-then-atom iteration order (``positions[0]`` goes to the first
    atom, and so on).
    """
    self.positions = positions

    # The new model's id is the current model count, its serial number
    # one more than that.
    model_count = len(self.structure.child_list)
    appended_model = Model.Model(model_count, model_count + 1)
    appended_model.add(self.structure[0].child_list[0].copy())
    self.structure.add(appended_model)

    moved_chain = appended_model.child_list[0]
    atoms_in_order = (
        atom
        for residue in moved_chain.get_residues()
        for atom in residue.get_atoms()
    )
    for index, atom in enumerate(atoms_in_order):
        atom.set_coord(positions[index])
def dump_pdb(self, filename):
    '''
    Write the chain attached to this object, if any, to a pdb file.

    Nothing is written when ``self.chain`` is None. Otherwise the chain's
    child list is sorted in place, the chain is wrapped in a model and a
    structure (both with the id ' ') and saved.

    @param filename: The filename of the pdb file to which the chain
                     coordinates will be written.
    '''
    chain = self.chain
    if chain is None:
        return

    chain.child_list.sort()

    # PDBIO needs a full structure > model > chain hierarchy.
    wrapper_structure = bps.Structure(' ')
    wrapper_model = bpm.Model(' ')
    wrapper_model.add(chain)
    wrapper_structure.add(wrapper_model)

    writer = bp.PDBIO()
    writer.set_structure(wrapper_structure)
    writer.save(filename)
def get_structure_slice_by_residues(struct: Structure, domain_name: str, chain_order: int, start: int, finish: int) -> Structure:
    """
    Build a new structure holding a residue range of one chain.

    The result contains a single model (id=1) with a single chain (id=1).
    That chain receives the residues looked up as ``chain[i]`` for every
    ``i`` from 'start' to 'finish' inclusive; the residue objects
    themselves are added, not copies of them.

    :param struct: input structure to slice
    :param domain_name: new structure name
    :param chain_order: position of the source chain in the sequence of
        chains yielded by ``struct.get_chains()``
    :param start: start residue
    :param finish: finish residue (included)
    :return: new structure
    """
    sliced_chain = Chain.Chain(1)
    source_chain = list(struct.get_chains())[chain_order]
    for residue_number in range(start, finish + 1):
        sliced_chain.add(source_chain[residue_number])

    domain = Structure.Structure(domain_name)
    wrapper_model = Model.Model(1)
    wrapper_model.add(sliced_chain)
    domain.add(wrapper_model)
    return domain
def initialize_res(residue):
    '''Creates a new structure containing a single amino acid.

    The type and geometry of the amino acid are determined by the
    argument, which has to be either a geometry object or a single-letter
    amino acid code. A residue name that is not one of the twenty
    single-letter codes handled below is built as glycine. The amino acid
    will be placed into chain A of model 0 of a structure with the id X.'''
    geo = residue if isinstance(residue, Geo) else geometry(residue)
    segID = 1

    # Backbone frame: CA at the origin, C on the x axis, N in the xy plane
    # at the N-CA-C angle (degrees converted to radians).
    n_ca_c_radians = geo.N_CA_C_angle * (math.pi / 180.0)
    N = Atom(
        "N",
        [geo.CA_N_length * math.cos(n_ca_c_radians),
         geo.CA_N_length * math.sin(n_ca_c_radians),
         0],
        0.0, 1.0, " ", " N", 0, "N")
    CA = Atom("CA", [0., 0., 0.], 0.0, 1.0, " ", " CA", 0, "C")
    C = Atom("C", [geo.CA_C_length, 0, 0], 0.0, 1.0, " ", " C", 0, "C")

    ##Create Carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(
        N, CA, C, geo.C_O_length, geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    # One builder per single-letter code; glycine is the fallback.
    builders = {
        'G': makeGly, 'A': makeAla, 'S': makeSer, 'C': makeCys,
        'V': makeVal, 'I': makeIle, 'L': makeLeu, 'T': makeThr,
        'R': makeArg, 'K': makeLys, 'D': makeAsp, 'E': makeGlu,
        'N': makeAsn, 'Q': makeGln, 'M': makeMet, 'H': makeHis,
        'P': makePro, 'F': makePhe, 'Y': makeTyr, 'W': makeTrp,
    }
    build = builders.get(geo.residue_name, makeGly)
    res = build(segID, N, CA, C, O, geo)

    cha = Chain('A')
    cha.add(res)
    mod = Model(0)
    mod.add(cha)
    struc = Structure('X')
    struc.add(mod)
    return struc
invalid = True if invalid: continue num_items = {"O": 0, "T": 0} for hit in item: num_items[hit[5]] += 1 for hit in item: if len(hit[1]) > 1: structure = PDBParser(QUIET=False).get_structure( "dimer", path_structure + "dimer.pdb") new_model = Model(2) new_chain = Chain("C") new_model.add(new_chain) new_chain_mark = Chain("M") add_markers(new_chain_mark) new_model.add(new_chain_mark) structure.add(new_model) # mutations = {} [sorted IDs] : [cobalt], [residues], num_HIS, clash_info_list, num, type, coords res_co = Residue((" ", runtime_mark_id, " "), "Co3", " ") res_co.add(hit[0]) new_chain.add(res_co) # Add cobalt in its own residue for res in hit[1]: # Add rotamers
def initialize_res(residue):
    '''Creates a new structure containing a single amino acid.

    The type and geometry of the amino acid are determined by the
    argument, which has to be either a geometry object or a single-letter
    amino acid code. Any residue name without a dedicated builder in the
    table below is built as glycine. The amino acid will be placed into
    chain A of model 0.'''
    if isinstance(residue, Geo):
        geo = residue
    else:
        geo = geometry(residue)

    def backbone_atom(name, coord, fullname, element):
        # All backbone atoms share b-factor 0.0, occupancy 1.0, blank
        # altloc and serial number 0.
        return Atom(name, coord, 0.0, 1.0, " ", fullname, 0, element)

    segID = 1
    AA = geo.residue_name
    bond_to_n = geo.CA_N_length
    angle = geo.N_CA_C_angle * (math.pi / 180.0)

    N = backbone_atom(
        "N", [bond_to_n * math.cos(angle), bond_to_n * math.sin(angle), 0],
        " N", "N")
    CA = backbone_atom("CA", [0., 0., 0.], " CA", "C")
    C = backbone_atom("C", [geo.CA_C_length, 0, 0], " C", "C")

    ##Create Carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(
        N, CA, C, geo.C_O_length, geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = backbone_atom("O", carbonyl, " O", "O")

    residue_makers = (
        ('G', makeGly), ('A', makeAla), ('S', makeSer), ('C', makeCys),
        ('V', makeVal), ('I', makeIle), ('L', makeLeu), ('T', makeThr),
        ('R', makeArg), ('K', makeLys), ('D', makeAsp), ('E', makeGlu),
        ('N', makeAsn), ('Q', makeGln), ('M', makeMet), ('H', makeHis),
        ('P', makePro), ('F', makePhe), ('Y', makeTyr), ('W', makeTrp),
    )
    for code, make_residue in residue_makers:
        if AA == code:
            res = make_residue(segID, N, CA, C, O, geo)
            break
    else:
        # Unknown code: fall back to glycine.
        res = makeGly(segID, N, CA, C, O, geo)

    struc = Structure('X')
    mod = Model(0)
    cha = Chain('A')
    cha.add(res)
    mod.add(cha)
    struc.add(mod)
    return struc
def _add_ca_only_chain(model, chain):
    """Add to ``model`` a CA-only version of ``chain`` under the same id.

    Residues that have no 'CA' atom are left out; the kept residues are
    renumbered consecutively from 0 with the id ('', n, '') and each one
    holds a copy of the original CA atom.
    """
    ca_chain = Chain.Chain(chain.get_id())
    model.add(ca_chain)
    position = 0
    for residue in chain:
        if not any(atom.get_id() == 'CA' for atom in residue.get_atoms()):
            continue
        ca_atom = residue['CA']
        stripped = Residue.Residue(
            ('', position, ''), residue.get_resname(), residue.get_segid())
        ca_chain.add(stripped)
        stripped.add(ca_atom.copy())
        position += 1


def compare_interactions(interaction1, interaction2):
    """Compare two interactions through their CA-only representations.

    Both interactions are rebuilt as structures ('1' and '2', one model 0
    each) that keep only CA atoms. If the second chain of ``interaction1``
    matches the first one according to ``compare_chains``, the interaction
    is treated as a homodimer and the two chains inside each rebuilt
    structure are passed to ``trim_to_superimpose``. Chains sharing an id
    across the two structures are then passed to ``trim_to_superimpose``
    as well.

    :return: whatever ``str_comparison_superimpose`` returns for the two
        rebuilt structures.
    """
    first = Structure.Structure('1')
    second = Structure.Structure('2')
    first.add(Model.Model(0))
    second.add(Model.Model(0))

    is_homodimer = False
    for chain in interaction1:
        # The check runs before the chain is added, so it only fires for
        # the second chain, compared against the CA-only first chain.
        already_added = list(first[0].get_chains())
        if len(already_added) == 1 and compare_chains(chain, already_added[0]):
            is_homodimer = True
        _add_ca_only_chain(first[0], chain)

    for chain in interaction2:
        _add_ca_only_chain(second[0], chain)

    if is_homodimer:
        for model in (first[0], second[0]):
            trim_to_superimpose(
                list(model.get_chains())[0], list(model.get_chains())[1])

    for chain1 in first[0]:
        for chain2 in second[0]:
            if chain1.get_id() == chain2.get_id():
                trim_to_superimpose(chain1, chain2)

    return str_comparison_superimpose(first, second)
def get_chain_ids(model: Model):
    """Return the ids of the chains yielded by ``model.get_chains()``, in order."""
    return [chain.get_id() for chain in model.get_chains()]
def assemble_multiscale_visualization(topology_fn, rmf_fn, pdb_dir,
                                      outprefix=None, chimerax=True,
                                      xl_fn=None):
    """
    Render multiscale versions of rigid bodies from PDB files + flexible
    beads from RMF files, with or without mapped crosslinks.

    The rigid-body PDB files are superimposed (on their CA atoms) onto the
    matching particles of frame 0 of the RMF file and merged into one
    composite PDB, written to "<outprefix>.pdb". Optionally a ChimeraX
    script "<outprefix>.cxc" and a pseudobond file "<outprefix>_XLs.pb"
    are written as well.

    Args:
        topology_fn (str): Topology file in pipe-separated-value (PSV)
        format as required in integrative modeling using IMP. For details
        on how to write a topology file, see:
        https://integrativemodeling.org/2.13.0/doc/ref/classIMP_1_1pmi_1_1topology_1_1TopologyReader.html

        rmf_fn (str): Name of the RMF file.

        pdb_dir (str): Directory containing all the PDB files for the rigid
        bodies used in modeling.

        outprefix (str, optional): Prefix for output files. Defaults to
        None, in which case "centroid_model" is used.

        chimerax (bool, optional): If true, a Chimerax script will be
        written (extension ".cxc"). Defaults to True. If false, the
        function returns right after writing the composite PDB.

        xl_fn (str, optional): A file containing a XL dataset. Defaults to
        None. If this dataset is supplied, then it will be mapped on to the
        overall structure, with satisfied and violated XLs colored with
        SAT_XL_COLOR and VIOL_XL_COLOR respectively. A XL dataset should be
        supplied in a comma-separated-value (CSV) format containing at
        least the following fields
        protein1, residue1, protein2, residue2, sat
        where the last field <sat> is a boolean 1 or 0 depending on whether
        the particular XL is satisfied (in the ensemble sense) as a result
        of the integrative modeling exercise.

    Returns:
        None. All results are written to files.
    """
    # -------------------------------------------
    # read the RMF file and extract all particles
    # -------------------------------------------
    rmf_handle = RMF.open_rmf_file_read_only(rmf_fn)
    rmf_model = IMP.Model()
    hier = IMP.rmf.create_hierarchies(rmf_handle, rmf_model)[0]
    IMP.rmf.load_frame(rmf_handle, 0)
    particles = IMP.core.get_leaves(hier)

    # (molecule name, particle name) -> coordinates
    rmf_ps = {}
    for p in particles:
        molname = p.get_parent().get_parent().get_parent().get_name().strip()
        name = p.get_name().strip()
        coord = IMP.core.XYZ(p).get_coordinates()
        rmf_ps[(molname, name)] = coord

    # --------------------------------------------------------------
    # map pdb residues to rmf particles for each rigid body pdb file
    # --------------------------------------------------------------
    # read the topology file
    t = TopologyReader(topology_fn, pdb_dir=pdb_dir)
    components = t.get_components()

    map_pdb2rmf = {}
    rigid_body_residues = {}
    chain_ids = {}  # these are matched to the chimerax rmf plugin
    chain_id_count = 0
    for c in components:
        # ignore unstructured residues
        if c.pdb_file == "BEADS":
            continue
        mol = c.molname
        pdb_prefix = os.path.basename(c.pdb_file).split(".pdb")[0]
        chain_id = c.chain
        resrange = c.residue_range
        offset = c.pdb_offset
        r0 = resrange[0] + offset
        r1 = resrange[1] + 1 + offset

        if mol not in chain_ids:
            chain_ids[mol] = string.ascii_uppercase[chain_id_count]
            chain_id_count += 1

        if pdb_prefix not in map_pdb2rmf:
            # First component using this PDB file: parse it once and index
            # its residues by (chain id, residue number).
            map_pdb2rmf[pdb_prefix] = {}
            this_rigid_body_model = \
                PDBParser().get_structure("x", c.pdb_file)[0]
            rigid_body_residues[pdb_prefix] = {
                (r.full_id[2], r.id[1]): r
                for r in this_rigid_body_model.get_residues()
            }

        for r in range(r0, r1):
            key = (chain_id, r)
            val = (mol, r)
            if key in rigid_body_residues[pdb_prefix]:
                map_pdb2rmf[pdb_prefix][key] = val

    # --------------------------------
    # align all pdb files with the rmf
    # --------------------------------
    print("\nAligning all rigid body structures...")
    align = SVDSuperimposer()
    for pdb_prefix, mapper in map_pdb2rmf.items():
        if not mapper:
            # No residue of this PDB file is part of the model, so there
            # is nothing to superimpose (and nothing to merge later).
            continue
        pdb_coords = []
        pdb_atoms = []
        rmf_coords = []
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            pdb_coords.append(r["CA"].coord)
            pdb_atoms.extend(r.get_atoms())
            rmf_coords.append(rmf_ps[(mol, str(rmf_res))])

        pdb_coords = np.array(pdb_coords)
        rmf_coords = np.array(rmf_coords)
        # The RMF coordinates are the reference; the fit is computed on CA
        # atoms and then applied to every atom of the mapped residues.
        align.set(rmf_coords, pdb_coords)
        align.run()
        rotmat, vec = align.get_rotran()
        for a in pdb_atoms:
            a.transform(rotmat, vec)

    # --------------------------
    # assemble the composite pdb
    # --------------------------
    mols = {c.molname for c in components}
    print("\nChain IDs by molecule:")
    for k, v in chain_ids.items():
        print("molecule %s, chain ID %s" % (k, v))

    reslists = {mol: [] for mol in mols}
    for pdb_prefix, mapper in map_pdb2rmf.items():
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            # Rebuild the residue under the RMF residue number so that the
            # composite PDB uses the model's numbering.
            new_residue = Residue.Residue(id=(r.id[0], rmf_res, r.id[2]),
                                          resname=r.resname,
                                          segid=r.segid)
            for a in r.get_atoms():
                new_residue.add(a)
            reslists[mol].append(new_residue)

    composite_model = Model.Model(0)
    for mol, chain_id in chain_ids.items():
        this_chain = Chain.Chain(chain_id)
        for r in sorted(reslists[mol], key=lambda r: r.id[1]):
            this_chain.add(r)
        composite_model.add(this_chain)

    # save the composite pdb to file
    io = PDBIO()
    io.set_structure(composite_model)
    if outprefix is None:
        outprefix = "centroid_model"
    io.save(outprefix + ".pdb")

    # -------------------------------------------------------------------
    # chimerax rendering (hide most of the rmf except unstructured beads)
    # -------------------------------------------------------------------
    if not chimerax:
        # Return instead of calling exit(): terminating the interpreter
        # from inside a function would also kill the calling program.
        return

    print("\nWriting UCSF Chimerax script...")
    script = []
    script.append("open %s\n" % (outprefix + ".pdb"))
    script.append("open %s\n" % rmf_fn)
    script.append("hide\n")
    script.append("show cartoon\n")
    script.append("color #%d %s\n" % (CHIMERAX_PDB_MODEL_NUM, STRUCT_COLOR))
    script.append("color #%d %s\n" % (CHIMERAX_RMF_MODEL_NUM, UNSTRUCT_COLOR))
    script.append("hide #%d\n" % CHIMERAX_RMF_MODEL_NUM)

    # (molecule, residue) pairs covered by a rigid body; a set because it
    # is probed once per RMF particle below.
    struct_residues = set()
    for val in map_pdb2rmf.values():
        struct_residues.update(val.values())

    unstruct_atomspec = {}
    for molname, particle_name in rmf_ps:
        rmf_chain_id = chain_ids[molname]
        if "bead" in particle_name:
            # Bead names start with "<first>-<last>_", the residue range
            # the bead stands for.
            r0, r1 = particle_name.split("_")[0].split("-")
            r0 = int(r0)
            r1 = int(r1)
            this_atomspec = "#%d/%s:%d-%d" % \
                (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r0, r1)
            for r in range(r0, r1+1):
                unstruct_atomspec[(molname, r)] = this_atomspec
        else:
            if (molname, int(particle_name)) not in struct_residues:
                r = int(particle_name)
                this_atomspec = "#%d/%s:%d" % \
                    (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r)
                unstruct_atomspec[(molname, r)] = this_atomspec

    script.append(
        "show %s\n" % (" ".join(set(unstruct_atomspec.values()))))

    # ----------------------------------------------------------
    # if crosslink data is supplied, write out a pseudobond file
    # ----------------------------------------------------------
    if xl_fn is not None:
        # parse XL data
        df = pd.read_csv(os.path.abspath(xl_fn))
        xls = []
        for _, row in df.iterrows():
            xls.append((row["protein1"], row["residue1"],
                        row["protein2"], row["residue2"],
                        row["sat"]))

        # get lists of struct atomspecs
        atomspec = {}
        for (mol, particle_name) in rmf_ps:
            if "bead" in particle_name:
                continue
            if (mol, int(particle_name)) in unstruct_atomspec:
                continue
            chain_id = chain_ids[mol]
            resid = int(particle_name)
            atomspec[(mol, resid)] = "#%d/%s:%d@CA" % \
                (CHIMERAX_PDB_MODEL_NUM, chain_id, resid)

        # now add in all the unstruct atomspecs
        atomspec.update(unstruct_atomspec)

        # write pseudobond script
        pb_lines = []
        pb_lines.append("; radius = %2.2f\n" % XL_RADIUS)
        pb_lines.append("; dashes = 0\n")
        for p1, r1, p2, r2, sat in xls:
            atomspec_1 = atomspec[(p1, r1)]
            atomspec_2 = atomspec[(p2, r2)]
            # Both ends inside the same bead / residue: nothing to draw.
            if atomspec_1 == atomspec_2:
                continue
            color = SAT_XL_COLOR if sat else VIOL_XL_COLOR
            pb_lines.append("%s %s %s\n" % (atomspec_1, atomspec_2, color))
        pb_lines.append("\n")

        pb_fn = outprefix + "_XLs.pb"
        with open(pb_fn, "w") as pb_file:
            pb_file.write("".join(pb_lines))
        script.append("open %s\n" % pb_fn)

    script.append("preset 'overall look' publication\n")
    chimerax_out_fn = outprefix + ".cxc"
    with open(chimerax_out_fn, "w") as cxc_file:
        cxc_file.write("".join(script))
def _anchor_atom(residue):
    """Return the residue's 'CA' atom, else its 'P' atom, else None."""
    atom_ids = [atom.id for atom in residue.get_atoms()]
    if 'CA' in atom_ids:
        return residue['CA']
    if 'P' in atom_ids:
        return residue['P']
    return None


def _chains_in_contact(chain1, chain2, cutoff=7):
    """Tell whether any anchor atoms of the two chains are closer than ``cutoff``."""
    # The anchors of chain2 do not depend on the residue of chain1 being
    # examined, so they are looked up once instead of once per pair.
    anchors2 = []
    for residue2 in chain2:
        anchor2 = _anchor_atom(residue2)
        if anchor2 is not None:
            anchors2.append((residue2, anchor2))

    for residue1 in chain1:
        anchor1 = _anchor_atom(residue1)
        if anchor1 is None:
            continue
        for residue2, anchor2 in anchors2:
            # Subtracting two atoms gives the distance between them.
            if residue1 != residue2 and anchor1 - anchor2 < cutoff:
                return True
    return False


def generate_pairwise_subunits_from_pdb(pdb_file_path, templates_path, file_type, verbose):
    """Take an existing complex and fragment it into each of the pairwise interactions between subunits.

    Two chains are considered to interact when a pair of their anchor
    atoms ('CA' of a residue, or 'P' when there is no 'CA') is closer than
    7. Each interacting pair is shifted by one unit along z, checked
    against the pairs already kept with structure_in_created_structures()
    and, when new, written to '<templates_path><n>.pdb' with its chains
    renamed from chain_alphabet. The number <n> counts interacting pairs,
    so numbers of pairs rejected as already present are not reused.

    Keyword arguments:
    pdb_file_path -- path where the complex PDB is
    templates_path -- prefix of the output files; everything matching
        '<templates_path>*' is deleted first (best effort)
    file_type -- 'PDB' to parse with PDBParser, anything else to parse
        with MMCIFParser
    verbose -- if true, a message is printed for every file written

    Considerations:
    It is only for testing the program on different complexes"""
    import glob
    import itertools
    import shutil

    num_file = 0

    if file_type == 'PDB':
        parser = pdb.PDBParser(PERMISSIVE=1)
    else:
        parser = pdb.MMCIFParser()

    structure = parser.get_structure('pdb_name', pdb_file_path)

    # give unique chain identifiers to a structure, it has to be similar to
    # the ids of the chains used in build_complex, to be able to use
    # further the structure_in_created_structures() function
    for id_nch, chain in enumerate(structure.get_chains()):
        chain.id = (complete_chain_alphabet[id_nch] + '_', chain.id)

    # free the ./templates_path/ without going through a shell, so paths
    # with spaces or shell metacharacters are handled literally. Failures
    # are ignored on purpose: the shell command this replaces was best
    # effort too.
    for stale in glob.glob(glob.escape(templates_path) + '*'):
        if os.path.isdir(stale) and not os.path.islink(stale):
            shutil.rmtree(stale, ignore_errors=True)
        else:
            try:
                os.remove(stale)
            except OSError:
                pass

    saved_structures = []

    # loop through all possible pairwise files; each unordered pair of
    # distinct chains is visited exactly once
    all_chains = list(structure.get_chains())
    for chain1, chain2 in itertools.combinations(all_chains, 2):
        # ask if any of the residues is interacting, if so save the PDB
        if not _chains_in_contact(chain1, chain2):
            continue

        # create a structure object
        ID = str(num_file)
        num_file += 1
        new_structure = pdb_struct.Structure(ID)
        new_model = pdb_model.Model(0)
        new_model.add(chain1.copy())
        new_model.add(chain2.copy())
        new_structure.add(new_model)

        # move the coordinates of the structure to simulate what would
        # happen if they were coming from different files
        rotation = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        translation = np.array((0, 0, 1), 'f')
        for atom in new_structure.get_atoms():
            atom.transform(rotation, translation)

        # write to new pdb:
        if structure_in_created_structures(
                new_structure, saved_structures) is False:
            # record as a saved structure (before the chains are renamed):
            saved_structures.append(new_structure.copy())

            # give unique chains to a structure (A and B)
            for id_nch, chain in enumerate(new_structure.get_chains()):
                chain.id = chain_alphabet[id_nch]

            if verbose:
                print(
                    'writing PDB file with the interaction of %s and %s into %s.pdb'
                    % (chain1.id[1], chain2.id[1], ID))

            # write using our customized writer
            io = pdb.PDBIO()
            io.set_structure(new_structure)
            io.save(templates_path + ID + '.pdb')