def get_gyration_radius(struct: Structure) -> float:
    """
    Compute the mass-weighted radius of gyration of a structure.

    The result is sqrt(sum(m_i * d_i^2) / sum(m_i)), where d_i is the
    distance of atom i from the centre returned by ``get_mass_center`` and
    m_i is the value returned by ``_get_atom_mass``.
    https://en.wikipedia.org/wiki/Radius_of_gyration

    :param struct: structure of protein
    :return: radius of gyration
    """
    centre = get_mass_center(struct)
    positions = [atom.get_coord() for atom in struct.get_atoms()]
    weights = [_get_atom_mass(atom) for atom in struct.get_atoms()]

    weighted_sq_dist = sum(
        weight * math.dist(position, centre) ** 2
        for position, weight in zip(positions, weights)
    )
    return math.sqrt(weighted_sq_dist / sum(weights))
def get_molecular_mass(struct: Structure) -> float:
    """
    Add up the mass of every atom of the structure.

    Per-atom masses come from ``_get_atom_mass``.

    :param struct: structure of protein
    :return: molecular mass (0 when the structure yields no atoms)
    """
    return sum(_get_atom_mass(atom) for atom in struct.get_atoms())
def get_all_interaction_pairs(pdb_filename, print_files=True):
    """
    Split a pdb structure into every pair of neighbouring chains.

    No redundancy check is done: each (chain, neighbour) pair reported by
    ``get_neighbor_chains`` produces one output. This simulates the user
    input.

    :param pdb_filename: pdb file with the structure we want to break into
        interactions
    :param print_files: if true, write one pdb file per pair into the
        directory ``<structure_id>_all_interactions`` (created when missing,
        otherwise emptied of its regular files first). If false, build the
        pairs in memory instead and write nothing.
    :return: ``None`` when ``print_files`` is true. Otherwise a two-element
        list whose first item is the list of pair structures and whose
        second item is a directory-name string.
    """
    parser = PDBParser(PERMISSIVE=1)

    # Parse the pdb file into a structure and find which chains are neighbours
    structure_id = get_structure_name(pdb_filename)
    structure = parser.get_structure(structure_id, pdb_filename)
    neighbor_chains = get_neighbor_chains(structure)

    if print_files:
        out_dir = '%s_all_interactions' % structure_id
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        else:
            # Reuse the directory, dropping regular files left by earlier runs
            for entry in os.listdir(out_dir):
                entry_path = os.path.join(out_dir, entry)
                if os.path.isfile(entry_path):
                    os.unlink(entry_path)

        io = PDBIO()
        io.set_structure(structure)
        for chain, neighbors in neighbor_chains.items():
            for other_chain in neighbors:
                # ChainSelect presumably restricts the output to the two
                # chains of the pair -- defined elsewhere, confirm there.
                io.save(
                    '%s/%s_%s%s.pdb' % (out_dir, structure_id,
                                        chain.get_id(), other_chain.get_id()),
                    ChainSelect(chain, other_chain))
        # Nothing is collected in this mode; be explicit so that the
        # in-memory result below is never referenced while unbound.
        return None

    interaction_list = []
    pairs = ((chain, chain2)
             for chain, neighbors in neighbor_chains.items()
             for chain2 in neighbors)
    for structure_counter, (chain, chain2) in enumerate(pairs):
        new_str = Structure.Structure(
            '%s_%s' % (structure_id, structure_counter))
        new_str.add(Model.Model(0))
        # NOTE(review): the chain objects themselves (not copies) are added
        # to the new model -- confirm sharing them with the parsed structure
        # is intended.
        new_str[0].add(chain)
        new_str[0].add(chain2)
        interaction_list.append(new_str)

    # NOTE(review): this name starts with an extra 's' and so differs from
    # the directory used when print_files is true; kept unchanged because
    # callers may depend on it -- confirm.
    return [interaction_list, 's%s_all_interactions' % structure_id]
def get_structure_slice_by_residues(struct: Structure, domain_name: str, chain_order: int, start: int, finish: int) -> Structure:
    """
    Build a new structure holding a residue range of one chain.

    The result contains one new model (id=1) with one new chain (id=1) that
    holds residues ``start`` to ``finish`` (both included) of the chain found
    at position ``chain_order`` among ``struct.get_chains()``.

    :param struct: input structure to slice
    :param domain_name: new structure name
    :param chain_order: order of chain to extract residues
    :param start: start residue
    :param finish: finish residue (included)
    :return: new structure
    """
    source_chain = list(struct.get_chains())[chain_order]

    sliced_chain = Chain.Chain(1)
    for residue_key in range(start, finish + 1):
        sliced_chain.add(source_chain[residue_key])

    wrapper_model = Model.Model(1)
    wrapper_model.add(sliced_chain)

    domain = Structure.Structure(domain_name)
    domain.add(wrapper_model)
    return domain
def get_mass_center(struct: Structure) -> tuple:
    """
    Compute the centre of mass of a structure.

    Every coordinate is the sum of (atom coordinate * atom mass) divided by
    the total mass from ``get_molecular_mass``; per-atom masses come from
    ``_get_atom_mass``.

    :param struct: structure of protein
    :return: coordinates of mass center as an (x, y, z) tuple
    """
    total_mass = get_molecular_mass(struct)

    weighted = [0, 0, 0]
    for atom in struct.get_atoms():
        weight = _get_atom_mass(atom)
        position = atom.get_coord()
        for axis in range(3):
            weighted[axis] += position[axis] * weight

    return tuple(component / total_mass for component in weighted)
def get_geometric_center(struct: Structure) -> tuple:
    """
    Compute the unweighted centre of a structure.

    Every coordinate is the arithmetic mean of that coordinate over all
    atoms of the structure.

    :param struct: structure of protein
    :return: coordinates of center as an (x, y, z) tuple
    """
    totals = [0, 0, 0]
    atom_count = 0
    for atom_count, atom in enumerate(struct.get_atoms(), start=1):
        position = atom.get_coord()
        for axis in range(3):
            totals[axis] += position[axis]

    return tuple(total / atom_count for total in totals)
def dump_pdb(self, filename):
    '''
    Write the chain attached to this BulgeGraph, if there is one, to a pdb
    file. Nothing happens when no chain has been created.

    The chain's children are sorted in place, then the chain is wrapped in a
    fresh model and structure (both with id ' ') and saved.

    @param filename: The filename of the pdb file to which the chain
                     coordinates will be written.
    '''
    chain = self.chain
    if chain is not None:
        chain.child_list.sort()

        model = bpm.Model(' ')
        structure = bps.Structure(' ')
        model.add(chain)
        structure.add(model)

        writer = bp.PDBIO()
        writer.set_structure(structure)
        writer.save(filename)
def initialize_res(residue):
    '''Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    A residue name without a dedicated builder is built as glycine.'''
    geo = residue if isinstance(residue, Geo) else geometry(residue)

    seg_id = 1
    aa_code = geo.residue_name

    # Backbone frame: CA sits at the origin, C lies on the x axis and N is
    # placed in the xy-plane at the N-CA-C angle (stored in degrees).
    n_ca_c_rad = geo.N_CA_C_angle * (math.pi / 180.0)
    ca_coord = [0., 0., 0.]
    c_coord = [geo.CA_C_length, 0, 0]
    n_coord = [
        geo.CA_N_length * math.cos(n_ca_c_rad),
        geo.CA_N_length * math.sin(n_ca_c_rad),
        0,
    ]

    nitrogen = Atom("N", n_coord, 0.0, 1.0, " ", " N", 0, "N")
    alpha_carbon = Atom("CA", ca_coord, 0.0, 1.0, " ", " CA", 0, "C")
    carbon = Atom("C", c_coord, 0.0, 1.0, " ", " C", 0, "C")

    # Create Carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(
        nitrogen, alpha_carbon, carbon,
        geo.C_O_length, geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    oxygen = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    # One builder per single-letter code; anything else falls back to Gly.
    builders = {
        'G': makeGly, 'A': makeAla, 'S': makeSer, 'C': makeCys,
        'V': makeVal, 'I': makeIle, 'L': makeLeu, 'T': makeThr,
        'R': makeArg, 'K': makeLys, 'D': makeAsp, 'E': makeGlu,
        'N': makeAsn, 'Q': makeGln, 'M': makeMet, 'H': makeHis,
        'P': makePro, 'F': makePhe, 'Y': makeTyr, 'W': makeTrp,
    }
    build_residue = builders.get(aa_code, makeGly)
    new_residue = build_residue(
        seg_id, nitrogen, alpha_carbon, carbon, oxygen, geo)

    chain = Chain('A')
    chain.add(new_residue)

    model = Model(0)
    model.add(chain)

    structure = Structure('X')
    structure.add(model)
    return structure
def initialize_res(residue):
    '''Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    Residue names that have no builder of their own are built as glycine.'''
    if isinstance(residue, Geo):
        geo = residue
    else:
        geo = geometry(residue)

    segment = 1
    letter = geo.residue_name
    deg_to_rad = math.pi / 180.0

    # N is placed in the xy-plane, rotated by the N-CA-C angle away from the
    # x axis on which C lies; CA is the origin.
    bond_angle = geo.N_CA_C_angle * deg_to_rad
    backbone_n = Atom(
        "N",
        [geo.CA_N_length * math.cos(bond_angle),
         geo.CA_N_length * math.sin(bond_angle),
         0],
        0.0, 1.0, " ", " N", 0, "N")
    backbone_ca = Atom("CA", [0., 0., 0.], 0.0, 1.0, " ", " CA", 0, "C")
    backbone_c = Atom("C", [geo.CA_C_length, 0, 0], 0.0, 1.0, " ", " C", 0, "C")

    # Create Carbonyl atom (to be moved later)
    backbone_o = Atom(
        "O",
        calculateCoordinates(backbone_n, backbone_ca, backbone_c,
                             geo.C_O_length, geo.CA_C_O_angle,
                             geo.N_CA_C_O_diangle),
        0.0, 1.0, " ", " O", 0, "O")

    makers = dict(
        G=makeGly, A=makeAla, S=makeSer, C=makeCys, V=makeVal,
        I=makeIle, L=makeLeu, T=makeThr, R=makeArg, K=makeLys,
        D=makeAsp, E=makeGlu, N=makeAsn, Q=makeGln, M=makeMet,
        H=makeHis, P=makePro, F=makePhe, Y=makeTyr, W=makeTrp,
    )
    try:
        make = makers[letter]
    except KeyError:
        # Unknown code: same fallback as for 'G'
        make = makeGly
    built = make(segment, backbone_n, backbone_ca, backbone_c, backbone_o, geo)

    chain_a = Chain('A')
    chain_a.add(built)
    model_0 = Model(0)
    model_0.add(chain_a)
    result = Structure('X')
    result.add(model_0)
    return result
def _copy_ca_trace(chain, model):
    """Add to ``model`` a CA-only copy of ``chain``, renumbering kept residues from 0."""
    ca_chain = Chain.Chain(chain.get_id())
    model.add(ca_chain)

    res_counter = 0
    for residue in chain:
        # Residues without a CA atom are left out and do not use up a number
        if not any(atom.get_id() == 'CA' for atom in residue.get_atoms()):
            continue
        atom = residue['CA']
        ca_residue = Residue.Residue(
            ('', res_counter, ''), residue.get_resname(), residue.get_segid())
        ca_chain.add(ca_residue)
        ca_residue.add(atom.copy())
        res_counter += 1


def compare_interactions(interaction1, interaction2):
    """
    Compare two interactions through CA-only copies of their chains.

    Each interaction is iterated chain by chain and copied into a fresh
    structure (ids '1' and '2', a single model 0) that keeps only the CA atom
    of every residue. The copies, not the inputs, are then trimmed with
    ``trim_to_superimpose`` and handed to ``str_comparison_superimpose``.

    The first interaction is treated as a homodimer when, at the moment
    exactly one chain has been copied, ``compare_chains`` is truthy for the
    next chain against that copy. In that case the first two chains within
    each copy are trimmed against each other before chains with equal ids
    are trimmed across the two copies.

    :param interaction1: iterable of chains forming the first interaction
    :param interaction2: iterable of chains forming the second interaction
    :return: the value returned by ``str_comparison_superimpose`` for the two
        CA-only structures
    """
    structure1 = Structure.Structure('1')
    structure2 = Structure.Structure('2')
    structure1.add(Model.Model(0))
    structure2.add(Model.Model(0))
    model1, model2 = structure1[0], structure2[0]

    homodimer = False
    for chain in interaction1:
        # The check must run before the chain is copied: it compares the
        # incoming chain with the only chain copied so far.
        copied = list(model1.get_chains())
        if len(copied) == 1 and compare_chains(chain, copied[0]):
            homodimer = True
        _copy_ca_trace(chain, model1)

    for chain in interaction2:
        _copy_ca_trace(chain, model2)

    if homodimer:
        for model in (model1, model2):
            chains = list(model.get_chains())
            trim_to_superimpose(chains[0], chains[1])

    # Only chains carrying the same id in both copies are trimmed together
    for chain1 in model1:
        for chain2 in model2:
            if chain1.get_id() == chain2.get_id():
                trim_to_superimpose(chain1, chain2)

    return str_comparison_superimpose(structure1, structure2)
def get_model_ids(structure: Structure):
    """
    Collect the ids of all models of a structure.

    :param structure: structure whose models are listed
    :return: list with the id of each model, in the order
        ``structure.get_models()`` yields them
    """
    return [model.get_id() for model in structure.get_models()]
def _clear_templates(templates_path):
    """Delete every filesystem entry matching ``templates_path`` followed by ``*``."""
    import glob
    import shutil

    # glob.escape keeps any special character of the caller's path literal,
    # so only the trailing '*' acts as a wildcard. No shell is involved.
    for entry in glob.glob(glob.escape(templates_path) + '*'):
        try:
            if os.path.isdir(entry) and not os.path.islink(entry):
                shutil.rmtree(entry, ignore_errors=True)
            else:
                os.remove(entry)
        except OSError:
            # Best effort on purpose: the `rm -rf` call this replaces never
            # raised either, a leftover entry must not abort the run.
            continue


def _representative_atom(residue):
    """Return the 'CA' atom of a residue, else its 'P' atom, else None."""
    atom_ids = [atom.id for atom in residue.get_atoms()]
    if 'CA' in atom_ids:
        return residue['CA']
    if 'P' in atom_ids:
        return residue['P']
    return None


def _chains_interact(chain1, chain2, cutoff=7):
    """Tell whether any pair of representative atoms of two chains is closer than ``cutoff``."""
    # Resolve the atoms of chain2 once instead of once per residue of chain1
    anchors2 = [(residue2, _representative_atom(residue2))
                for residue2 in chain2]
    for residue1 in chain1:
        anchor1 = _representative_atom(residue1)
        if anchor1 is None:
            continue
        for residue2, anchor2 in anchors2:
            # the difference of two atoms is used as their distance
            if (residue1 != residue2 and anchor2 is not None
                    and anchor1 - anchor2 < cutoff):
                return True
    return False


def generate_pairwise_subunits_from_pdb(pdb_file_path, templates_path, file_type, verbose):
    """Take an existing complex and fragment it into each of the pairwise
    interactions between subunits.

    Two chains count as interacting when any residue of one has its
    representative atom ('CA', otherwise 'P') closer than 7 to the
    representative atom of any residue of the other. Each interacting pair
    that ``structure_in_created_structures`` does not report as already
    saved is written to ``templates_path + '<n>.pdb'`` with its two chains
    renamed from ``chain_alphabet``.

    Keyword arguments:
    pdb_file_path -- path where the complex PDB is
    templates_path -- folder where the resulting folders will be saved;
                      everything matching ``templates_path + '*'`` is
                      deleted first
    file_type -- type of file: 'PDB' selects the PDB parser, anything else
                 the mmCIF parser
    verbose -- if a log of the program execution is saved

    Considerations:
    Does not consider nucleic acid sequences, it is only for testing the
    program on different complexes"""
    if file_type == 'PDB':
        parser = pdb.PDBParser(PERMISSIVE=1)
    else:
        parser = pdb.MMCIFParser()
    structure = parser.get_structure('pdb_name', pdb_file_path)

    # give unique chain identifiers to a structure, it has to be similar to
    # the ids of the chains used in build_complex, to be able to use further
    # the structure_in_created_structures() function
    for position, chain in enumerate(structure.get_chains()):
        chain.id = (complete_chain_alphabet[position] + '_', chain.id)

    # free the ./templates_path/
    _clear_templates(templates_path)

    # initialize the saved pairs and structures
    num_file = 0
    saved_pairs = set()
    saved_structures = []

    # loop through all possible pairwise files
    for chain1 in structure.get_chains():
        for chain2 in structure.get_chains():
            # the following tuples define the pairs already saved
            comb = tuple(list(chain1.id) + list(chain2.id))
            if chain1 is chain2 or comb in saved_pairs:
                continue
            saved_pairs.add(comb)
            saved_pairs.add(tuple(list(chain2.id) + list(chain1.id)))

            if not _chains_interact(chain1, chain2):
                continue

            # create a structure object; the number is consumed even when
            # the pair later turns out to be a duplicate
            ID = str(num_file)
            num_file += 1
            new_structure = pdb_struct.Structure(ID)
            new_model = pdb_model.Model(0)
            new_model.add(chain1.copy())
            new_model.add(chain2.copy())
            new_structure.add(new_model)

            # move the coordinates of the structure to simulate what would
            # happen if they were coming from different files
            rotation = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
            translation = np.array((0, 0, 1), 'f')
            for atom in new_structure.get_atoms():
                atom.transform(rotation, translation)

            if structure_in_created_structures(
                    new_structure, saved_structures) is not False:
                continue

            # record as a saved structure (before the chains are renamed):
            saved_structures.append(new_structure.copy())

            # give unique chains to a structure (A and B)
            for position, chain in enumerate(new_structure.get_chains()):
                chain.id = chain_alphabet[position]

            if verbose:
                print(
                    'writing PDB file with the interaction of %s and %s into %s.pdb'
                    % (chain1.id[1], chain2.id[1], ID))

            # write using our customized writer
            io = pdb.PDBIO()
            io.set_structure(new_structure)
            io.save(templates_path + ID + '.pdb')