def create_and_add_MBR_residue(pdb, memb_chain, highest_atom_number):
    """
    Add a box-shaped 'MBR' pseudo-residue to pdb as chain memb_chain.

    Eight HETATM atoms named C1..C8 are placed on the corners
    (+/-25, +/-25, +/-15). Atoms at z == 15 get element 'O', the others 'N'.

    :param pdb: structure object the new chain is added to (via add_chain)
    :param memb_chain: chain identifier used for the new residue and chain
    :param highest_atom_number: serial numbers continue from this value + 1
    """
    # corner order matters: it fixes which atom name (C1..C8) sits where
    corners = [
        (25, 25, 15), (-25, 25, 15), (-25, -25, 15), (25, -25, 15),
        (25, 25, -15), (-25, 25, -15), (-25, -25, -15), (25, -25, -15),
    ]
    rsd = MyPDB.Residue('MBR', 1, memb_chain)
    for idx, (x, y, z) in enumerate(corners, start=1):
        element = 'O' if z == 15 else 'N'
        atom = MyPDB.Atom(serial_num=highest_atom_number + idx,
                          name='C%i' % idx,
                          res_type_3='MBR',
                          chain=memb_chain,
                          res_seq_num=1,
                          x=x, y=y, z=z,
                          element=element,
                          charge='', occupancy=1, temp=1,
                          header='HETATM', alternate='', achar='', si='')
        rsd.add_atom(atom)
    chn = MyPDB.Chain(memb_chain, {1: rsd})
    chn.add_residue(rsd)
    pdb.add_chain(chn)
def pdb2interface(pdb_name: str, chain: str):
    """
    Write the interface residues of chain against every sufficiently
    different chain of the same PDB to a text file.

    For each other chain whose aligned identity to chain is below 0.95, the
    sorted residue numbers returned by mp.interface_residues are recorded.
    The result is written next to the PDB file as
    <pdb_name>_<chain>_result.txt.

    :param pdb_name: a pdb name, resolved to a path with find_pdb
    :param chain: the chain whose interfaces are collected
    :return: None (also returns early when the PDB has fewer than 2 chains)
    """
    pdb_path = find_pdb(pdb_name)
    pdb = mp.parse_PDB(file_in=pdb_path, name=pdb_name, with_non_residue=False)
    if len(pdb.seqs) < 2:
        return None

    wanted_seq = pdb.seqs[chain]
    results = {}
    for ch_id, ch_seq in pdb.seqs.items():
        if ch_id == chain:
            continue
        # align is called before aligned_identity, as in the original;
        # its return value is not needed
        wanted_seq.align(ch_seq)
        if wanted_seq.aligned_identity(ch_seq) < 0.95:
            interface = mp.interface_residues(pdb[chain], pdb[ch_id])
            results[ch_id] = sorted(a.res_num for a in interface)

    # directory part of pdb_path, rebuilt with '/' separators
    pdb_dir = '/'.join(pdb_path.split('/')[:-1])
    out_path = pdb_dir + '/' + pdb_name + '_' + chain + '_result.txt'
    with open(out_path, 'w+') as fout:
        fout.write('name %s\n' % pdb_name)
        fout.write('chain %s\n' % chain)
        for ch, inter in results.items():
            residues_txt = ' '.join(str(a) for a in inter)
            fout.write('%s %s\n' % (ch, residues_txt))
            fout.write('seq %s %s\n' % (ch, pdb.seqs[ch].get_seq()))
def pdb2interface(pdb_name: str, chain: str):
    """
    Write the interface residues of chain against every sufficiently
    different chain of the same PDB to a text file.

    For each other chain whose aligned identity to chain is below 0.95, the
    sorted residue numbers returned by mp.interface_residues are recorded.
    The result is written next to the PDB file as
    <pdb_name>_<chain>_result.txt.

    :param pdb_name: a pdb name, resolved to a path with find_pdb
    :param chain: the chain whose interfaces are collected
    :return: None (also returns early when the PDB has fewer than 2 chains)
    """
    pdb_path = find_pdb(pdb_name)
    pdb = mp.parse_PDB(file_in=pdb_path, name=pdb_name, with_non_residue=False)
    if len(pdb.seqs) < 2:
        return None

    wanted_seq = pdb.seqs[chain]
    results = {}
    for ch_id, ch_seq in pdb.seqs.items():
        if ch_id == chain:
            continue
        # align is called before aligned_identity, as in the original;
        # its return value is not needed
        wanted_seq.align(ch_seq)
        if wanted_seq.aligned_identity(ch_seq) < 0.95:
            interface = mp.interface_residues(pdb[chain], pdb[ch_id])
            results[ch_id] = sorted(a.res_num for a in interface)

    # directory part of pdb_path, rebuilt with '/' separators
    pdb_dir = '/'.join(pdb_path.split('/')[:-1])
    out_path = pdb_dir + '/' + pdb_name + '_' + chain + '_result.txt'
    with open(out_path, 'w+') as fout:
        fout.write('name %s\n' % pdb_name)
        fout.write('chain %s\n' % chain)
        for ch, inter in results.items():
            residues_txt = ' '.join(str(a) for a in inter)
            fout.write('%s %s\n' % (ch, residues_txt))
            fout.write('seq %s %s\n' % (ch, pdb.seqs[ch].get_seq()))
def main():
    """
    Read the PDB named by the first command-line argument, add the MBR
    pseudo-residue to it and write the result to <name>_MBR.pdb.
    """
    pdb_name = sys.argv[1]
    # everything before the first '.pdb' in the given name
    stem = pdb_name.partition('.pdb')[0]
    output_file = '%s_MBR.pdb' % stem

    pdb = MyPDB.parse_PDB(pdb_name)
    memb_chain = determine_membrane_chain(pdb)
    last_serial = determine_highest_serial_num(pdb)
    create_and_add_MBR_residue(pdb, memb_chain, last_serial)
    MyPDB.write_PDB(output_file, pdb)
def main():
    """
    Read the PDB named by the first command-line argument, add the MBR
    pseudo-residue to it, write the result to <name>_MBR.pdb and append
    CONECT records to that file.
    """
    pdb_name = sys.argv[1]
    # everything before the first '.pdb' in the given name
    stem = pdb_name.partition('.pdb')[0]
    output_file = '%s_MBR.pdb' % stem

    pdb = MyPDB.parse_PDB(pdb_name)
    memb_chain = determine_membrane_chain(pdb)
    last_serial = determine_highest_serial_num(pdb)
    create_and_add_MBR_residue(pdb, memb_chain, last_serial)
    MyPDB.write_PDB(output_file, pdb)
    append_CONECT_to_PDB(output_file, last_serial)
def main():
    """
    Print phi and psi for the residues of one chain, then draw a
    Ramachandran plot of the whole structure.

    Command line: <pdb file> <chain> <start> <end>
    """
    pdb_name, chain = sys.argv[1], sys.argv[2]
    # NOTE(review): start/end are read (so four arguments are required)
    # but are not used below
    start, end = sys.argv[3], sys.argv[4]

    pdb = MyPDB.parse_PDB(pdb_name)
    residues = pdb[chain]
    for rid, r in residues:
        # phi needs the preceding residue, psi the following one
        if rid > 1:
            print(rid, r.phi(residues[rid - 1]))
        if rid < len(residues):
            print(rid, r.psi(residues[rid + 1]))
    MyPDB.draw_ramachadran(pdb)
def main():
    """
    Print phi and psi for the residues of one chain, then draw a
    Ramachandran plot of the whole structure.

    Command line: <pdb file> <chain> <start> <end>
    """
    pdb_name, chain = sys.argv[1], sys.argv[2]
    # NOTE(review): start/end are read (so four arguments are required)
    # but are not used below
    start, end = sys.argv[3], sys.argv[4]

    pdb = MyPDB.parse_PDB(pdb_name)
    residues = pdb[chain]
    for rid, r in residues:
        # phi needs the preceding residue, psi the following one
        if rid > 1:
            print(rid, r.phi(residues[rid - 1]))
        if rid < len(residues):
            print(rid, r.psi(residues[rid + 1]))
    MyPDB.draw_ramachadran(pdb)
def parse_residues_file(self) -> dict:
    """
    Load the template residues from the RKDS file and store each one,
    after moving it to the XY plane, in self.residues keyed by res_type.

    NOTE(review): the signature is annotated ``-> dict`` but nothing is
    returned; results are only available through self.residues.
    """
    template_pdb = mp.parse_PDB(
        '/home/labs/fleishman/jonathaw/temp_residue_data/RKDS.txt')
    for residue in template_pdb['A'].values():
        # atoms that define the placement for this residue type
        anchor_atoms = self.main_residue_atoms[residue.res_type]
        mpf.translate_and_rotate_res_to_xy_plane(residue, anchor_atoms)
        self.residues[residue.res_type] = residue
def draw_hbonds_profiles():
    """
    Two-stage command-line driver for an h-bond scan.

    Arguments (read with argparse): -pdb <file> and -stage <int>.

    stage 1: builds and runs a shell loop that submits one rosetta_scripts
             job per residue (1..seq_length) of the pdb.
    stage 2: collects the per-residue score files into all_score.sc, then
             scatter-plots the 'a_e_res' score against the membrane z of the
             residue and labels every point that has a z value.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-pdb')
    cli.add_argument('-stage', type=int)
    args = vars(cli.parse_args())
    pdb = my.parse_PDB(args['pdb'])
    stage = args['stage']

    if stage == 1:
        seq_length = pdb.seq_length()
        # one long shell command, split over adjacent literals for
        # readability only
        command_template = (
            "for i in `seq 1 %i`;do ~/bin/fleish_sub_general.sh "
            "/home/labs/fleishman/jonathaw/Rosetta/main/source/bin/"
            "rosetta_scripts.default.linuxgccrelease "
            "-parser:protocol ~/elazaridis/protocols/scan_hbonds.xml "
            "-s %s -mp:scoring:hbond -corrections::beta_nov15 "
            "-score:elec_memb_sig_die -score:memb_fa_sol -overwrite "
            "-out:prefix ${i}_ -script_vars "
            "energy_function=beta_nov15_elazaridis res_num=${i} "
            "s1=%i e1=%i ori1=%s s2=%i e2=%i ori2=%s ;done"
        )
        command = command_template % (
            seq_length, args['pdb'], 1, 24, 'out2in', 25, 48, 'out2in')
        print('issuing command\n%s' % command)
        os.system(command)

    if stage == 2:
        # header line of the first score file, then all SCORE: lines
        os.system("head -2 1_score.sc|tail -1 > all_score.sc")
        os.system("grep SCORE: *_score.sc|grep -v des >> all_score.sc")

        z_dict = {res_id: res.memb_z for res_id, res in pdb.res_items()}
        # reverse lookup: z value -> residue id
        pos_dict = {z: res_id for res_id, z in z_dict.items()}

        sc_df = Rf.score_file2df('all_score.sc')
        zs, scs = [], []
        for desc, score in zip(sc_df['description'].values,
                               sc_df['a_e_res']):
            # description starts with "<residue number>_"
            res_id = int(desc.split('_')[0])
            zs.append(z_dict[res_id])
            scs.append(score)

        plt.scatter(zs, scs)
        for z, sc in zip(zs, scs):
            if z is None:
                continue
            plt.annotate(
                pos_dict[z],
                xy=(z, sc),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                bbox={'boxstyle': 'round,pad=0.5', 'fc': 'yellow',
                      'alpha': 0.5},
                arrowprops={'arrowstyle': '->',
                            'connectionstyle': 'arc3,rad=0'})
        plt.show()
def by_xyz(args):
    """
    Print one HETATM atom per line of a whitespace-separated x y z file.

    Serial numbers start at args['num'] and residue numbers at
    args['res_num']; both are incremented after every atom.

    :param args: dict with keys 'xyz' (path to the coordinates file), 'num',
                 'res_num', 'name', 'res_name' and 'chain'
    """
    print(args['xyz'])
    serial = args['num']
    res_num = args['res_num']
    # the context manager guarantees the file is closed, also on errors
    with open(args['xyz'], 'r') as fin:
        for line in fin:
            fields = line.rstrip().split()
            atom = MyPDB.Atom('HETATM', serial, args['name'], "",
                              args['res_name'], args['chain'], res_num,
                              float(fields[0]), float(fields[1]),
                              float(fields[2]), "",
                              charge=0, element="O", si="", occupancy=1,
                              temp=0)
            print(atom)
            serial += 1
            res_num += 1
def by_file(args):
    """
    Print one atom per line of a whitespace-separated description file.

    Columns used (0-based): 1 header, 2 serial number, 3 atom name,
    4 residue name, 5 chain, 6 residue number, 7-9 x, y, z.
    Column 0 is ignored.

    :param args: dict whose 'f' entry is the path of the file to read
    """
    # the context manager guarantees the file is closed, also on errors
    with open(args['f'], 'r') as fin:
        for line in fin:
            fields = line.rstrip().split()
            header = fields[1]
            num = int(fields[2])
            name = fields[3]
            res_name = fields[4]
            chain = fields[5]
            res_num = int(fields[6])
            x, y, z = float(fields[7]), float(fields[8]), float(fields[9])
            atom = MyPDB.Atom(header, num, name, "", res_name, chain,
                              res_num, x, y, z, "",
                              charge=0, element="O", si="", occupancy=1,
                              temp=0)
            print(atom)
def test_symm_axis_interface(e: Entry, ch: str) -> bool:
    """
    Translate the structure of entry e so that the centre of mass of its
    symmetric segments sits at the origin, write the translated PDB and
    print diagnostics along the way.

    :param e: entry providing pdb, chain, symm_segs, symm_axis and interface
    :param ch: key into e.interface whose residues are printed

    NOTE(review): annotated ``-> bool`` but no value is returned.
    """
    print(e)
    inter_resi = e.interface[ch]
    print(e.symm_segs)

    # expand every (start, end) segment into its inclusive residue numbers
    symm_resi = []
    for seg in e.symm_segs:
        symm_resi.extend(range(seg[0], seg[1] + 1))
    print('symm_resi', symm_resi)
    print(inter_resi)

    pdb = mp.parse_PDB(find_pdb(e.pdb), with_non_residue=False)
    symm_com = mp.com_residues(chain=pdb[e.chain], residues=symm_resi)
    print(symm_resi, symm_com)

    origin = mp.XYZ(0, 0, 0)
    pdb.translate_xyz(origin - symm_com)
    print('pdb translated')

    out_name = '%s_%s_translated.pdb' % (e.pdb, e.chain)
    print('writing pdb %s' % out_name)
    mp.write_PDB(out_name, pdb)

    new_com = mp.com_residues(chain=pdb[e.chain], residues=symm_resi)
    print('now symm com is at', new_com)
    print('symm axis XYZ %r' % (e.symm_axis))
    print_pymol_select(e)
def parse_symm_db(args):
    """
    Parse work_dir + 'ce_symm.xml' into Entry instances.

    The file is split on the text 'data'. Within each chunk, lines are
    scanned for the significance flag, the axis components and the
    protodomain description. Scanning of a chunk stops as soon as an
    'isSig' line is not 'true'.

    :param args: not used by this function
    :return: list of Entry instances that are significant in the CE database
    """
    # upper-case letters are stripped from residue numbers before int()
    letters = 'QWERTYUIOPASDFGHJKLZXCVBNM'

    def _tag_text(line):
        # text between the first '>' and the '<' that follows it
        return line.split('>')[1].split('<')[0]

    def _residue_number(token):
        # integer value of token with all upper-case letters removed
        return int(''.join(c for c in token if c not in letters))

    def _segment(desc):
        # (start, end) of a '<id>_<start>-<end>' description; a '_-' form
        # (leading minus on start) is split on '_-', as before
        span = desc.split('_-')[1] if '_-' in desc else desc.split('_')[1]
        bounds = span.split('-')
        return _residue_number(bounds[0]), _residue_number(bounds[1])

    results = []
    with open(work_dir + 'ce_symm.xml', 'r') as fin:
        xml = fin.read().split('data')

    for data in xml:
        axis = {}
        for l in data.split('\n'):
            if 'isSig' in l:
                if _tag_text(l) != 'true':
                    break
            if '<x>' in l:
                axis['x'] = float(_tag_text(l))
            if '<y>' in l:
                axis['y'] = float(_tag_text(l))
            if '<z>' in l:
                axis['z'] = float(_tag_text(l))
            if 'protodomain' in l:
                # initialised so the handler never prints an unbound or
                # stale value when the line itself cannot be split
                s = None
                try:
                    s = _tag_text(l).split(',')
                    pdb = s[0][:4]
                    chain = s[0][5]
                    if len(s) < 2:
                        continue
                    segs = [_segment(s_) for s_ in s]
                    results.append(
                        Entry(pdb, chain, segs,
                              mp.XYZ(axis['x'], axis['y'], axis['z'])))
                except Exception:
                    # best-effort parsing: malformed entries are skipped,
                    # but KeyboardInterrupt/SystemExit are no longer
                    # swallowed as they were by the bare except
                    print('SKIPPING', s)
    return results
def create_pdb_AA_AA_d_z(aa1: str, aa2: str, d: float, z: float, res_maker, path: str = "./") -> None:
    """
    Write a pdb holding residues aa1 (chain A) and aa2 (chain B), with aa2
    turned by pi around Z, shifted along Y by d relative to aa1, and both
    lifted to height z.

    The file is written to <path>/<type1>_<type2>_<d>_<z>.pdb with d and z
    formatted to two decimals.

    :param aa1: first residue, as understood by res_maker.get_residue
    :param aa2: second residue, as understood by res_maker.get_residue
    :param d: offset added to the Y difference of the two anchor atoms
    :param z: Z translation applied to both residues
    :param res_maker: provides get_residue and main_residue_atoms
    :param path: output directory
    """
    first = res_maker.get_residue(aa1)
    second = res_maker.get_residue(aa2)
    first.change_chain_name("A")
    second.change_chain_name("B")

    # turn the second residue half a turn around Z so the two oppose
    z_axis = mp.XYZ(0, 0, 1)
    half_turn = mpf.rotation_matrix_around_vec(z_axis, np.pi)
    second.dot_matrix_me(half_turn)

    # shift the second residue along Y, measured between the second
    # "main" atom of each residue type
    first_anchor = res_maker.main_residue_atoms[first.res_type][1]
    second_anchor = res_maker.main_residue_atoms[second.res_type][1]
    y_shift = first[first_anchor].xyz.y - second[second_anchor].xyz.y + d
    second.translate_xyz(mp.XYZ(0, y_shift, 0))

    # lift both residues to the requested height
    lift = mp.XYZ(0, 0, z)
    for residue in (first, second):
        residue.translate_xyz(lift)

    # collect all atoms in a fresh MyPDB, renumber and write
    pdb = mp.MyPDB()
    for residue in (first, second):
        for atom in residue.values():
            pdb.add_atom(atom)
    pdb.renumber()
    out_name = "%s/%s_%s_%.2f_%.2f.pdb" % (path, first.res_type, second.res_type, d, z)
    mp.write_PDB(out_name, pdb)
def translate_and_rotate_res_to_xy_plane(res: mp.Residue, atom_list: list) -> mp.Residue:
    """
    Re-position res in place using three of its atoms as reference.

    The residue is first translated by the scaled (-1) coordinates of atom
    atom_list[0], then rotated three times: twice to bring atoms
    atom_list[1] and atom_list[2] towards the XY plane, and once around Z
    based on the angles between the three atoms and the Y axis.

    :param res: residue to transform; modified through translate_xyz and
                dot_matrix_me
    :param atom_list: atom names; entries 0, 1 and 2 are used

    NOTE(review): annotated as returning mp.Residue but there is no return
    statement; callers get the result through the mutated res.
    """
    # translate by -1 * (coordinates of the first reference atom);
    # presumably this puts that atom on the origin
    xyz = copy.deepcopy(res[atom_list[0]].xyz)
    xyz = xyz.scalar_multi(-1)
    res.translate_xyz(xyz)
    # rotate the second reference atom (atom_list[1]) to the xy plane:
    # angle and axis are taken between the atom and its z=0 projection
    proj_a2_xy = copy.deepcopy(res[atom_list[1]].xyz)
    proj_a2_xy.z = 0
    a2_copy = copy.deepcopy(res[atom_list[1]].xyz)
    ang_a2_xy = np.arccos(a2_copy.unit().dot(proj_a2_xy.unit()))
    axis_a2_xy = a2_copy.unit().cross(proj_a2_xy.unit()).unit()
    rotation_matrix = rotation_matrix_around_vec(axis_a2_xy, ang_a2_xy)
    res.dot_matrix_me(rotation_matrix)
    # rotate the third reference atom (atom_list[2]) to the xy plane,
    # turning around the vector of the second reference atom
    proj_a2_xy = copy.deepcopy(res[atom_list[2]].xyz)
    proj_a2_xy.z = 0
    a2_copy = copy.deepcopy(res[atom_list[2]].xyz)
    # NOTE(review): this arccos value is overwritten two statements below
    # before it is used
    ang_a2_xy = np.arccos(a2_copy.unit().dot(proj_a2_xy.unit()))
    closest_point = point_on_normed_vec_closest_to_point(
        proj_a2_xy.as_nparray(), res[atom_list[1]].xyz.unit().as_nparray())
    ang_a2_xy = angle_between_3_XYZs(a2_copy, closest_point, proj_a2_xy)
    axis = copy.deepcopy(res[atom_list[1]].xyz)
    rotation_matrix = rotation_matrix_around_vec(axis, -ang_a2_xy)
    res.dot_matrix_me(rotation_matrix)
    # rotate so that a2 and 3 are on both sides of the Y axis:
    # turn around Z by -(angle(Y, a1, a2) + half of angle(a3, a1, a2))
    a1_copy = res[atom_list[0]].xyz
    a2_copy = res[atom_list[1]].xyz
    a3_copy = res[atom_list[2]].xyz
    ang_312 = angle_between_3_XYZs(a3_copy, a1_copy, a2_copy)
    ang_y12 = angle_between_3_XYZs(mp.XYZ(0, 1, 0), a1_copy, a2_copy)
    axis = mp.XYZ(0, 0, 1)
    rotation_matrix = rotation_matrix_around_vec(axis, -(ang_y12 + 0.5 * ang_312))
    res.dot_matrix_me(rotation_matrix)
def main():
    """
    Print whether a residue of chain A is D or L.

    Command line: <pdb file> <residue number>
    """
    pdb_name = sys.argv[1]
    residue_num = int(sys.argv[2])
    pdb = MyPDB.parse_PDB(pdb_name)
    res = pdb['A'][residue_num]

    def coords(atom_name):
        # coordinates of the named atom as a numpy vector
        xyz = res[atom_name].xyz
        return np.array([xyz.x, xyz.y, xyz.z])

    CO = coords('C')
    CA = coords('CA')
    CB = coords('CB')
    N = coords('N')

    # normal of the N-C-CA plane, and the side of it CB lies on.
    # NOTE(review): CB_infront is computed but not used; the printed answer
    # comes from res.D_or_L(). An equivalent check on HA used to live here.
    normal = np.cross(N - CO, CA - CO)
    CB_infront = normal.dot(CB - CA) > 0

    print('residue %r, is %s ' % (res, res.D_or_L()))
def main():
    """
    Print whether a residue of chain A is D or L.

    Command line: <pdb file> <residue number>
    """
    pdb_name = sys.argv[1]
    residue_num = int(sys.argv[2])
    pdb = MyPDB.parse_PDB(pdb_name)
    res = pdb['A'][residue_num]

    def coords(atom_name):
        # coordinates of the named atom as a numpy vector
        xyz = res[atom_name].xyz
        return np.array([xyz.x, xyz.y, xyz.z])

    CO = coords('C')
    CA = coords('CA')
    CB = coords('CB')
    N = coords('N')

    # normal of the N-C-CA plane, and the side of it CB lies on.
    # NOTE(review): CB_infront is computed but not used; the printed answer
    # comes from res.D_or_L(). An equivalent check on HA used to live here.
    normal = np.cross(N - CO, CA - CO)
    CB_infront = normal.dot(CB - CA) > 0

    print('residue %r, is %s ' % (res, res.D_or_L()))
def test_symm_axis_interface(e: Entry, ch: str) -> bool:
    """
    Translate the structure of entry e so that the centre of mass of its
    symmetric segments sits at the origin, write the translated PDB and
    print diagnostics along the way.

    :param e: entry providing pdb, chain, symm_segs, symm_axis and interface
    :param ch: key into e.interface whose residues are printed

    NOTE(review): annotated ``-> bool`` but no value is returned.
    """
    print(e)
    inter_resi = e.interface[ch]
    print(e.symm_segs)

    # expand every (start, end) segment into its inclusive residue numbers
    symm_resi = []
    for seg in e.symm_segs:
        symm_resi.extend(range(seg[0], seg[1] + 1))
    print('symm_resi', symm_resi)
    print(inter_resi)

    pdb = mp.parse_PDB(find_pdb(e.pdb), with_non_residue=False)
    symm_com = mp.com_residues(chain=pdb[e.chain], residues=symm_resi)
    print(symm_resi, symm_com)

    origin = mp.XYZ(0, 0, 0)
    pdb.translate_xyz(origin - symm_com)
    print('pdb translated')

    out_name = '%s_%s_translated.pdb' % (e.pdb, e.chain)
    print('writing pdb %s' % out_name)
    mp.write_PDB(out_name, pdb)

    new_com = mp.com_residues(chain=pdb[e.chain], residues=symm_resi)
    print('now symm com is at', new_com)
    print('symm axis XYZ %r' % (e.symm_axis))
    print_pymol_select(e)
def create_pdb_AA_AA_d_z(aa1: str, aa2: str, d: float, z: float, res_maker, path: str = './') -> None:
    """
    Write a pdb holding residues aa1 (chain A) and aa2 (chain B), with aa2
    turned by pi around Z, shifted along Y by d relative to aa1, and both
    lifted to height z.

    The file is written to <path>/<type1>_<type2>_<d>_<z>.pdb with d and z
    formatted to two decimals.

    :param aa1: first residue, as understood by res_maker.get_residue
    :param aa2: second residue, as understood by res_maker.get_residue
    :param d: offset added to the Y difference of the two anchor atoms
    :param z: Z translation applied to both residues
    :param res_maker: provides get_residue and main_residue_atoms
    :param path: output directory
    """
    first = res_maker.get_residue(aa1)
    second = res_maker.get_residue(aa2)
    first.change_chain_name('A')
    second.change_chain_name('B')

    # turn the second residue half a turn around Z so the two oppose
    z_axis = mp.XYZ(0, 0, 1)
    half_turn = mpf.rotation_matrix_around_vec(z_axis, np.pi)
    second.dot_matrix_me(half_turn)

    # shift the second residue along Y, measured between the second
    # "main" atom of each residue type
    first_anchor = res_maker.main_residue_atoms[first.res_type][1]
    second_anchor = res_maker.main_residue_atoms[second.res_type][1]
    y_shift = first[first_anchor].xyz.y - second[second_anchor].xyz.y + d
    second.translate_xyz(mp.XYZ(0, y_shift, 0))

    # lift both residues to the requested height
    lift = mp.XYZ(0, 0, z)
    for residue in (first, second):
        residue.translate_xyz(lift)

    # collect all atoms in a fresh MyPDB, renumber and write
    pdb = mp.MyPDB()
    for residue in (first, second):
        for atom in residue.values():
            pdb.add_atom(atom)
    pdb.renumber()
    out_name = '%s/%s_%s_%.2f_%.2f.pdb' % (path, first.res_type, second.res_type, d, z)
    mp.write_PDB(out_name, pdb)
def point_on_normed_vec_closest_to_point(p: np.array, v: np.array) -> mp.XYZ:
    """
    Scale v by the dot product of p and v and return the first three
    components of the result as an mp.XYZ.

    :param p: point, as an array
    :param v: direction vector, as an array
             (assumed to be unit length, per the function name - not checked)
    :return: mp.XYZ built from (v * dot(p, v))[0:3]
    """
    scale = np.dot(p, v)
    projected = v * scale
    return mp.XYZ(projected[0], projected[1], projected[2])
#!/usr/bin/env python3.5
# Compare, chain by chain, the sequences extracted from two PDB files.
__author__ = 'jonathan'
from AASeq import compare_2_seqs
import seq_funcs as sf
import MyPDB

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-mode', type=str, default='pdbs',
        help='different running modes.\npdbs=from two pdbs')
    parser.add_argument('-1', type=str, help='first entry')
    parser.add_argument('-2', type=str, help='second entry')
    args = vars(parser.parse_args())

    if args['mode'] == 'pdbs':
        # the file name doubles as the name given to the parsed structure
        pdb_1 = MyPDB.parse_PDB(args['1'], args['1'])
        pdb_2 = MyPDB.parse_PDB(args['2'], args['2'])
        seq_1 = MyPDB.extract_seq(pdb_1)
        seq_2 = MyPDB.extract_seq(pdb_2)

        # chains are visited in sorted key order; start is the running
        # offset handed to compare_2_seqs for each chain
        start = 0
        for k in sorted(seq_1.keys()):
            chain_len = len(seq_1[k])
            compare_2_seqs(seq_1[k], seq_2[k], start=start)
            start += chain_len
            print(start, len(seq_1[k]))
def _membrane_plane_residue(res_name, res_seq_num, memb_chain, z, element,
                            first_serial):
    """
    Build one pseudo-residue whose atoms fill the integer grid
    x, y in [-30, 30) at height z.

    Atoms are named '1', '2', ... in grid order (x outer, y inner) and get
    consecutive serial numbers starting at first_serial.

    :return: (residue, next free serial number)
    """
    residue = MyPDB.Residue(res_name, res_seq_num, memb_chain)
    serial = first_serial
    atom_index = 1
    for x in range(-30, 30):
        for y in range(-30, 30):
            residue.add_atom(
                MyPDB.Atom(serial_num=serial,
                           name='%i' % atom_index,
                           res_type_3=res_name,
                           chain=memb_chain,
                           res_seq_num=res_seq_num,
                           x=x, y=y, z=z,
                           element=element,
                           charge='', occupancy=1, temp=1,
                           header='HETATM', alternate='', achar='', si=''))
            serial += 1
            atom_index += 1
    return residue, serial


def create_and_add_MBR_residue(pdb, memb_chain, highest_atom_number):
    """
    Add three membrane marker planes to pdb as chain memb_chain.

    Residue 1 'INN' lies at z=-15 (element N), residue 2 'OUT' at z=15
    (element O) and residue 3 'MID' at z=0 (element C). Serial numbers
    continue from highest_atom_number + 1 across the three residues.

    Atoms of the 'OUT' residue now carry res_type_3='OUT'; they were
    previously labelled 'MID', disagreeing with the residue they belong to.

    :param pdb: structure object the new chain is added to (via add_chain)
    :param memb_chain: chain identifier used for the new residues and chain
    :param highest_atom_number: highest serial number already in pdb
    """
    # (residue name, residue number, z, element) in creation order
    layers = (
        ('INN', 1, -15, 'N'),
        ('OUT', 2, 15, 'O'),
        ('MID', 3, 0, 'C'),
    )
    next_serial = highest_atom_number + 1
    residues = {}
    for res_name, res_seq_num, z, element in layers:
        residue, next_serial = _membrane_plane_residue(
            res_name, res_seq_num, memb_chain, z, element, next_serial)
        residues[res_seq_num] = residue

    chn = MyPDB.Chain(memb_chain,
                      {1: residues[1], 2: residues[2], 3: residues[3]})
    for res_seq_num in (1, 2, 3):
        chn.add_residue(residues[res_seq_num])
    pdb.add_chain(chn)
def parse_residues_file(self) -> dict:
    """
    Load the template residues from the RKDS file and store each one,
    after moving it to the XY plane, in self.residues keyed by res_type.

    NOTE(review): the signature is annotated ``-> dict`` but nothing is
    returned; results are only available through self.residues.
    """
    template_pdb = mp.parse_PDB(
        "/home/labs/fleishman/jonathaw/temp_residue_data/RKDS.txt")
    for residue in template_pdb["A"].values():
        # atoms that define the placement for this residue type
        anchor_atoms = self.main_residue_atoms[residue.res_type]
        mpf.translate_and_rotate_res_to_xy_plane(residue, anchor_atoms)
        self.residues[residue.res_type] = residue