def main():
    """Cross-reference two patch lists and write the matches to files.

    The lists are read with ``read_patch`` from the last two command-line
    arguments (``sys.argv[-2]`` -> ``patch3p``, ``sys.argv[-1]`` ->
    ``patch4p``).  Three outputs are written through ``lt.open_file``:

    * ``dic_4_3``  - every ``patch4p`` entry with the ``patch3p`` entries for
      which ``compare_patch`` is true
    * ``dic_3_4``  - the reverse mapping
    * ``unique_3`` - ``patch3p`` entries matched by no ``patch4p`` entry,
      sorted in descending order
    """
    patch3p = read_patch(sys.argv[-2])
    patch4p = read_patch(sys.argv[-1])

    # patch4p entry -> matching patch3p entries (input order is preserved)
    dic_4_3 = OrderedDict()
    for four in patch4p:
        dic_4_3[tuple(four)] = [three for three in patch3p
                                if compare_patch(three, four)]

    # patch3p entry -> matching patch4p entries
    dic_3_4 = OrderedDict()
    for three in patch3p:
        dic_3_4[tuple(three)] = [four for four in patch4p
                                 if compare_patch(three, four)]

    for label, mapping in (('dic_4_3', dic_4_3), ('dic_3_4', dic_3_4)):
        with lt.open_file(label) as w_f:
            lt.print_dic(mapping, nest=0, output=w_f)

    # everything in patch3p that never showed up as a match
    matched_3 = set(tuple(hit) for hits in dic_4_3.values() for hit in hits)
    leftover = set(tuple(p) for p in patch3p).difference(matched_3)
    unique_3 = sorted(
        [(int(u[0]), u[1], u[2]) for u in leftover], reverse=True)
    with lt.open_file('unique_3') as w_f:
        for entry in unique_3:
            print >> w_f, entry
def write_result(res_neighbors, file_suffix):
    """Write the PDB list, residue neighbors and neighbor-pattern groups.

    ``res_neighbors`` is iterated as ``(pdb_id, residue, neighbors)``
    triples.  Three files are created via ``lt.open_file`` with
    ``inner_dir`` and ``file_suffix`` both set to ``file_suffix``:

    * ``pdb_id``        - number of distinct ids, then each id with
      ``domain(id)``
    * ``res_neighbors`` - one formatted row per (sorted) triple
    * ``neighbor_pdbs`` - one row per one-letter neighbor pattern, most
      frequent pattern first

    Raises ``KeyError`` if a neighbor's residue name (the text before the
    first ``_``) is not one of the 20 names in the lookup table.
    """
    one_letter = {
        'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q',
        'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
        'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T',
        'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C',
    }
    target = dict(inner_dir=file_suffix, file_suffix=file_suffix)

    unique_ids = list(set(pdb_id for pdb_id, _, _ in res_neighbors))
    with lt.open_file(file_name='pdb_id', **target) as w_f:
        print >> w_f, 'num:', '\t', len(unique_ids)
        for pdb in unique_ids:
            print >> w_f, pdb, '\t', domain(pdb)

    ordered = sorted(res_neighbors)
    with lt.open_file(file_name='res_neighbors', **target) as w_f:
        for pdb_id, res, neighbors in ordered:
            print >> w_f, '{0:<8}{1:<10}{2:<20}{3}'.format(
                pdb_id, domain(pdb_id), res, ' '.join(neighbors))

    # sorted one-letter pattern -> ids showing it (each id listed once)
    by_pattern = {}
    for pdb_id, res, neighbors in ordered:
        names = [n.split('_')[0] for n in neighbors]
        pattern = ''.join(sorted([one_letter[name] for name in names]))
        members = by_pattern.setdefault(pattern, [])
        if pdb_id not in members:
            members.append(pdb_id)

    ranked = sorted(
        [(len(members), pattern, members)
         for pattern, members in by_pattern.iteritems()],
        reverse=True)
    with lt.open_file(file_name='neighbor_pdbs', **target) as w_f:
        for _, pattern, members in ranked:
            # ids for which domain() is falsy are left out of the row
            tagged = [pdb + '_' + domain(pdb)
                      for pdb in members if domain(pdb)]
            print >> w_f, '{0:<10}{1}'.format(pattern, ' '.join(tagged))
def write_result(res_neighbors, filename):
    """Write the distinct protein ids and the full neighbor table.

    ``res_neighbors`` is iterated as ``(protein, residue, neighbors)``
    triples.  The ids go to ``hbplus_salt_combine_<filename>_pdb`` and the
    formatted rows to ``hbplus_salt_combine_<filename>``.
    """
    base_name = 'hbplus_salt_combine_' + filename

    distinct_ids = set(pro for pro, _, _ in res_neighbors)
    with lt.open_file(file_name=base_name + '_pdb') as w_f:
        for pro_id in distinct_ids:
            print >> w_f, pro_id

    with lt.open_file(file_name=base_name) as w_f:
        for pro, phos_res, neighbor_res in res_neighbors:
            row = '{0:<8}{1:<15}{2}'.format(
                pro, phos_res, ', '.join(neighbor_res))
            print >> w_f, row
def write_initial(hbs):
    """Write the initial hydrogen-bond table and its supporting lines.

    ``hbs`` is iterated as ``(protein, residue, neighbors, lines)`` tuples.
    ``hbplus_initial`` gets one summary row per tuple; ``hbplus_initial_proof``
    repeats each row between two 80-dash rules, followed by its ``lines``.
    """
    row_fmt = '{0:<8}{1:<15}{2}'
    rule = '-' * 80

    with lt.open_file(file_name='hbplus_initial') as w_f:
        for pro, phos_res, neighbor_res, _ in hbs:
            print >> w_f, row_fmt.format(
                pro, phos_res, ', '.join(neighbor_res))

    with lt.open_file(file_name='hbplus_initial_proof') as w_f:
        for pro, phos_res, neighbor_res, evidence in hbs:
            print >> w_f, rule
            print >> w_f, row_fmt.format(
                pro, phos_res, ', '.join(neighbor_res))
            print >> w_f, rule
            for evidence_line in evidence:
                print >> w_f, evidence_line
def write_result(res_neighbors, file_suffix):
    """Write the distinct PDB ids and the sorted residue/neighbor table.

    ``res_neighbors`` is iterated as ``(pdb_id, residue, neighbors)``
    triples.  Both files are opened through ``lt.open_file`` with
    ``inner_dir`` and ``file_suffix`` set to ``file_suffix``.
    """
    target = dict(inner_dir=file_suffix, file_suffix=file_suffix)

    unique_ids = list(set(pdb_id for pdb_id, _, _ in res_neighbors))
    with lt.open_file(file_name='pdb_id', **target) as w_f:
        print >> w_f, 'num:', '\t', len(unique_ids)
        for pdb in unique_ids:
            print >> w_f, pdb

    with lt.open_file(file_name='res_neighbors', **target) as w_f:
        for pdb_id, res, neighbors in sorted(res_neighbors):
            print >> w_f, '{0:<20}{1:<20}{2}'.format(
                pdb_id, res, ' '.join(neighbors))
def main():
    """List, per input file, the ligands reported by ``check_ligands``.

    Files come from ``lt.files_in_dir(sys.argv[-1])``.  A file contributes a
    row (id, tab, space-joined ligands) only when at least one ligand is
    returned; the id is the file's base name up to its first ``.``.
    """
    with lt.open_file() as write_f:
        for path in lt.files_in_dir(sys.argv[-1]):
            base_name = os.path.split(path)[1]
            pdb_id = base_name.split('.')[0]
            ligands = check_ligands(path)
            if len(ligands) == 0:
                continue
            print >> write_f, pdb_id, '\t', ' '.join(ligands)
def write_result(tem_all_hots, tem_hots, tem_wdsp, all_hots, all_wdsp,
                 tem_repeats_similarity, all_repeats_similarity,
                 tem_all_seq_similarity, cutoff=0):
    """Write, for every template protein, three files about its matches.

    ``tem_all_hots`` maps a template id to an iterable of
    ``(protein, identity)`` pairs.  For each template:

    * ``<tem>_similar_hotspots_<cutoff>``      - header, template row, then
      one row per matched protein
    * ``<tem>_similar_hotspots_wdsp<cutoff>``  - the ``wdsps`` lines of the
      template followed by those of every match (extension ``.wdsp``)
    * ``<tem>_similar_hotspots_seq<cutoff>``   - the sequences of the
      template and its matches, wrapped at 80 characters per line
    """
    row_fmt = ('{0:<20}{1:<15}{2:<18}{3:<15}{4:<15}{5:<15}{6:<15}{7:<}')

    def wrapped(seq):
        # 80-character slices of the sequence, in order
        return [seq[i:i + 80] for i in range(0, len(seq), 80)]

    for tem_pro, all_pros in tem_all_hots.iteritems():
        stem = tem_pro + '_similar_hotspots_'

        # --- tabular report -------------------------------------------
        with lt.open_file(stem + str(cutoff)) as w_f:
            print >> w_f, row_fmt.format(
                'protein_id', 'identity', 'seq_similarity', "hotspot_num",
                'repeat_length', "repeats_sim", 'tetrad_num', 'hotspots')
            print >> w_f, row_fmt.format(
                tem_pro, '100', '100',
                len(tem_hots[tem_pro]),
                tem_wdsp.repeat_num[tem_pro],
                tem_repeats_similarity[tem_pro][0],
                tem_wdsp.tetrad_num[tem_pro],
                ' '.join(tem_hots[tem_pro]))
            for pro, identity in all_pros:
                print >> w_f, row_fmt.format(
                    pro, identity,
                    tem_all_seq_similarity[tem_pro][pro],
                    len(all_hots[pro]),
                    all_wdsp.repeat_num[pro],
                    all_repeats_similarity[pro][0],
                    all_wdsp.tetrad_num[pro],
                    ' '.join(all_hots[pro]))

        # --- raw wdsp records ------------------------------------------
        with lt.open_file(stem + 'wdsp' + str(cutoff),
                          file_extension='.wdsp') as w_f:
            for record in tem_wdsp.wdsps[tem_pro]:
                print >> w_f, record
            for pro, identity in all_pros:
                for record in all_wdsp.wdsps[pro]:
                    print >> w_f, record

        # --- sequences ---------------------------------------------------
        with lt.open_file(stem + 'seq' + str(cutoff)) as w_f:
            print >> w_f, '>', tem_pro
            for chunk in wrapped(tem_wdsp.seqs[tem_pro]):
                print >> w_f, chunk
            for match in all_pros:
                print >> w_f, '>', match[0]
                for chunk in wrapped(all_wdsp.seqs[match[0]]):
                    print >> w_f, chunk
def main():
    """Dump the result of ``read_pickle()`` as text and as a pickle.

    Each ``(protein, residue, neighbors)`` triple becomes one row of
    ``hbplus_salt_combine_initial``; the whole object is then passed to
    ``lt.pickle_dump`` under the name ``hbplus_salt_combine``.
    """
    res_neighbors = read_pickle()
    row_fmt = '{0:<8}{1:<15}{2}'
    with lt.open_file(file_name='hbplus_salt_combine_initial') as w_f:
        for pro, phos_res, neighbor_res in res_neighbors:
            joined = ', '.join(neighbor_res)
            print >> w_f, row_fmt.format(pro, phos_res, joined)
    lt.pickle_dump(res_neighbors, 'hbplus_salt_combine')
def write_result(tem_all_hots, tem_hots, all_hots, cutoff=0.3):
    """Write one ``<tem>_similar_hotspots_<cutoff>`` file per template.

    ``tem_all_hots`` maps a template id to ``(protein, identity)`` pairs.
    The first row holds the template's hotspots (blank identity column);
    each following row holds a match, its identity with two decimals, and
    its hotspots.
    """
    for tem_pro, all_pros in tem_all_hots.iteritems():
        out_name = tem_pro + '_similar_hotspots_' + str(cutoff)
        with lt.open_file(out_name) as w_f:
            template_spots = ' '.join(tem_hots[tem_pro])
            print >> w_f, '{0:<20}{1:<10}{2:<}'.format(
                tem_pro, " ", template_spots)
            for pro, identity in all_pros:
                match_spots = ' '.join(all_hots[pro])
                print >> w_f, '{0:<20}{1:<10.2f}{2:<}'.format(
                    pro, identity, match_spots)
def write_result(tem_all_hots, tem_hots, all_hots, b, h, cutoff=1):
    """Write one ``<tem>_similar_hotspots_b<b>h<h>`` file per template.

    ``tem_all_hots`` maps a template id to an iterable of protein ids.  The
    template row comes first, followed by one row per listed protein.
    ``cutoff`` is accepted but not used by this function.
    """
    row_fmt = '{0:<20}{1:<}'
    tag = 'b' + str(b) + 'h' + str(h)
    for tem_pro, all_pros in tem_all_hots.iteritems():
        with lt.open_file(tem_pro + '_similar_hotspots_' + tag) as w_f:
            print >> w_f, row_fmt.format(
                tem_pro, ' '.join(tem_hots[tem_pro]))
            for pro in all_pros:
                print >> w_f, row_fmt.format(pro, ' '.join(all_hots[pro]))
def main():
    """Align every template sequence against every sequence of a second set.

    Both inputs are parsed with ``Wdsp`` (``sys.argv[-2]`` = templates,
    ``sys.argv[-1]`` = the full set).  For each template a file named after
    it is written with one ``name  align(...)`` row per sequence of the full
    set, in the iteration order of that set (the rows are not sorted).
    """
    with open(sys.argv[-2]) as tem_f:
        tem_seq = Wdsp(tem_f).seqs
    with open(sys.argv[-1]) as all_f:
        all_seq = Wdsp(all_f).seqs

    similarity = OrderedDict()
    for t_name, t_seq in tem_seq.iteritems():
        similarity[t_name] = [(a_name, align(t_seq, a_seq))
                              for a_name, a_seq in all_seq.iteritems()]

    for t_name, scores in similarity.iteritems():
        with lt.open_file(t_name) as w_f:
            for a_name, a_identity in scores:
                print >> w_f, '{0:<15}{1:<}'.format(a_name, a_identity)
def main():
    """Collect residue neighbors from every ``.pdb`` file and summarise them.

    Files come from ``lt.files_in_dir(sys.argv[-1])``; only those whose
    extension is ``.pdb`` are passed to ``pdb_neighbors``.  The combined
    list is handed to ``write_result`` (suffix ``original``) and the
    distribution of neighbor counts, as computed by ``lt.lis_sta``, is
    written to ``phos_neighbors_num``.

    Fixes relative to the previous version:

    * ``res_neighbors`` is a list, so the neighbor counts are taken by
      unpacking its ``(pdb_id, residue, neighbors)`` triples instead of
      calling ``.iteritems()`` on it (which raised ``AttributeError``).
    * the statistics rows are now actually printed to the output file; the
      old ``print >>`` had the formatted string where the file belongs.
    * the never-used ``os.devnull`` handle is no longer opened (and leaked).
    """
    res_neighbors = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        f_name, f_exten = os.path.splitext(os.path.basename(pdb_f))
        if f_exten == '.pdb':
            res_neighbors.extend(pdb_neighbors(pdb_f, f_name))
    write_result(res_neighbors, file_suffix='original')

    sta = lt.lis_sta([len(neighbors) for _, _, neighbors in res_neighbors])
    # NOTE(review): the return type of lt.lis_sta is not visible here - the
    # old code assumed a mapping (.items()).  Accept a mapping or an
    # iterable of (value, frequency) pairs; confirm against lt.
    pairs = sta.items() if hasattr(sta, 'items') else sta
    with lt.open_file('phos_neighbors_num') as w_f:
        for k, v in pairs:
            print >> w_f, '{0:<10}{1:<10}'.format(k, v)
def write_sta_result(res_neighbors, filename):
    """Group ``(pdb, residue)`` pairs by their neighbor pattern and write them.

    Each neighbor name is cut at its first ``_`` and mapped to a one-letter
    code (``HOH`` -> ``O``, anything unknown -> ``*``); the sorted letters
    form the pattern.  Rows in ``hbplus_salt_combine_<filename>_sta`` are
    ordered by pattern length, then pattern, ascending.
    """
    HASH = {
        'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q',
        'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
        'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T',
        'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C',
        'HOH': 'O',
    }

    # pattern -> distinct (pdb, residue) pairs, in first-seen order
    grouped = {}
    for pdb, res, neighbors in res_neighbors:
        names = [n.split('_')[0] for n in neighbors]
        pattern = ''.join(sorted([HASH.get(name, '*') for name in names]))
        hits = grouped.setdefault(pattern, [])
        if (pdb, res) not in hits:
            hits.append((pdb, res))

    ranked = sorted([(len(pattern), pattern, hits)
                     for pattern, hits in grouped.iteritems()])
    out_name = 'hbplus_salt_combine_' + filename + '_sta'
    with lt.open_file(file_name=out_name) as w_f:
        for _, pattern, hits in ranked:
            print >> w_f, '{0:<20}{1:<}'.format(pattern, hits)
def main():
    """Build a histogram of distances gathered from a directory of files.

    ``sys.stderr`` is redirected to ``os.devnull`` for the rest of the
    process.  For every file from ``lt.files_in_dir(sys.argv[-1])`` the id is
    taken as characters ``[-8:-4]`` of its path and ``pdb_dist`` supplies the
    distances.  Values are sorted, reduced to one decimal, limited to those
    below 10.0, counted by ``lt.lis_sta`` and written to the ``dist_sta``
    output.
    """
    null = open(os.devnull, 'w')
    sys.stderr = null

    def myround(n):
        # round to one decimal, then keep only the first fractional digit
        # of the textual form
        whole, frac = str(np.round(n, 1)).split('.')
        return float(whole + '.' + frac[0])

    dist = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        dist.extend(pdb_dist(pdb_f, pdb_f[-8:-4]))

    rounded = [myround(d) for d in sorted(dist)]
    sta = lt.lis_sta([d for d in rounded if d < 10.0])

    with lt.open_file(file_suffix='dist_sta') as w_f:
        for num, freq in sta:
            print >> w_f, '{0:<10}{1}'.format(num, freq)
# -*- coding: utf-8 -*- """ calculate statistics for WDSP output file usage: python wdsp_sta.py wdsp_f """ import lt import sys import os from wdsp import Wdsp with open(sys.argv[-1]) as o_f: wdsp = Wdsp(o_f) scores_sta = lt.lis_sta(wdsp.scores.values()) with lt.open_file(file_suffix='total_score_sta') as w_f: for num, freq in scores_sta: print >> w_f, '{0:<10}{1}'.format(num, freq) tetrad_sta = [ len([vi for vi in v if vi >= 44.0]) for k, v in wdsp.blade_scores.iteritems() ] tetrad_sta = lt.lis_sta(tetrad_sta) with lt.open_file(file_suffix='tetrad_num_sta') as w_f: for num, freq in tetrad_sta: print >> w_f, '{0:<5}{1}'.format(num, freq) blades_sta = [len(blades) for pro, blades in wdsp.blades.iteritems()] blades_sta = lt.lis_sta(blades_sta) with lt.open_file(file_suffix='blades_sta') as w_f:
def write_result(res_neighbors, file_suffix):
    """Write the PDB list, residue neighbors and reduced-alphabet groups.

    ``res_neighbors`` is iterated as ``(pdb_id, residue, neighbors)``
    triples.  Three files are created via ``lt.open_file`` with
    ``inner_dir`` and ``file_suffix`` both set to ``file_suffix``:
    ``pdb_id``, ``res_neighbors`` and ``neighbor_pdbs`` (one row per
    neighbor pattern, most frequent first, ids comma-separated).
    """
    # Reduced alphabet: residues mapped to 'G' are treated as one class
    # (the original note says they all hydrogen-bond through the main
    # chain).  Water is 'O'; any other name becomes '*'.
    # NOTE(review): the original comment listed G A V I L F P C M for the
    # merged class and said S and T are not distinguished, but the table
    # also maps GLU and ASP to 'G' and keeps SER/THR as distinct 'S'/'T'.
    # Confirm which of the two is intended.
    reduced = {
        'VAL': 'G', 'ILE': 'G', 'LEU': 'G', 'GLU': 'G', 'GLN': 'Q',
        'ASP': 'G', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'G',
        'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T',
        'MET': 'G', 'ALA': 'G', 'GLY': 'G', 'PRO': 'G', 'CYS': 'G',
        'HOH': 'O',
    }
    target = dict(inner_dir=file_suffix, file_suffix=file_suffix)

    unique_ids = list(set(pdb_id for pdb_id, _, _ in res_neighbors))
    with lt.open_file(file_name='pdb_id', **target) as w_f:
        print >> w_f, 'num:', '\t', len(unique_ids)
        for pdb in unique_ids:
            print >> w_f, pdb

    ordered = sorted(res_neighbors)
    with lt.open_file(file_name='res_neighbors', **target) as w_f:
        for pdb_id, res, neighbors in ordered:
            print >> w_f, '{0:<20}{1:<20}{2}'.format(
                pdb_id, res, ' '.join(neighbors))

    # pattern -> ids showing it (each id listed once, first-seen order)
    by_pattern = {}
    for pdb_id, res, neighbors in ordered:
        names = [n.split('_')[0] for n in neighbors]
        pattern = ''.join(sorted([reduced.get(name, '*') for name in names]))
        members = by_pattern.setdefault(pattern, [])
        if pdb_id not in members:
            members.append(pdb_id)

    ranked = sorted(
        [(len(members), pattern, members)
         for pattern, members in by_pattern.iteritems()],
        reverse=True)
    with lt.open_file(file_name='neighbor_pdbs', **target) as w_f:
        for _, pattern, members in ranked:
            print >> w_f, '{0:<20}{1}'.format(pattern, ','.join(members))
def main():
    """
    write original center:neighbors
    write pdb_ids
    write neighbors:pdbs

    Neighbors are collected with ``pdb_neighbors`` from every ``.pdb`` file
    in ``lt.files_in_dir(sys.argv[-1])``.  ``write_result`` is called for
    the full list (``original``) and then for the entries having exactly
    1..6 neighbors (``1_neighbor`` .. ``6_neighbor``) and more than 6
    (``7_neighbor``).  Finally the number of distinct PDB ids per bucket is
    written to the ``sta`` output.
    """
    null = open(os.devnull, 'w')
    # sys.stderr = null

    res_neighbors = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        f_name, f_exten = os.path.splitext(os.path.split(pdb_f)[1])
        if f_exten == '.pdb':
            res_neighbors.extend(pdb_neighbors(pdb_f, f_name))
    write_result(res_neighbors, file_suffix='original')

    # bucket i (0-based) holds the entries with i + 1 neighbors; the last
    # bucket collects everything with more than six
    buckets = []
    for size in range(1, 7):
        exact = [(pdb_id, res, neighbors)
                 for pdb_id, res, neighbors in res_neighbors
                 if len(neighbors) == size]
        write_result(exact, file_suffix=str(size) + '_neighbor')
        buckets.append(exact)
    many = [(pdb_id, res, neighbors)
            for pdb_id, res, neighbors in res_neighbors
            if len(neighbors) > 6]
    write_result(many, file_suffix='7_neighbor')
    buckets.append(many)

    sta = [len(set(pdb_id for pdb_id, _, _ in bucket))
           for bucket in buckets]
    with lt.open_file(file_suffix='sta') as w_f:
        print >> w_f, '{0:<20}{1}'.format('neighbors num', 'pdb_num')
        for position, num in enumerate(sta):
            print >> w_f, '{0:<20}{1}'.format(position + 1, num)
        print >> w_f, '{0:<20}{1}'.format('total', sum(sta))
def write_tem_pros(self):
    """Write ``self.tem_pros`` to the ``tem_pros`` output.

    ``self.tem_pros`` is iterated as ``(shape, patch, proteins)`` triples;
    each row lists the protein count, shape, patch and the comma-joined
    proteins.
    """
    row_fmt = '{0:<10}{1:<20}{2:<10}{3:<}'
    with lt.open_file(file_suffix='tem_pros') as w_f:
        for shape, patch, pros in self.tem_pros:
            count = len(pros)
            print >> w_f, row_fmt.format(count, shape, patch, ','.join(pros))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
usage: python get_hotspot.py *.wdsp
output hotspots in following format
pro xxx xxx xxx xxx xxx xxx
"""
import os
import sys

import lt
from wdsp import Wdsp

# Parse the WDSP file named by the last command-line argument.
with open(sys.argv[-1]) as wdsp_f:
    w = Wdsp(wdsp_f)

# One row per protein: its id followed by its space-separated hotspots.
ROW_FMT = '{0:<25}{1:<}'
with lt.open_file(file_suffix='hotspots') as w_f:
    for pro, hots in w.hotspots.iteritems():
        joined = ' '.join(hots)
        print >> w_f, ROW_FMT.format(pro, joined)
def main():
    """Collect per-file neighbor dicts, clean them in stages, and dump each stage.

    For every file from ``lt.files_in_dir(sys.argv[-1])`` the result of
    ``pdb_neighbors`` (used here as a dict: key -> list of neighbor strings)
    is recorded.  Three derived versions are then built:

    * v1 - neighbor strings cut at their first ``_`` and sorted
    * v2 - v1 restricted to the 20 names in ``stand_res``; empty lists dropped
    * v3 - v2 restricted to lists with at least three entries

    Each version is written with the nested ``write_phosneighbors``.

    NOTE(review): the source reached review with its indentation lost; the
    nesting below is a reconstruction.  The spots where another nesting is
    equally plausible are marked inline - please confirm.
    """
    phos_neighbors = {}
    good_pdb = []
    bad_pdb = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        f_path, f_name = os.path.split(pdb_f)
        f_id, f_ex = os.path.splitext(f_name)
        neighbors = pdb_neighbors(pdb_f, f_id)
        # debug output of the raw result on stdout
        print neighbors
        if len(neighbors) > 0:
            for k, v in neighbors.iteritems():
                if len(v) > 0:
                    phos_neighbors[f_id] = neighbors
                    # as nested here, f_id is appended once per non-empty
                    # entry, so good_pdb can contain the same id repeatedly
                    good_pdb.append(f_id)
        # NOTE(review): this else is attached to the outer `if` here; it
        # could also have belonged to `if len(v) > 0` or to the `for`.
        else:
            bad_pdb.append(f_id)
    lt.write_list(good_pdb, 'good_pdb')
    lt.write_list(bad_pdb, 'bad_pdb')

    # delete res_id and clear empty
    phos_neighbors_v1 = {}
    for k, v in phos_neighbors.iteritems():
        phos_neighbors_v1[k] = {}
        for vk, vv in v.iteritems():
            # entries with no neighbors keep an empty list in v1
            phos_neighbors_v1[k][vk] = []
            vvn = []
            if len(vv) > 0:
                for vi in vv:
                    # keep only the part before the first '_'
                    vvn.append(vi.split('_')[0])
                vvn = sorted(vvn)
                phos_neighbors_v1[k][vk] = vvn

    # delete non-standard residues
    stand_res = [
        'VAL', 'ILE', 'LEU', 'GLU', 'GLN', 'ASP', 'ASN', 'HIS', 'TRP', 'PHE',
        'TYR', 'ARG', 'LYS', 'SER', 'THR', 'MET', 'ALA', 'GLY', 'PRO', 'CYS'
    ]
    phos_neighbors_v2 = {}
    for k, v in phos_neighbors_v1.iteritems():
        phos_neighbors_v2[k] = {}
        for vk, vv in v.iteritems():
            vvn = []
            for vi in vv:
                if vi in stand_res:
                    vvn.append(vi)
            # entries left with no standard residue are dropped entirely
            if len(vvn) > 0:
                vvn = sorted(vvn)
                phos_neighbors_v2[k][vk] = vvn

    # delete uncomplete patterns
    phos_neighbors_v3 = {}
    for k, v in phos_neighbors_v2.iteritems():
        phos_neighbors_v3[k] = {}
        for vk, vv in v.iteritems():
            # "complete" means at least three neighbors
            if len(vv) >= 3:
                phos_neighbors_v3[k][vk] = vv

    def write_phosneighbors(phos_neighbors, ofile):
        # Writes three sections to ofile: the sorted keys, the per-key
        # details, and a frequency table of neighbor patterns.
        keys = phos_neighbors.keys()
        keys = sorted(keys)
        for k in keys:
            print >> ofile, k
        # NOTE(review): both '*' rules are placed after their loop, as
        # section separators; they may originally have been inside it.
        print >> ofile, '*' * 80
        for k in keys:
            v = phos_neighbors[k]
            print >> ofile, k
            for vk, vv in v.iteritems():
                print >> ofile, ' ', vk, '\t', ' '.join(vv)
        print >> ofile, '*' * 80
        # neighbor pattern (as a tuple) -> keys in which it occurs
        phos_sta = {}
        for k, v in phos_neighbors.iteritems():
            for vk, vv in v.iteritems():
                if not tuple(vv) in phos_sta.keys():
                    phos_sta[tuple(vv)] = [k]
                else:
                    phos_sta[tuple(vv)].append(k)
        phos_sta_list = []
        for k, v in phos_sta.iteritems():
            phos_sta_list.append((len(v), k, v))
        # most frequent pattern first
        phos_sta_list = sorted(phos_sta_list, reverse=True)
        for len_v, k, v in phos_sta_list:
            print >> ofile, '{0:150}{1:<8}{2}'.format(k, len_v, ','.join(v))

    # The handles returned by lt.open_file are rebound without an explicit
    # close() or `with` in this function.
    ofile = lt.open_file('phos_neighbors_original')
    write_phosneighbors(phos_neighbors, ofile)
    ofile = lt.open_file('phos_neighbors_sorted')
    write_phosneighbors(phos_neighbors_v1, ofile)
    ofile = lt.open_file('phos_neighbors_clear')
    write_phosneighbors(phos_neighbors_v2, ofile)
    ofile = lt.open_file('phos_neighbors_clean')
    write_phosneighbors(phos_neighbors_v3, ofile)
def write_results(self):
    """Write one ``shape  patch  count`` row per entry of ``self.shape_patch_pros``.

    ``self.shape_patch_pros`` is used as a mapping shape -> (patch ->
    proteins); the count is ``len(proteins)``.  Output goes to the
    ``sim_patch_num`` file.
    """
    row_fmt = '{0:<10}{1:<10}{2:<10}'
    with lt.open_file(file_suffix='sim_patch_num') as w_f:
        for shape, patch_pros in self.shape_patch_pros.iteritems():
            for patch, pros in patch_pros.iteritems():
                count = len(pros)
                print >> w_f, row_fmt.format(shape, patch, count)
def main():
    """
    write original center:neighbors
    write pdb_ids
    write neighbors:pdbs

    Neighbors are collected with ``pdb_neighbors`` from every file in
    ``lt.files_in_dir(sys.argv[-1])``.  The raw data is dumped
    (``original_pdbs``, ``orignal``, ``orignal_simple``), then filtered in
    two steps - neighbors whose name is not a standard residue are removed
    (``filter_non_standard``), then neighbors whose last ``_`` field equals
    that of the centre residue (``filter_same_chain``) - and finally split
    by neighbor count into ``1_neighbor`` .. ``7_neighbor`` (7 = more than
    six), with a per-bucket PDB count written to ``sta``.
    """
    null = open(os.devnull, 'w')
    # sys.stderr = null

    # three-letter -> one-letter code for the 20 standard residues
    one_letter = {
        'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q',
        'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
        'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T',
        'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C',
    }
    row_fmt = '{0:<20}{1:<20}{2}'

    def write_result(res_neighbors, file_suffix):
        # Writes pdb_id, res_neighbors and neighbor_pdbs for one data set.
        target = dict(inner_dir=file_suffix, file_suffix=file_suffix)

        unique_ids = list(set(pdb_id for pdb_id, _, _ in res_neighbors))
        with lt.open_file(file_name='pdb_id', **target) as w_f:
            print >> w_f, 'num:', '\t', len(unique_ids)
            for pdb in unique_ids:
                print >> w_f, pdb

        ordered = sorted(res_neighbors)
        with lt.open_file(file_name='res_neighbors', **target) as w_f:
            for pdb_id, res, neighbors in ordered:
                print >> w_f, row_fmt.format(
                    pdb_id, res, ' '.join(neighbors))

        # pattern -> ids showing it; a non-standard name raises KeyError
        by_pattern = {}
        for pdb_id, res, neighbors in ordered:
            names = [n.split('_')[0] for n in neighbors]
            pattern = ''.join(sorted([one_letter[name] for name in names]))
            members = by_pattern.setdefault(pattern, [])
            if pdb_id not in members:
                members.append(pdb_id)

        ranked = sorted(
            [(len(members), pattern, members)
             for pattern, members in by_pattern.iteritems()],
            reverse=True)
        with lt.open_file(file_name='neighbor_pdbs', **target) as w_f:
            for _, pattern, members in ranked:
                print >> w_f, '{0:<10}{1}'.format(
                    pattern, ' '.join(members))

    res_neighbors = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        pdb_id = os.path.splitext(os.path.split(pdb_f)[1])[0]
        res_neighbors.extend(pdb_neighbors(pdb_f, pdb_id))

    # ---- raw dumps (entries without neighbors are left out) -------------
    pdbs = set(pdb_id for pdb_id, _, neighbors in res_neighbors
               if len(neighbors) > 0)
    with lt.open_file(file_suffix='original_pdbs') as w_f:
        for p in pdbs:
            print >> w_f, p

    with lt.open_file(file_suffix='orignal') as w_f:
        for pdb_id, res, neighbors in res_neighbors:
            if len(neighbors) > 0:
                print >> w_f, row_fmt.format(
                    pdb_id, res, ' '.join(neighbors))

    with lt.open_file(file_suffix='orignal_simple') as w_f:
        for pdb_id, res, neighbors in res_neighbors:
            if len(neighbors) == 0:
                continue
            names = [n.split('_')[0] for n in neighbors]
            # unknown names contribute an empty string here
            letters = sorted([one_letter.get(name, '') for name in names])
            print >> w_f, row_fmt.format(pdb_id, res, ''.join(letters))

    def drop_empty(entries):
        # keep only the entries that still have at least one neighbor
        return [(pdb_id, res, neighbors)
                for pdb_id, res, neighbors in entries
                if len(neighbors) > 0]

    # res_neighbors format: [(pdb_id, residue, [neighbor_residues])]
    # ---- delete non-standard residues -----------------------------------
    res_neighbors = drop_empty(
        [(pdb_id, res,
          [n for n in neighbors if n.split('_')[0] in one_letter])
         for pdb_id, res, neighbors in res_neighbors])
    write_result(res_neighbors, file_suffix='filter_non_standard')

    # ---- delete same_chain ----------------------------------------------
    res_neighbors = drop_empty(
        [(pdb_id, res,
          [n for n in neighbors
           if n.split('_')[-1] != res.split('_')[-1]])
         for pdb_id, res, neighbors in res_neighbors])
    write_result(res_neighbors, file_suffix='filter_same_chain')

    # ---- split by neighbor count ----------------------------------------
    buckets = []
    for size in range(1, 7):
        exact = [(pdb_id, res, neighbors)
                 for pdb_id, res, neighbors in res_neighbors
                 if len(neighbors) == size]
        write_result(exact, file_suffix=str(size) + '_neighbor')
        buckets.append(exact)
    many = [(pdb_id, res, neighbors)
            for pdb_id, res, neighbors in res_neighbors
            if len(neighbors) > 6]
    write_result(many, file_suffix='7_neighbor')
    buckets.append(many)

    sta = [len(set(pdb_id for pdb_id, _, _ in bucket))
           for bucket in buckets]
    with lt.open_file(file_suffix='sta') as w_f:
        print >> w_f, '{0:<20}{1}'.format('neighbors num', 'pdb_num')
        for position, num in enumerate(sta):
            print >> w_f, '{0:<20}{1}'.format(position + 1, num)
        print >> w_f, '{0:<20}{1}'.format('total', sum(sta))
def write_results(shape_patch_pros, pros_l, seqs_d, wdsps_d, hotspots_d,
                  cutoff=1):
    """Write the shape/patch statistics and one directory per patch group.

    ``shape_patch_pros`` is used as a mapping shape -> (patch -> proteins).
    Outputs, all through ``lt.open_file``:

    * ``merged_shape``  - shapes ordered by their number of distinct proteins
    * ``pros_id``       - every distinct protein id
    * ``sta``           - (shape, patch) groups with at least ``cutoff``
      proteins, largest first, including the number of ids containing
      ``HUMAN``
    * per group, in a directory ``<count>_<shape>_<patch>``: hotspots,
      ``.wdsp`` records, protein ids and ``.fa`` sequences (80 per line)
    * ``merged_patch_sta`` / ``merged_patch_sta_simple`` - groups merged by
      the sorted characters of their patch string

    ``pros_l`` is accepted but not used by this function.
    """
    by_count = operator.itemgetter(1)

    # ---- one row per shape ----------------------------------------------
    sta_shape = []
    for shape, patch_pros in shape_patch_pros.iteritems():
        members = set(pro for group in patch_pros.itervalues()
                      for pro in group)
        sta_shape.append((shape, len(members), patch_pros.items()))
    sta_shape = sorted(sta_shape, key=by_count, reverse=True)
    with lt.open_file(file_suffix='merged_shape') as w_f:
        for shape, num, detail in sta_shape:
            print >> w_f, '{0:<20}{1:<10}{2:<}'.format(shape, num, detail)

    # ---- all protein ids ------------------------------------------------
    # ids are joined with blanks and split again, exactly as before
    per_shape_ids = [
        ' '.join([pro for group in patch_pros.values() for pro in group])
        for patch_pros in shape_patch_pros.itervalues()]
    all_ids = set(' '.join(per_shape_ids).split())
    with lt.open_file(file_suffix='pros_id') as f:
        for pro in all_ids:
            print >> f, pro

    # ---- (shape, patch) groups of sufficient size -----------------------
    sta = []
    for shape, patch_pros in shape_patch_pros.iteritems():
        for patch, group in patch_pros.iteritems():
            if len(group) >= cutoff:
                sta.append((len(group), shape, patch, group))
    sta = sorted(sta, reverse=True)

    with lt.open_file(file_suffix='sta') as f:
        for len_pros, shape, patch, group in sta:
            human_num = len([p for p in group if 'HUMAN' in p])
            print >> f, '{0:<10}{1:<20}{2:<10}{3:<10}{4:<}'.format(
                len_pros, shape, patch, human_num, ','.join(group))

    # ---- per-group directory --------------------------------------------
    for len_pros, shape, patch, group in sta:
        dir_name = str(len_pros) + '_' + shape + '_' + patch

        with lt.open_file(file_name=dir_name, file_suffix='hotspot',
                          inner_dir=dir_name) as f:
            # proteins with fewer hotspots come first
            ranked = sorted([(len(hotspots_d[pro]), pro, hotspots_d[pro])
                             for pro in group])
            for _, pro, hotspot in ranked:
                print >> f, '{0:<20}{1}'.format(pro, ' '.join(hotspot))

        with lt.open_file(file_name=dir_name, file_extension='.wdsp',
                          inner_dir=dir_name) as f:
            for pro in group:
                for record in wdsps_d[pro]:
                    print >> f, record

        with lt.open_file(file_name=dir_name, file_suffix='pros_id',
                          inner_dir=dir_name) as f:
            for pro in group:
                print >> f, pro

        with lt.open_file(file_name=dir_name, file_suffix='seqs',
                          file_extension='.fa', inner_dir=dir_name) as f:
            for pro in group:
                print >> f, '>{0}'.format(pro)
                seq = seqs_d[pro]
                for start in range(0, len(seq), 80):
                    print >> f, seq[start:start + 80]

    # ---- merge groups whose patch has the same sorted characters -------
    sta_dic = {}
    for len_pros, shape, patch, group in sta:
        merged_key = ''.join(sorted(patch))
        sta_dic.setdefault(merged_key, []).append((len_pros, shape, patch))
    sta_lis = sorted(
        [(key, sum([item[0] for item in items]), items)
         for key, items in sta_dic.iteritems()],
        key=by_count, reverse=True)

    with lt.open_file(file_suffix='merged_patch_sta') as f:
        for patch, num, detail in sta_lis:
            first = detail[0]
            print >> f, '{0:<10}{1:<10}{2:<10}{3:<20}{4:<}'.format(
                patch, num, first[0], first[1], first[2])
            # remaining members are indented under the first one
            for d in detail[1:]:
                print >> f, '{0:<20}{1:<10}{2:<20}{3:<}'.format(
                    '', d[0], d[1], d[2])

    with lt.open_file(file_suffix='merged_patch_sta_simple') as f:
        for patch, num, detail in sta_lis:
            print >> f, '{0:<10}{1:<10}'.format(patch, num)