Exemple #1
0
def main():
    """Cross-match two patch lists and report matches and leftovers.

    The two patch files are named by the last two command-line arguments
    (``sys.argv[-2]`` feeds ``patch3p``, ``sys.argv[-1]`` feeds ``patch4p``).
    Patches are paired with ``compare_patch`` in both directions, both
    mappings are dumped through ``lt.print_dic`` and the ``patch3p`` entries
    that matched nothing are written to ``unique_3``.
    """
    patch3p = read_patch(sys.argv[-2])
    patch4p = read_patch(sys.argv[-1])

    # each patch4p entry -> the patch3p entries it matches
    dic_4_3 = OrderedDict()
    for four in patch4p:
        dic_4_3[tuple(four)] = [three for three in patch3p
                                if compare_patch(three, four)]

    # each patch3p entry -> the patch4p entries it matches
    dic_3_4 = OrderedDict()
    for three in patch3p:
        dic_3_4[tuple(three)] = [four for four in patch4p
                                 if compare_patch(three, four)]

    for label, mapping in (('dic_4_3', dic_4_3), ('dic_3_4', dic_3_4)):
        with lt.open_file(label) as w_f:
            lt.print_dic(mapping, nest=0, output=w_f)

    # patch3p entries that appear in no match list at all
    matched_3 = set(tuple(hit) for hits in dic_4_3.values() for hit in hits)
    leftover_3 = set(tuple(p) for p in patch3p) - matched_3
    unique_3 = [(int(u[0]), u[1], u[2]) for u in leftover_3]
    unique_3.sort(reverse=True)

    with lt.open_file('unique_3') as w_f:
        for entry in unique_3:
            print >> w_f, entry
    def write_result(res_neighbors, file_suffix):
        """Write the pdb-id list, the per-residue neighbors and the neighbor patterns.

        ``res_neighbors`` is a sequence of ``(pdb_id, res, neighbors)``
        triples.  Three files are produced, all placed via ``file_suffix``
        (used both as ``inner_dir`` and as ``file_suffix``):

        * ``pdb_id``: the number of distinct ids, then each id with ``domain(id)``
        * ``res_neighbors``: the sorted triples, one per line
        * ``neighbor_pdbs``: each one-letter neighbor pattern with the ids
          showing it, most frequent pattern first

        A neighbor whose residue name is missing from the table raises
        ``KeyError``.
        """
        pdbs = list(set(pdb_id for pdb_id, _, _ in res_neighbors))
        with lt.open_file(file_name='pdb_id', inner_dir=file_suffix, file_suffix=file_suffix) as w_f:
            print >> w_f, 'num:', '\t', len(pdbs)
            for pdb in pdbs:
                print >> w_f, pdb, '\t',domain(pdb)

        res_neighbors = sorted(res_neighbors)
        row = '{0:<8}{1:<10}{2:<20}{3}'
        with lt.open_file(file_name='res_neighbors', inner_dir=file_suffix, file_suffix=file_suffix) as w_f:
            for pdb_id, res, neighbors in res_neighbors:
                print >> w_f, row.format(
                    pdb_id, domain(pdb_id), res, '  '.join(neighbors))

        # three-letter -> one-letter residue codes
        HASH = {'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q', 'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
                'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T', 'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C'}

        # pattern (sorted one-letter codes) -> ids showing it, first-seen order, no repeats
        by_pattern = {}
        for pdb_id, _, neighbors in res_neighbors:
            names = [n.split('_')[0] for n in neighbors]
            pattern = ''.join(sorted([HASH[name] for name in names]))
            owners = by_pattern.setdefault(pattern, [])
            if pdb_id not in owners:
                owners.append(pdb_id)

        ranked = [(len(owners), pattern, owners)
                  for pattern, owners in by_pattern.iteritems()]
        ranked.sort(reverse=True)
        with lt.open_file(file_name='neighbor_pdbs', inner_dir=file_suffix, file_suffix=file_suffix) as w_f:
            for _, pattern, owners in ranked:
                # ids for which domain() returns something falsy are left out
                labelled = [pdb+'_'+domain(pdb) for pdb in owners if domain(pdb)]
                print >> w_f, '{0:<10}{1}'.format(pattern, ' '.join(labelled))
def write_result(res_neighbors,filename):
    """Write the combined hbplus/salt results under names derived from *filename*.

    ``res_neighbors`` holds ``(protein, phos_res, neighbor_res)`` triples.
    ``hbplus_salt_combine_<filename>_pdb`` receives the distinct protein ids,
    ``hbplus_salt_combine_<filename>`` one formatted line per triple.
    """
    base = 'hbplus_salt_combine_'+filename

    pro_ids = set(pro for pro, _, _ in res_neighbors)
    with lt.open_file(file_name=base+'_pdb') as w_f:
        for pro_id in pro_ids:
            print >> w_f,pro_id

    row = '{0:<8}{1:<15}{2}'
    with lt.open_file(file_name=base) as w_f:
        for pro, phos_res, neighbor_res in res_neighbors:
            print >> w_f, row.format(pro, phos_res, ',   '.join(neighbor_res))
Exemple #4
0
def write_initial(hbs):
    """Write the initial hbplus hits and a second file with their supporting lines.

    ``hbs`` holds ``(protein, phos_res, neighbor_res, lines)`` 4-tuples.
    ``hbplus_initial`` gets one summary line per tuple; ``hbplus_initial_proof``
    repeats each summary between two 80-dash rulers followed by its ``lines``.
    """
    def summary(pro, phos_res, neighbor_res):
        return '{0:<8}{1:<15}{2}'.format(pro, phos_res,
                                         ',   '.join(neighbor_res))

    ruler = '-' * 80

    with lt.open_file(file_name='hbplus_initial') as w_f:
        for pro, phos_res, neighbor_res, _ in hbs:
            print >> w_f, summary(pro, phos_res, neighbor_res)

    with lt.open_file(file_name='hbplus_initial_proof') as w_f:
        for pro, phos_res, neighbor_res, lines in hbs:
            print >> w_f, ruler
            print >> w_f, summary(pro, phos_res, neighbor_res)
            print >> w_f, ruler
            for proof_line in lines:
                print >> w_f, proof_line
def write_result(res_neighbors, file_suffix):
    """Write the distinct pdb ids and the sorted residue/neighbor table.

    ``res_neighbors`` holds ``(pdb_id, res, neighbors)`` triples; both output
    files are placed via ``file_suffix`` (used as ``inner_dir`` and as
    ``file_suffix``).
    """
    placement = dict(inner_dir=file_suffix, file_suffix=file_suffix)

    pdbs = list(set(pdb_id for pdb_id, _, _ in res_neighbors))
    with lt.open_file(file_name='pdb_id', **placement) as w_f:
        print >> w_f, 'num:', '\t', len(pdbs)
        for pdb in pdbs:
            print >> w_f, pdb

    row = '{0:<20}{1:<20}{2}'
    with lt.open_file(file_name='res_neighbors', **placement) as w_f:
        for pdb_id, res, neighbors in sorted(res_neighbors):
            print >> w_f, row.format(pdb_id, res, '  '.join(neighbors))
def main():
    """List every file in the directory given as last argument that has ligands.

    For each file ``check_ligands`` is consulted; files with at least one
    ligand are written as ``<name up to the first dot> <TAB> <ligands>``.
    """
    with lt.open_file() as write_f:
        for path in lt.files_in_dir(sys.argv[-1]):
            base_name = os.path.split(path)[1]
            pdb_id = base_name.split('.')[0]
            ligands = check_ligands(path)
            if len(ligands) <= 0:
                continue
            print >> write_f, pdb_id, '\t', ' '.join(ligands)
def write_result(tem_all_hots,
                 tem_hots,
                 tem_wdsp,
                 all_hots,
                 all_wdsp,
                 tem_repeats_similarity,
                 all_repeats_similarity,
                 tem_all_seq_similarity,
                 cutoff=0):
    """Write, per template protein, a hotspot table, a WDSP dump and the sequences.

    ``tem_all_hots`` maps each template to a list of ``(protein, identity)``
    pairs.  For every template three files are written, all named after the
    template and ``cutoff``:

    * ``<tem>_similar_hotspots_<cutoff>``: a header, the template row, then
      one row per listed protein
    * ``<tem>_similar_hotspots_wdsp<cutoff>`` (extension ``.wdsp``): the WDSP
      lines of the template followed by those of each listed protein
    * ``<tem>_similar_hotspots_seq<cutoff>``: ``>``-headed sequences wrapped
      at 80 characters
    """
    row = '{0:<20}{1:<15}{2:<18}{3:<15}{4:<15}{5:<15}{6:<15}{7:<}'
    suffix = str(cutoff)

    def wrap(seq):
        # consecutive 80-character slices of seq
        return [seq[start:start + 80] for start in range(0, len(seq), 80)]

    for tem_pro, all_pros in tem_all_hots.iteritems():

        with lt.open_file(tem_pro + '_similar_hotspots_' + suffix) as w_f:
            print >> w_f, row.format(
                'protein_id', 'identity', 'seq_similarity', "hotspot_num",
                'repeat_length', "repeats_sim", 'tetrad_num', 'hotspots')
            # the template is listed first, as 100% identical/similar to itself
            print >> w_f, row.format(
                tem_pro, '100', '100', len(tem_hots[tem_pro]),
                tem_wdsp.repeat_num[tem_pro],
                tem_repeats_similarity[tem_pro][0],
                tem_wdsp.tetrad_num[tem_pro], ' '.join(tem_hots[tem_pro]))
            seq_similarity = tem_all_seq_similarity[tem_pro]
            for pro, identity in all_pros:
                print >> w_f, row.format(
                    pro, identity, seq_similarity[pro],
                    len(all_hots[pro]), all_wdsp.repeat_num[pro],
                    all_repeats_similarity[pro][0], all_wdsp.tetrad_num[pro],
                    ' '.join(all_hots[pro]))

        with lt.open_file(tem_pro + '_similar_hotspots_wdsp' + suffix,
                          file_extension='.wdsp') as w_f:
            for line in tem_wdsp.wdsps[tem_pro]:
                print >> w_f, line
            for pro, _ in all_pros:
                for line in all_wdsp.wdsps[pro]:
                    print >> w_f, line

        with lt.open_file(tem_pro + '_similar_hotspots_seq' + suffix) as w_f:
            print >> w_f, '>', tem_pro
            for piece in wrap(tem_wdsp.seqs[tem_pro]):
                print >> w_f, piece
            for pair in all_pros:
                name = pair[0]
                print >> w_f, '>', name
                for piece in wrap(all_wdsp.seqs[name]):
                    print >> w_f, piece
def main():
    """Dump the pickled residue/neighbor triples as text, then pickle them again.

    The triples come from ``read_pickle()``; the text goes to
    ``hbplus_salt_combine_initial`` and the pickle is stored through
    ``lt.pickle_dump`` under ``hbplus_salt_combine``.
    """
    res_neighbors = read_pickle()

    row = '{0:<8}{1:<15}{2}'
    with lt.open_file(file_name='hbplus_salt_combine_initial') as w_f:
        for pro, phos_res, neighbor_res in res_neighbors:
            joined = ',   '.join(neighbor_res)
            print >> w_f, row.format(pro, phos_res, joined)

    lt.pickle_dump(res_neighbors, 'hbplus_salt_combine')
def write_result(tem_all_hots, tem_hots, all_hots, cutoff=0.3):
    """Write one hotspot table per template protein.

    ``tem_all_hots`` maps a template to ``(protein, identity)`` pairs.  The
    file ``<tem>_similar_hotspots_<cutoff>`` starts with the template's own
    hotspots (blank identity column) followed by one row per listed protein,
    with the identity printed to two decimals.
    """
    suffix = '_similar_hotspots_' + str(cutoff)
    for tem_pro, all_pros in tem_all_hots.iteritems():
        with lt.open_file(tem_pro + suffix) as w_f:
            template_hots = ' '.join(tem_hots[tem_pro])
            print >> w_f, '{0:<20}{1:<10}{2:<}'.format(
                tem_pro, " ", template_hots)
            for pro, identity in all_pros:
                hots = ' '.join(all_hots[pro])
                print >> w_f, '{0:<20}{1:<10.2f}{2:<}'.format(
                    pro, identity, hots)
def write_result(tem_all_hots, tem_hots, all_hots, b, h, cutoff=1):
    """Write one hotspot table per template protein, named after *b* and *h*.

    ``tem_all_hots`` maps a template to a collection of protein ids.  The file
    ``<tem>_similar_hotspots_b<b>h<h>`` lists the template's hotspots first,
    then those of each listed protein.  ``cutoff`` is accepted but not used.
    """
    row = '{0:<20}{1:<}'
    tag = '_similar_hotspots_b' + str(b) + 'h' + str(h)
    for tem_pro, all_pros in tem_all_hots.iteritems():
        with lt.open_file(tem_pro + tag) as w_f:
            print >> w_f, row.format(tem_pro, ' '.join(tem_hots[tem_pro]))
            for pro in all_pros:
                print >> w_f, row.format(pro, ' '.join(all_hots[pro]))
Exemple #11
0
def main():
    """Align every template sequence against every sequence of a second WDSP file.

    ``sys.argv[-2]`` names the template WDSP file and ``sys.argv[-1]`` the
    file to compare against.  For each template one output file (named after
    the template) lists every compared sequence with its ``align`` result, in
    the order the sequences are iterated (no sorting is applied).
    """
    with open(sys.argv[-2]) as o_f:
        tem = Wdsp(o_f)
        tem_seq = tem.seqs
    with open(sys.argv[-1]) as o_f:
        all1 = Wdsp(o_f)
        all_seq = all1.seqs

    similarity = OrderedDict()
    for t_name, t_seq in tem_seq.iteritems():
        similarity[t_name] = [(a_name, align(t_seq, a_seq))
                              for a_name, a_seq in all_seq.iteritems()]

    row = '{0:<15}{1:<}'
    for t_name, scored in similarity.iteritems():
        with lt.open_file(t_name) as w_f:
            for a_name, a_identity in scored:
                print >> w_f, row.format(a_name, a_identity)
def main():
    """Collect residue neighbors from every ``.pdb`` file and tally neighbor counts.

    The directory is taken from the last command-line argument.  All
    ``(pdb_id, res, neighbors)`` triples returned by ``pdb_neighbors`` are
    written through ``write_result`` (suffix ``original``); the distribution
    of neighbor-list lengths is then written to ``phos_neighbors_num``.
    """
    res_neighbors = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        f_name, f_exten = os.path.splitext(os.path.basename(pdb_f))
        if f_exten == '.pdb':
            res_neighbors.extend(pdb_neighbors(pdb_f, f_name))

    write_result(res_neighbors, file_suffix='original')

    # res_neighbors is a list of triples, not a mapping, so the lengths are
    # taken from the third field of each triple.
    sta = lt.lis_sta([len(neighbors) for _, _, neighbors in res_neighbors])
    with lt.open_file('phos_neighbors_num') as w_f:
        # lt.lis_sta results are iterated as (value, count) pairs, matching
        # how the other statistics writers in this file consume them.
        for k, v in sta:
            print >> w_f, '{0:<10}{1:<10}'.format(k, v)
def write_sta_result(res_neighbors,filename):
    """Group ``(pdb, res)`` pairs by neighbor pattern and write the groups.

    Each neighbor name is cut at its first ``_`` and mapped to a one-letter
    code (``O`` for ``HOH``, ``*`` for anything not in the table); the sorted
    letters form the pattern.  Patterns are written to
    ``hbplus_salt_combine_<filename>_sta`` ordered by pattern length, then
    alphabetically, each with the list of ``(pdb, res)`` pairs showing it.
    """
    HASH = {'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q', 'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
            'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T', 'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C','HOH':'O'}

    # pattern -> (pdb, res) pairs in first-seen order, without repeats
    by_pattern = {}
    for pdb, res, neighbors in res_neighbors:
        letters = [HASH.get(n.split('_')[0], '*') for n in neighbors]
        pattern = ''.join(sorted(letters))
        members = by_pattern.setdefault(pattern, [])
        if (pdb, res) not in members:
            members.append((pdb, res))

    ranked = [(len(pattern), pattern, members)
              for pattern, members in by_pattern.iteritems()]
    ranked.sort()

    with lt.open_file(file_name='hbplus_salt_combine_'+filename+'_sta') as w_f:
        for _, pattern, members in ranked:
            print >> w_f,'{0:<20}{1:<}'.format(pattern, members)
Exemple #14
0
def main():
    """Write the distribution of distances found in a directory of structure files.

    ``pdb_dist`` is called for every file in the directory given as last
    command-line argument (the id is the four characters before the
    four-character file extension).  The distances are reduced to one
    decimal, those of 10.0 or more are dropped, and the tally produced by
    ``lt.lis_sta`` is written to the ``dist_sta`` output.

    ``sys.stderr`` is silenced while this runs; it is restored (and the
    devnull handle closed) afterwards, so an exception raised in here still
    produces a visible traceback.
    """
    null = open(os.devnull, 'w')
    saved_stderr = sys.stderr
    sys.stderr = null
    try:
        dist = []
        for pdb_f in lt.files_in_dir(sys.argv[-1]):
            pdb_id = pdb_f[-8:-4]
            dist += pdb_dist(pdb_f, pdb_id)

        def myround(n):
            # round to one decimal, then rebuild the float from its text
            # form keeping a single digit after the point
            n = np.round(n, 1)
            a, b = str(n).split('.')
            return float(a + '.' + b[0])

        dist = [myround(d) for d in sorted(dist)]
        dist = [d for d in dist if d < 10.0]

        sta = lt.lis_sta(dist)
        with lt.open_file(file_suffix='dist_sta') as w_f:
            for num, freq in sta:
                print >> w_f, '{0:<10}{1}'.format(num, freq)
    finally:
        sys.stderr = saved_stderr
        null.close()
Exemple #15
0
# -*- coding: utf-8 -*-
"""
calculate statistics for WDSP output file
usage: python wdsp_sta.py wdsp_f
"""
import lt
import sys
import os

from wdsp import Wdsp

# Parse the WDSP file named by the last command-line argument and write one
# statistics file per measure.  Each tally comes from lt.lis_sta and is
# iterated as (value, frequency) pairs.
with open(sys.argv[-1]) as o_f:
    wdsp = Wdsp(o_f)

    # distribution of the values stored in wdsp.scores
    scores_sta = lt.lis_sta(wdsp.scores.values())
    with lt.open_file(file_suffix='total_score_sta') as w_f:
        for num, freq in scores_sta:
            print >> w_f, '{0:<10}{1}'.format(num, freq)

    # per entry of wdsp.blade_scores: how many of its scores reach 44.0
    tetrad_sta = [
        len([vi for vi in v if vi >= 44.0])
        for k, v in wdsp.blade_scores.iteritems()
    ]
    tetrad_sta = lt.lis_sta(tetrad_sta)
    with lt.open_file(file_suffix='tetrad_num_sta') as w_f:
        for num, freq in tetrad_sta:
            print >> w_f, '{0:<5}{1}'.format(num, freq)

    # per entry of wdsp.blades: how many blades it has
    blades_sta = [len(blades) for pro, blades in wdsp.blades.iteritems()]
    blades_sta = lt.lis_sta(blades_sta)
    # NOTE(review): the body of this final `with` is missing from the visible
    # source, so the snippet is incomplete as it stands.
    with lt.open_file(file_suffix='blades_sta') as w_f:
Exemple #16
0
def write_result(res_neighbors, file_suffix):
    """Write pdb ids, residue neighbors and collapsed neighbor patterns.

    ``res_neighbors`` holds ``(pdb_id, res, neighbors)`` triples.  Three files
    are written, all placed via ``file_suffix`` (used as ``inner_dir`` and as
    ``file_suffix``): ``pdb_id``, ``res_neighbors`` and ``neighbor_pdbs``.
    The last one groups pdb ids by neighbor pattern, most frequent first,
    using a reduced residue alphabet.
    """
    placement = dict(inner_dir=file_suffix, file_suffix=file_suffix)

    pdbs = list(set(pdb_id for pdb_id, _, _ in res_neighbors))
    with lt.open_file(file_name='pdb_id', **placement) as w_f:
        print >> w_f, 'num:', '\t', len(pdbs)
        for pdb in pdbs:
            print >> w_f, pdb

    res_neighbors = sorted(res_neighbors)
    with lt.open_file(file_name='res_neighbors', **placement) as w_f:
        for pdb_id, res, neighbors in res_neighbors:
            print >> w_f, '{0:<20}{1:<20}{2}'.format(pdb_id, res,
                                                     '  '.join(neighbors))

    # Reduced alphabet: these residues are all written as 'G' (the original
    # rationale: they use the main chain to form hydrogen bonds).
    # NOTE(review): the earlier comment named G A V I L F P C M only and said
    # S and T are not distinguished; the table also collapses GLU and ASP and
    # keeps SER and THR apart. Confirm which is intended.
    HASH = dict.fromkeys(
        ['VAL', 'ILE', 'LEU', 'GLU', 'ASP', 'PHE',
         'MET', 'ALA', 'GLY', 'PRO', 'CYS'], 'G')
    HASH.update({
        'GLN': 'Q',
        'ASN': 'N',
        'HIS': 'H',
        'TRP': 'W',
        'TYR': 'Y',
        'ARG': 'R',
        'LYS': 'K',
        'SER': 'S',
        'THR': 'T',
        'HOH': 'O',
    })

    # pattern -> pdb ids in first-seen order, without repeats; names missing
    # from the table become '*'
    by_pattern = {}
    for pdb_id, _, neighbors in res_neighbors:
        letters = [HASH.get(n.split('_')[0], '*') for n in neighbors]
        pattern = ''.join(sorted(letters))
        owners = by_pattern.setdefault(pattern, [])
        if pdb_id not in owners:
            owners.append(pdb_id)

    ranked = [(len(owners), pattern, owners)
              for pattern, owners in by_pattern.iteritems()]
    ranked.sort(reverse=True)
    with lt.open_file(file_name='neighbor_pdbs', **placement) as w_f:
        for _, pattern, owners in ranked:
            print >> w_f, '{0:<20}{1}'.format(pattern, ','.join(owners))
Exemple #17
0
def main():
    """Write neighbor reports for every ``.pdb`` file, overall and per neighbor count.

    The directory is taken from the last command-line argument.  Output:

    * ``write_result`` on all ``(pdb_id, res, neighbors)`` triples
      (suffix ``original``)
    * ``write_result`` on each bucket of triples with exactly 1..6 neighbors
      (suffixes ``1_neighbor`` .. ``6_neighbor``) and on the bucket with more
      than 6 neighbors (suffix ``7_neighbor``)
    * a ``sta`` table with the number of distinct pdb ids per bucket and
      their sum
    """
    res_neighbors = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        f_name, f_exten = os.path.splitext(os.path.basename(pdb_f))
        if f_exten == '.pdb':
            res_neighbors.extend(pdb_neighbors(pdb_f, f_name))

    write_result(res_neighbors, file_suffix='original')

    # buckets 1..6 hold exactly that many neighbors; bucket 7 is "more than 6"
    sta = []
    for size in range(1, 8):
        if size < 7:
            bucket = [(pdb_id, res, neighbors)
                      for pdb_id, res, neighbors in res_neighbors
                      if len(neighbors) == size]
        else:
            bucket = [(pdb_id, res, neighbors)
                      for pdb_id, res, neighbors in res_neighbors
                      if len(neighbors) > 6]
        write_result(bucket, file_suffix='{0}_neighbor'.format(size))
        sta.append(len(set(pdb_id for pdb_id, _, _ in bucket)))

    with lt.open_file(file_suffix='sta') as w_f:
        print >> w_f, '{0:<20}{1}'.format('neighbors num', 'pdb_num')
        for i, num in enumerate(sta):
            print >> w_f, '{0:<20}{1}'.format(i + 1, num)
        # a pdb id present in several buckets is counted once per bucket
        print >> w_f, '{0:<20}{1}'.format('total', sum(sta))
Exemple #18
0
 def write_tem_pros(self):
     """Write one line per ``(shape, patch, pros)`` entry of ``self.tem_pros``.

     Columns: number of proteins, shape, patch, comma-joined protein ids.
     """
     row = '{0:<10}{1:<20}{2:<10}{3:<}'
     with lt.open_file(file_suffix='tem_pros') as w_f:
         for shape, patch, pros in self.tem_pros:
             count = len(pros)
             print >> w_f, row.format(count, shape, patch, ','.join(pros))
Exemple #19
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
usage: python get_hotspot.py *.wdsp
output hotspots in following format
pro xxx xxx xxx xxx xxx xxx
"""

import os
import sys
import lt
from wdsp import Wdsp

# Parse the WDSP file named by the last command-line argument and write one
# line per protein: the protein id followed by its space-separated hotspots.
line_fmt = '{0:<25}{1:<}'
with open(sys.argv[-1]) as wdsp_f:
    w = Wdsp(wdsp_f)
    with lt.open_file(file_suffix='hotspots') as w_f:
        for pro, hots in w.hotspots.iteritems():
            joined = ' '.join(hots)
            print >> w_f, line_fmt.format(pro, joined)
def main():
    phos_neighbors = {}
    good_pdb = []
    bad_pdb = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        f_path, f_name = os.path.split(pdb_f)
        f_id, f_ex = os.path.splitext(f_name)
        neighbors = pdb_neighbors(pdb_f, f_id)
        print neighbors
        if len(neighbors) > 0:
            for k, v in neighbors.iteritems():
                if len(v) > 0:
                    phos_neighbors[f_id] = neighbors
                    good_pdb.append(f_id)
                else:
                    bad_pdb.append(f_id)

    lt.write_list(good_pdb, 'good_pdb')
    lt.write_list(bad_pdb, 'bad_pdb')
    # delete res_id and clear empty
    phos_neighbors_v1 = {}
    for k, v in phos_neighbors.iteritems():
        phos_neighbors_v1[k] = {}
        for vk, vv in v.iteritems():
            phos_neighbors_v1[k][vk] = []
            vvn = []
            if len(vv) > 0:
                for vi in vv:
                    vvn.append(vi.split('_')[0])
                vvn = sorted(vvn)
                phos_neighbors_v1[k][vk] = vvn

    # delete non-standard residues
    stand_res = [
        'VAL', 'ILE', 'LEU', 'GLU', 'GLN', 'ASP', 'ASN', 'HIS', 'TRP', 'PHE',
        'TYR', 'ARG', 'LYS', 'SER', 'THR', 'MET', 'ALA', 'GLY', 'PRO', 'CYS'
    ]
    phos_neighbors_v2 = {}
    for k, v in phos_neighbors_v1.iteritems():
        phos_neighbors_v2[k] = {}
        for vk, vv in v.iteritems():
            vvn = []
            for vi in vv:
                if vi in stand_res:
                    vvn.append(vi)
            if len(vvn) > 0:
                vvn = sorted(vvn)
                phos_neighbors_v2[k][vk] = vvn

    # delete uncomplete patterns
    phos_neighbors_v3 = {}
    for k, v in phos_neighbors_v2.iteritems():
        phos_neighbors_v3[k] = {}
        for vk, vv in v.iteritems():
            if len(vv) >= 3:
                phos_neighbors_v3[k][vk] = vv

    def write_phosneighbors(phos_neighbors, ofile):
        keys = phos_neighbors.keys()
        keys = sorted(keys)
        for k in keys:
            print >> ofile, k

        print >> ofile, '*' * 80
        for k in keys:
            v = phos_neighbors[k]
            print >> ofile, k
            for vk, vv in v.iteritems():
                print >> ofile, '    ', vk, '\t', ' '.join(vv)

        print >> ofile, '*' * 80
        phos_sta = {}
        for k, v in phos_neighbors.iteritems():
            for vk, vv in v.iteritems():
                if not tuple(vv) in phos_sta.keys():
                    phos_sta[tuple(vv)] = [k]
                else:
                    phos_sta[tuple(vv)].append(k)
        phos_sta_list = []
        for k, v in phos_sta.iteritems():
            phos_sta_list.append((len(v), k, v))
        phos_sta_list = sorted(phos_sta_list, reverse=True)
        for len_v, k, v in phos_sta_list:
            print >> ofile, '{0:150}{1:<8}{2}'.format(k, len_v, ','.join(v))

    ofile = lt.open_file('phos_neighbors_original')
    write_phosneighbors(phos_neighbors, ofile)
    ofile = lt.open_file('phos_neighbors_sorted')
    write_phosneighbors(phos_neighbors_v1, ofile)
    ofile = lt.open_file('phos_neighbors_clear')
    write_phosneighbors(phos_neighbors_v2, ofile)
    ofile = lt.open_file('phos_neighbors_clean')
    write_phosneighbors(phos_neighbors_v3, ofile)
Exemple #21
0
 def write_results(self):
     """Write ``shape, patch, number of proteins`` for every entry of ``self.shape_patch_pros``."""
     row = '{0:<10}{1:<10}{2:<10}'
     with lt.open_file(file_suffix='sim_patch_num') as w_f:
         for shape, patch_pros in self.shape_patch_pros.iteritems():
             for patch, pros in patch_pros.iteritems():
                 count = len(pros)
                 print >> w_f, row.format(shape, patch, count)
def main():
    """Build the residue-neighbor reports for every file in a directory.

    The directory is taken from the last command-line argument; each file's
    name without extension is used as its pdb id.  Output, in order:

    * the unfiltered data: ``original_pdbs`` (ids having at least one
      neighbor), ``orignal`` (full neighbor names) and ``orignal_simple``
      (sorted one-letter codes, non-standard residues omitted)
    * ``write_result`` after removing non-standard neighbors
      (suffix ``filter_non_standard``)
    * ``write_result`` after also removing neighbors whose last ``_`` field
      equals that of the center residue (suffix ``filter_same_chain``)
    * ``write_result`` per neighbor count: ``1_neighbor`` .. ``6_neighbor``
      and ``7_neighbor`` for more than 6
    * a ``sta`` table with the number of distinct pdb ids per count bucket
    """
    # three-letter -> one-letter codes of the twenty standard residues;
    # built once and shared by every step below
    HASH = {
        'VAL': 'V',
        'ILE': 'I',
        'LEU': 'L',
        'GLU': 'E',
        'GLN': 'Q',
        'ASP': 'D',
        'ASN': 'N',
        'HIS': 'H',
        'TRP': 'W',
        'PHE': 'F',
        'TYR': 'Y',
        'ARG': 'R',
        'LYS': 'K',
        'SER': 'S',
        'THR': 'T',
        'MET': 'M',
        'ALA': 'A',
        'GLY': 'G',
        'PRO': 'P',
        'CYS': 'C'
    }

    def write_result(res_neighbors, file_suffix):
        # Writes pdb ids, the sorted center/neighbor table and the
        # pattern -> pdb ids table for one set of triples.  Only called on
        # data already reduced to standard residues, so HASH[n] cannot miss.
        pdbs = list(set([pdb_id for pdb_id, _, _ in res_neighbors]))
        with lt.open_file(file_name='pdb_id',
                          inner_dir=file_suffix,
                          file_suffix=file_suffix) as w_f:
            print >> w_f, 'num:', '\t', len(pdbs)
            for pdb in pdbs:
                print >> w_f, pdb

        res_neighbors = sorted(res_neighbors)
        with lt.open_file(file_name='res_neighbors',
                          inner_dir=file_suffix,
                          file_suffix=file_suffix) as w_f:
            for pdb_id, res, neighbors in res_neighbors:
                print >> w_f, '{0:<20}{1:<20}{2}'.format(
                    pdb_id, res, '  '.join(neighbors))

        # pattern -> pdb ids in first-seen order, without repeats
        neighbor_pdbs = {}
        for pdb_id, res, neighbors in res_neighbors:
            letters = [HASH[n.split('_')[0]] for n in neighbors]
            pattern = ''.join(sorted(letters))
            owners = neighbor_pdbs.setdefault(pattern, [])
            if pdb_id not in owners:
                owners.append(pdb_id)
        ranked = sorted([(len(owners), pattern, owners)
                         for pattern, owners in neighbor_pdbs.iteritems()],
                        reverse=True)
        with lt.open_file(file_name='neighbor_pdbs',
                          inner_dir=file_suffix,
                          file_suffix=file_suffix) as w_f:
            for _, pattern, owners in ranked:
                print >> w_f, '{0:<10}{1}'.format(pattern, ' '.join(owners))

    # res_neighbors format: [(pdb_id, residue, [neighbor_residues])]
    res_neighbors = []
    for pdb_f in lt.files_in_dir(sys.argv[-1]):
        pdb_id = os.path.splitext(os.path.basename(pdb_f))[0]
        res_neighbors.extend(pdb_neighbors(pdb_f, pdb_id))

    # write original (the 'orignal' spelling is kept: it names output files)
    pdbs = set([
        pdb_id for pdb_id, _, neighbors in res_neighbors if len(neighbors) > 0
    ])
    with lt.open_file(file_suffix='original_pdbs') as w_f:
        for p in pdbs:
            print >> w_f, p
    with lt.open_file(file_suffix='orignal') as w_f:
        for pdb_id, res, neighbors in res_neighbors:
            if len(neighbors) > 0:
                print >> w_f, '{0:<20}{1:<20}{2}'.format(
                    pdb_id, res, ' '.join(neighbors))
    with lt.open_file(file_suffix='orignal_simple') as w_f:
        for pdb_id, res, neighbors in res_neighbors:
            if len(neighbors) > 0:
                # residues missing from the table contribute nothing
                letters = [HASH.get(n.split('_')[0], '') for n in neighbors]
                print >> w_f, '{0:<20}{1:<20}{2}'.format(
                    pdb_id, res, ''.join(sorted(letters)))

    # delete non-standard residues (the keys of HASH are the standard set),
    # then drop centers left without neighbors
    res_neighbors = [(pdb_id, res,
                      [n for n in neighbors if n.split('_')[0] in HASH])
                     for pdb_id, res, neighbors in res_neighbors]
    res_neighbors = [(pdb_id, res, neighbors)
                     for pdb_id, res, neighbors in res_neighbors
                     if len(neighbors) > 0]
    write_result(res_neighbors, file_suffix='filter_non_standard')

    # delete same_chain: neighbors whose last '_' field equals the center's
    res_neighbors = [
        (pdb_id, res,
         [n for n in neighbors if n.split('_')[-1] != res.split('_')[-1]])
        for pdb_id, res, neighbors in res_neighbors
    ]
    res_neighbors = [(pdb_id, res, neighbors)
                     for pdb_id, res, neighbors in res_neighbors
                     if len(neighbors) > 0]
    write_result(res_neighbors, file_suffix='filter_same_chain')

    # buckets 1..6 hold exactly that many neighbors; bucket 7 is "more than 6"
    sta = []
    for size in range(1, 8):
        if size < 7:
            bucket = [(pdb_id, res, neighbors)
                      for pdb_id, res, neighbors in res_neighbors
                      if len(neighbors) == size]
        else:
            bucket = [(pdb_id, res, neighbors)
                      for pdb_id, res, neighbors in res_neighbors
                      if len(neighbors) > 6]
        write_result(bucket, file_suffix='{0}_neighbor'.format(size))
        sta.append(len(set(pdb_id for pdb_id, _, _ in bucket)))

    with lt.open_file(file_suffix='sta') as w_f:
        print >> w_f, '{0:<20}{1}'.format('neighbors num', 'pdb_num')
        for i, num in enumerate(sta):
            print >> w_f, '{0:<20}{1}'.format(i + 1, num)
        print >> w_f, '{0:<20}{1}'.format('total', sum(sta))
Exemple #23
0
def write_results(shape_patch_pros,pros_l,seqs_d,wdsps_d,hotspots_d,cutoff=1):
    """Write summary tables and per-group files for the shape/patch grouping.

    ``shape_patch_pros`` maps ``shape -> patch -> list of protein ids``.
    Written, in order:

    * ``merged_shape``: per shape, its distinct-protein count and its
      ``(patch, proteins)`` pairs, largest count first
    * ``pros_id``: every protein id that occurs anywhere
    * ``sta``: every ``(shape, patch)`` group with at least ``cutoff``
      proteins, largest first, with the number of ids containing ``HUMAN``
    * for each such group, a directory ``<n>_<shape>_<patch>`` holding its
      hotspots, WDSP lines, protein ids and sequences (wrapped at 80)
    * ``merged_patch_sta`` / ``merged_patch_sta_simple``: groups merged by
      the sorted characters of the patch

    ``pros_l`` is accepted but not used.
    """
    by_count = operator.itemgetter(1)

    # --- per-shape summary ---
    sta_shape = []
    for shape, patch_pros in shape_patch_pros.iteritems():
        members = set()
        for group in patch_pros.itervalues():
            members.update(group)
        sta_shape.append((shape, len(members), list(patch_pros.iteritems())))
    sta_shape.sort(key=by_count, reverse=True)
    with lt.open_file(file_suffix='merged_shape') as w_f:
        for shape, num, detail in sta_shape:
            print >> w_f,'{0:<20}{1:<10}{2:<}'.format(shape,num,detail)

    # --- every protein id (ids are joined and re-split on whitespace) ---
    per_shape = [' '.join([pro for group in patch_pros.values() for pro in group])
                 for patch_pros in shape_patch_pros.itervalues()]
    all_pros = set(' '.join(per_shape).split())
    with lt.open_file(file_suffix='pros_id') as f:
        for pro in all_pros:
            print >> f, pro

    # --- groups that reach the cutoff, largest first ---
    sta = []
    for shape, patch_pros in shape_patch_pros.iteritems():
        for patch, pros in patch_pros.iteritems():
            if len(pros) >= cutoff:
                sta.append((len(pros), shape, patch, pros))
    sta.sort(reverse=True)
    with lt.open_file(file_suffix='sta') as f:
        for len_pros, shape, patch, pros in sta:
            human_num = sum(1 for p in pros if 'HUMAN' in p)
            print >> f, '{0:<10}{1:<20}{2:<10}{3:<10}{4:<}'.format(
                len_pros, shape, patch,human_num,','.join(pros))

    # --- one directory of detail files per group ---
    for len_pros, shape, patch, pros in sta:
        dir_name = str(len_pros) + '_' + shape + '_' + patch

        with lt.open_file(file_name=dir_name,file_suffix='hotspot', inner_dir=dir_name) as f:
            # fewest hotspots first
            ordered = sorted(
                [(len(hotspots_d[pro]), pro, hotspots_d[pro]) for pro in pros])
            for _, pro, hotspot in ordered:
                print >> f, '{0:<20}{1}'.format(pro, ' '.join(hotspot))

        with lt.open_file(file_name=dir_name,file_extension='.wdsp', inner_dir=dir_name) as f:
            for pro in pros:
                for wdsp_line in wdsps_d[pro]:
                    print >> f, wdsp_line

        with lt.open_file(file_name=dir_name,file_suffix='pros_id', inner_dir=dir_name) as f:
            for pro in pros:
                print >> f, pro

        with lt.open_file(file_name=dir_name,file_suffix='seqs',file_extension='.fa', inner_dir=dir_name) as f:
            for pro in pros:
                print >> f, '>{0}'.format(pro)
                seq = seqs_d[pro]
                for start in range(0, len(seq), 80):
                    print >> f, seq[start:start+80]

    # --- groups merged by the sorted characters of their patch ---
    sta_dic = {}
    for len_pros, shape, patch, pros in sta:
        patch_m = ''.join(sorted(patch))
        sta_dic.setdefault(patch_m, []).append((len_pros, shape, patch))
    sta_lis = [(k, sum([vi[0] for vi in v]), v) for k, v in sta_dic.iteritems()]
    sta_lis.sort(key=by_count, reverse=True)
    with lt.open_file(file_suffix='merged_patch_sta') as f:
        for patch, num, detail in sta_lis:
            first = detail[0]
            print >> f, '{0:<10}{1:<10}{2:<10}{3:<20}{4:<}'.format(
                 patch, num, first[0], first[1], first[2])
            for d in detail[1:]:
                print >> f, '{0:<20}{1:<10}{2:<20}{3:<}'.format('',d[0],d[1],d[2])
    with lt.open_file(file_suffix='merged_patch_sta_simple') as f:
        for patch, num, detail in sta_lis:
            print >> f, '{0:<10}{1:<10}'.format(patch, num)