Example #1
0
def compute_doublet_params(ids, params_type):
    """Compute the parameter dict of every valid doublet listed in ``ids``.

    Doublets are read through ``DoubletsDict("gc-data/", reduced_atoms=R_ATOMS)``.
    An entry is reported with an "INVALID doublet!" message and left out of
    the result when one of its two residues is missing, when its upper-cased
    nucleotide type is not exactly two letters from ``ACGU``, or when
    ``doublet_params_dict`` returns ``None`` for it.

    :param ids: sequence of doublet identifiers (``len()`` is taken of it).
    :param params_type: forwarded unchanged to ``doublet_params_dict``.
    :return: dict mapping each accepted doublet id to its parameter dict.
    """
    # The returned module object is never used in this function.
    # NOTE(review): presumably imported for its side effects -- confirm.
    __import__("doublet-params")

    doublets = DoubletsDict("gc-data/", reduced_atoms=R_ATOMS)
    doublets.load_pdb_files(ids, verbose=True)
    res = {}

    # maxval is clamped to 1 so an empty ``ids`` never yields maxval=0.
    progress = ProgressBar(
        widgets=['compute doublet params', ' ', Percentage(), ' ', Bar(), ' ', ETA()],
        maxval=max(1, len(ids)),
    ).start()

    n_type_pattern = '^[ACGU]{2}$'
    for position, d_id in enumerate(ids):
        points = doublets.get(d_id)
        if points is None or points[0] is None or points[1] is None:
            print("INVALID doublet! %s" % d_id)
            continue

        n_type = doublets.get_n_type(d_id).upper()
        if re.match(n_type_pattern, n_type) is None:
            print("INVALID doublet! %s, wrong n_type: %s" % (d_id, n_type))
            continue

        # The return value is not used below; the call itself is kept.
        # NOTE(review): confirm whether normalize_points is needed here at all.
        normalize_points(points, n_type[0])

        params = doublet_params_dict(points, n_type, params_type)
        if params is None:
            print("INVALID doublet! %s" % (d_id))
            continue

        res[d_id] = params
        # Only accepted doublets advance the bar; skipped ones do not.
        progress.update(position)
    progress.finish()
    return res
    def classify_prg_result_details_fp(self, short_id, sc, prg_res, exp_res,
                                       other_results):
        other_prgs = [x for x in self.PRG_BY_CAT[sc] if x != 'CL']

        d_id = self.pdb_id.upper() + ":" + short_id
        if sc == 'base-ribose':
            p_br = doublet_params_dict(self.dd.get(d_id),
                                       self.dd.get_n_type(d_id), 'base-ribose')
        if sc in ['bp-classic', 'bp-non-classic']:
            p_bp = doublet_params_dict(self.dd.get(d_id),
                                       self.dd.get_n_type(d_id), 'bp')

        sc2 = sc.replace("bp-classic", "bp").replace("bp-non-classic", "bp")

        exp_res2 = exp_res
        if exp_res2 == "":
            oo = [x for x in other_results if x != ""]
            if len(oo) == 1:
                exp_res2 = oo[0]

        if sc == 'base-ribose' and p_br['ph_h'] >= 4.5:
            return ("new-base-ribose", "ph_h=%.4f" % p_br['ph_h'])
        elif sc2 == 'bp' and prg_res != "" and exp_res2 != "" and prg_res[
                0:2] == exp_res2[0:2] and expected_strand_orient(
                    exp_res2) != p_bp['strand_orient']:
            extra = "strand orientation: %s, expected orientation for %s: %s" % (
                self._strand_orientation_name(p_bp['strand_orient']), exp_res2,
                self._strand_orientation_name(
                    expected_strand_orient(exp_res2)))
            return ("cis-vs-trans", extra)
        elif sc2 == 'bp' and prg_res == 'SH_cis' and exp_res2 == 'WH_cis':
            extra = ""
            return ("wh_cis-vs-sh_cis", extra)
        else:
            if prg_res in other_results:
                extra = "consistent with: " + [
                    p
                    for p, o in zip(other_prgs, other_results) if prg_res == o
                ][0]
                return ("consistent-with-single-cl", extra)
            elif all([x == "" for x in other_results]):
                return ("not-recognized-by-others-cl", "")
            else:
                return ("others", "")
    def test_doublets_class(self):
        """Compare ``Doublet`` values with ``doublet_params_dict`` output.

        For each test structure, every id returned by the "close_doublets"
        graph is first evaluated with ``doublet_params_dict`` for all four
        parameter types. The same doublets are then rebuilt as ``Doublet``
        objects (sharing one ``Residue`` per residue id) and every stored
        value is compared with the corresponding ``Doublet`` attribute or
        ``ph_info`` / ``br_info`` entry, using a tolerance of 0.001.
        """
        from utils import PDBObject,DoubletsDict,GraphTool,bench_start,bench_stop
        from distances import doublet_params_dict, Doublet, Residue, residue_conformation

        eps = 0.001
        param_types = ['bp','stacking','base-phosphate','base-ribose']
        # Keys matching this pattern are looked up in the info dicts rather
        # than read as Doublet attributes.
        info_key_pattern = "^(dist_[A-Z]|i_|ii_|oxygens)"

        for pdb_id in ['3fo6','2zjp']:
            dd = DoubletsDict(reduced_atoms=['*'])
            dd.load_pdb(pdb_id)
            gr = GraphTool(PDBObject.pdb_fn(pdb_id,"close_doublets"),edge_type="dist")

            # Pass 1: reference values, keyed "<doublet id>-<param type>".
            old_params = {}
            bench_start("old params computations")
            for d_id in gr.get_ids():
                full_d_id = pdb_id+":"+d_id
                n_type = dd.get_n_type(full_d_id)
                (p1,p2) = dd.get(full_d_id)
                for p_type in param_types:
                    old_params["%s-%s"%(d_id,p_type)] = doublet_params_dict((p1,p2), n_type, p_type)
            bench_stop("old params computations")

            # Pass 2: the same values obtained through Doublet / Residue.
            residues = {}
            bench_start("new params computations")
            for d_id in gr.get_ids():
                full_d_id = pdb_id+":"+d_id
                n_type = dd.get_n_type(full_d_id)
                (p1,p2) = dd.get(full_d_id)
                (r1,r2) = d_id.split(":")
                # Each residue object is created once and reused afterwards.
                if r1 not in residues:
                    residues[r1] = Residue(r1,n_type[0],p1)
                if r2 not in residues:
                    residues[r2] = Residue(r2,n_type[1],p2)
                d = Doublet(d_id,residues[r1],residues[r2])
                for p_type in param_types:
                    reference = old_params["%s-%s"%(d_id,p_type)]
                    for key,expected_value in reference.items():
                        if not re.match(info_key_pattern,key):
                            v = getattr(d, key)
                        elif p_type=='base-phosphate':
                            v = d.ph_info.get(key)
                        elif p_type=='base-ribose':
                            v = d.br_info.get(key)
                        else:
                            v = None

                        if key == 'oxygens':
                            # Compared as sorted sequences, so order is ignored.
                            self.assertTrue(sorted(v)==sorted(expected_value))
                        else:
                            self.assertTrue(v>expected_value-eps and v<expected_value+eps,"%s: got: %.4f, expected: %.4f"%(key,v,expected_value))
            bench_stop("new params computations")
Example #4
0
    def compute_doublet_params(ids, params_type):
        """Compute the parameter dict of every valid doublet listed in ``ids``.

        Doublets are read through ``DoubletsDict("gc-data", ...)`` restricted
        to the atom names listed below. An entry is reported with an
        "INVALID doublet!" message and left out of the result when one of its
        two residues is missing, when its upper-cased nucleotide type is not
        exactly two letters from ``ACGU``, or when ``doublet_params_dict``
        returns ``None`` for it.

        :param ids: sequence of doublet identifiers (``len()`` is taken of it).
        :param params_type: forwarded unchanged to ``doublet_params_dict``.
        :return: dict mapping each accepted doublet id to its parameter dict.
        """
        # The returned module object is never used in this function.
        # NOTE(review): presumably imported for its side effects -- confirm.
        __import__("doublet-params")

        # Atom names handed to DoubletsDict; repeated names are kept exactly
        # as they were listed.
        R_ATOMS = (
            ['N1', 'N2', 'N3', 'N4', 'N6', 'N7', 'P']
            + ['C2', 'C4', 'C5', 'C6', 'C8', "C1'"]
            + ['O2']
            + ["O2'", "O3'", "O4'"]
            + ["OP1", "OP2", "O5'", "NEXT:O3'"]
            + ['N1', 'C6', 'O6', 'C5', 'C4', 'N3', 'C2', 'N2', 'N7', 'C8', 'N9']
        )

        doublets = DoubletsDict("gc-data", reduced_atoms=R_ATOMS)
        doublets.load_pdb_files(ids, verbose=True)
        res = {}

        # maxval is clamped to 1 so an empty ``ids`` never yields maxval=0.
        progress = ProgressBar(
            widgets=['compute doublet params', ' ', Percentage(), ' ', Bar(), ' ', ETA()],
            maxval=max(1, len(ids)),
        ).start()

        n_type_pattern = '^[ACGU]{2}$'
        for position, d_id in enumerate(ids):
            points = doublets.get(d_id)
            if points is None or points[0] is None or points[1] is None:
                print("INVALID doublet! %s" % d_id)
                continue

            n_type = doublets.get_n_type(d_id).upper()
            if re.match(n_type_pattern, n_type) is None:
                print("INVALID doublet! %s, wrong n_type: %s" % (d_id, n_type))
                continue

            params = doublet_params_dict(points, n_type, params_type)
            if params is None:
                print("INVALID doublet! %s" % (d_id))
                continue

            res[d_id] = params
            # Only accepted doublets advance the bar; skipped ones do not.
            progress.update(position)
        progress.finish()
        return res
 def test_bp_params(self):
     """Check doublet parameters of two reference doublets.

     The 'stacking' parameters returned by ``doublet_params_dict`` must
     contain every reference key with a value within 0.001 of the stored
     one; the same values are then read back as attributes of a ``Doublet``
     built from the same points.
     NOTE(review): the test is named "bp" but requests 'stacking'
     parameters -- confirm this is intended.
     """
     from utils import DoubletsDict
     from distances import doublet_params_dict, Doublet, Residue

     eps = 0.001

     # Keys that are also checked as attributes of the Doublet object.
     doublet_attrs = (
         'dist', 'min_dist', 'nn_ang', 'nn_ang_norm', 'n1cc_ang', 'n2cc_ang',
         'n12cc_ang', 'o_ang', 'orient', 'stack_orient', 'stack_norm',
         'strand_orient', 'strand_orient_norm', 'conf', 'dist_z', 'n2_z',
         'rot_ang', 'stack_min_dist', 'stack_overlap',
     )

     # (doublet id, reference parameter values)
     TEST_DATA = [
         ("3FO6:A39:A57", {
             'stack_orient': 3,
             'dist_z': 0.22861798604329428,
             'o_ang': 69.303115470181339,
             'dist': 5.5976623073175231,
             'stack_overlap': 0,
             'conf': 0,
             'n12cc_ang': 87.910385850994587,
             'n1cc_ang': 92.089614149005413,
             'strand_orient_norm': -1,
             'stack_norm': 0.97095424,
             'nn_ang_norm': 13.698485786637832,
             'nn_ang': 166.30151421336217,
             'min_dist': 1.7859186,
             'strand_orient': -1,
             'orient': 1,
             'n2cc_ang': 88.640047368908554,
         }),
         ("2ZJP:X303:X77", {
             'stack_orient': 3,
             'strand_orient': -1,
             'dist': 6.3820344572213887,
             'stack_min_dist': 1.4906554379247356,
             'n12cc_ang': 79.337562855790168,
             'stack_norm': 0.93983656,
             'strand_orient_norm': -1,
             'conf': 0,
             'n2_z': -0.93983656,
             'orient': 1,
             'n2cc_ang': 96.415019366401566,
             'dist_z': 1.1562830607096353,
             'o_ang': 50.822836557417922,
             'stack_overlap': 0,
             'rot_ang': -56.801295513103156,
             'n1cc_ang': 100.66243714420983,
             'min_dist': 1.4906554379247356,
             'nn_ang_norm': 20.020604505050017,
             'nn_ang': 159.97939549494998,
         }),
     ]

     dd = DoubletsDict(reduced_atoms=['*'])

     for d_id, expected_params in TEST_DATA:
         n_type = dd.get_n_type(d_id)
         points = dd.get(d_id)
         self.assertEqual(len(points),2)
         params = doublet_params_dict(points, n_type, 'stacking')
         self.assertTrue(params is not None)

         # Dict-based API: every reference key present and within tolerance.
         for key,expected_value in expected_params.items():
             self.assertTrue(key in params)
             self.assertTrue(params[key]>expected_value-eps)
             self.assertTrue(params[key]<expected_value+eps)

         # Test of the new methods: the same values via Doublet attributes.
         first = Residue("A1",n_type[0],points[0])
         second = Residue("B1",n_type[1],points[1])
         d = Doublet(d_id,first,second)
         for key,expected_value in expected_params.items():
             if key not in doublet_attrs:
                 print("skipping: %s" % key)
                 continue
             v = getattr(d,key)
             self.assertTrue(v>expected_value-eps and v<expected_value+eps,"%s: got: %.4f, expected: %.4f"%(key,v,expected_value))
def main():
    """Collect doublet id lists from the selected input and process them.

    Exactly one input mode is used, chosen in this order of precedence:
    ``doublet_id``, ``gen_pdb_for``, ``input_group_info``, ``input_json``,
    ``input_pdb``. Modes that produce doublet lists append to
    ``doublet_lists`` / ``labels``; ``gen_pdb_for`` calls ``gen_pdb`` and
    ``input_pdb`` prints the parameters of the two residues in the file.
    ``compute_params`` is always called at the end with whatever lists were
    collected (possibly none).
    """
    (_parser, options, _args) = parse_args()

    doublet_lists = []
    labels = []

    if options.doublet_id:
        # Single doublet given directly on the command line.
        doublet_lists.append([options.doublet_id])
        labels.append("d:%s" % options.doublet_id)

    elif options.gen_pdb_for:
        group_data = load_json(options.input_group_info)
        assert isinstance(group_data, dict)
        assert len(group_data) == 1
        v = []
        vu = []
        for group_info in group_data.values():
            v = group_info['all_doublets']
            # Unique ids taken from the (id, distance) pairs of each row.
            vu = list(set(
                x for row in group_info['neigh_unclassified'] for x, d in row))
        gen_pdb(v, vu, options)

    elif options.input_group_info:
        group_data = load_json(options.input_group_info)
        assert isinstance(group_data, dict)
        assert len(group_data) == 1
        for group_info in group_data.values():
            assert isinstance(group_info, dict)
            doublet_lists.append(group_info['all_doublets'])
            labels.append('reference')
            for k in ('neigh_unclassified', 'neigh_other'):
                unique_ids = list(set(
                    did for row in group_info[k] for (did, dist) in row))
                doublet_lists.append(unique_ids)
                # "neigh_unclassified" -> "unclassified", etc.
                labels.append(k.split("_")[1])

    elif options.input_json:
        only_keys = options.only_keys.split(",") if options.only_keys else None

        for fn in options.input_json.split(","):
            if fn == '':
                continue
            data = load_json(fn)
            if isinstance(data, dict):
                print("DICT!")
                if only_keys is None:
                    keys = data.keys()
                elif len(only_keys) == 1:
                    # A single --only-keys value is treated as a regexp that
                    # must match the whole key.
                    regexp = re.compile('^' + only_keys[0] + '$')
                    keys = [k for k in data.keys() if regexp.match(k)]
                else:
                    keys = only_keys

                for k in keys:
                    if k not in data:
                        continue
                    v = data[k]
                    assert all([isinstance(did, str) for did in v])
                    print(k)
                    doublet_lists.append(v)
                    labels.append(k)

                if only_keys is not None and len(only_keys) == 1:
                    # Everything collected so far is merged into one list
                    # labelled with the pattern itself.
                    doublet_lists = [sum(doublet_lists, [])]
                    labels = [only_keys[0]]
            elif isinstance(data, list):
                print("LIST!")
                assert all([isinstance(did, str) for did in data])
                doublet_lists.append(data)
                labels.append(os.path.basename(fn))
            else:
                raise Exception("Unknown format of JSON file")

    elif options.input_pdb:
        structure = load_pdb(options.input_pdb)
        residues = list(structure.get_residues())
        assert len(residues) == 2
        first, second = residues[0], residues[1]
        n_type = first.resname.strip() + second.resname.strip()
        print(doublet_params_dict(
            (simplify_residue(first), simplify_residue(second)),
            n_type, options.params_type))

    compute_params(doublet_lists, labels, options)