Beispiel #1
0
    def run(self):
        """Sum the likelihoods of all D placements within the V-J insertions.

        For every candidate insertion string in ``self.vjins_nts``, each regex
        match of ``self.d_nts`` is treated as one recombination event: the
        text before the match becomes the VD insertion and the text after it
        the DJ insertion.  The log10 likelihood returned by
        ``ntclone_likelihood`` for each event is converted to linear scale and
        accumulated.  The total is pickled into the gzip file
        ``self.outfile``.

        Raises:
            AssertionError: if an insertion string is shorter than two
                characters, or the assembled CDR3 nucleotide sequence does
                not translate to ``self.cdr3_aa``.
        """
        total_llh = 0.0
        d_len = len(self.d_nts)
        # Loop-invariant offset derived from the V overhang.
        # NOTE(review): presumably the number of bases needed to complete the
        # partial codon left by V -- confirm against the caller.
        v_hang_right = 0 if self.v_hang == 0 else 3 - self.v_hang

        for vjins in self.vjins_nts:
            assert len(vjins) >= 2  # because it has the last v and the first j
            # NOTE(review): re.finditer only yields non-overlapping matches,
            # so overlapping occurrences of d_nts (e.g. "GG" in "GGG") are
            # not enumerated as separate events -- confirm this is intended.
            for mobj in re.finditer(self.d_nts, vjins):
                # Special cases: skip certain placements of 3- and 4-nt Ds.
                offset = mobj.start() - v_hang_right - 1
                if d_len == 3 and offset % 3 == 0:
                    continue
                if d_len == 4 and offset != 1:
                    continue

                vdins = vjins[:mobj.start()]
                djins = vjins[mobj.end():]
                # vjins carries the last V base and the first J base at its
                # ends, so they are stripped before joining with V and J.
                cdr3_nt = self.v_cdr3_nt + vjins[1:-1] + self.j_cdr3_nt
                assert lcommon.nt2aa(cdr3_nt) == self.cdr3_aa
                event = lclone.Cdr3Clone(1, cdr3_nt, self.v, self.j, d=self.d,
                                         aa=self.cdr3_aa, vdel=self.vdel,
                                         jdel=self.jdel, d5del=self.d5del,
                                         d3del=self.d3del, vdins=vdins,
                                         djins=djins)
                llh = ntclone_likelihood(event, self.model)

                # Adjust empty-D llh (TEMPORARY HACK): add a fixed
                # gene-specific correction when no D nucleotides remain.
                if self.d_nts == '':
                    if self.d == 'TRBD1':
                        llh += log10(13)
                    elif self.d == 'TRBD2':
                        llh += log10(17)

                total_llh += 10 ** llh

        # Context manager guarantees the gzip stream is flushed and closed.
        with gzip.open(self.outfile, 'wb') as out_fh:
            pickle.dump(total_llh, out_fh)
Beispiel #2
0
 def run(self):
     """Sum the linear-scale likelihoods of all clones in ``self.infile``.

     Loads a pickled iterable of clones from the gzip file ``self.infile``,
     evaluates ``rcommon.ntclone_likelihood`` (a log10 value) for each one
     under ``self.model``, and pickles the sum of ``10 ** llh`` into the
     gzip file ``self.outfile``.
     """
     # Close the input handle as soon as the clones are loaded.
     with gzip.open(self.infile, 'rb') as in_fh:
         jclones = pickle.load(in_fh)

     sumllh = sum(10 ** rcommon.ntclone_likelihood(c, self.model)
                  for c in jclones)

     # Context manager guarantees the gzip stream is flushed and closed.
     with gzip.open(self.outfile, 'wb') as out_fh:
         pickle.dump(sumllh, out_fh)
Beispiel #3
0
 def run(self):
     """Sum the likelihoods of the distinct events found across samples.

     Each entry of ``self.indir`` is treated as a sample directory.  If it
     contains a file named ``self.j``, the pickled clones in that gzip file
     are loaded.  A clone contributes only the first time it is seen, as
     decided by ``rcommon.visited_event``.  The sum of ``10 ** llh`` over
     those distinct events is pickled into the gzip file ``self.outfile``.
     """
     llhs = []
     events = []  # events already counted, used for de-duplication
     for sam in os.listdir(self.indir):
         jfile = os.path.join(self.indir, sam, self.j)
         if not os.path.exists(jfile):
             continue
         # Close each per-sample handle before moving to the next sample.
         with gzip.open(jfile, 'rb') as in_fh:
             jclones = pickle.load(in_fh)
         for c in jclones:
             if rcommon.visited_event(events, c):
                 continue
             events.append(c)
             llhs.append(rcommon.ntclone_likelihood(c, self.model))

     sumllh = sum(10 ** llh for llh in llhs)
     # Context manager guarantees the gzip stream is flushed and closed.
     with gzip.open(self.outfile, 'wb') as out_fh:
         pickle.dump(sumllh, out_fh)
Beispiel #4
0
def aaclones_likelihood(clone2sams, model, db_dir, sam2total, group2sams,
                        outfile, ingroup, outgroup):
    """Write a per-clone likelihood report to ``outfile``.

    For every amino-acid clone, the nucleotide clones found in its in-group
    and out-group samples are scored with ``rcommon.ntclone_likelihood``.
    One tab-separated row is written per sample, followed by clone-level
    summary lines whenever the summed event probability is positive.

    Args:
        clone2sams: mapping of clone -> ``(insams, outsams)``.
        model: model object forwarded to ``rcommon.ntclone_likelihood``.
        db_dir: location forwarded to ``get_ntclones``.
        sam2total: mapping indexed by sample name and also by ``ingroup``
            and ``outgroup``; values are used as totals.
        group2sams: mapping indexed by ``ingroup`` and ``outgroup``.
        outfile: path of the text report to write.
        ingroup: key of the in-group in ``sam2total`` / ``group2sams``.
        outgroup: key of the out-group in ``sam2total`` / ``group2sams``.
    """
    # The context manager closes the report even if scoring raises.
    with open(outfile, 'w') as f:
        f.write("sample\tnum_ntclones\tprob_observed\n")
        # .items() works on both Python 2 and Python 3 mappings.
        for clone, (insams, outsams) in clone2sams.items():
            f.write("#%s\n" % clone)
            events = []       # distinct nt events seen for this clone
            event_llhs = []   # log10 likelihood of each distinct event
            for i, sams in enumerate([insams, outsams]):
                if not sams:
                    continue
                sam2ntclones = get_ntclones(clone, sams, db_dir)
                f.write("#Group_%d\n" % (i + 1))
                for sam, ntclones in sam2ntclones.items():
                    total = sam2total[sam]
                    llhoods = []
                    for ntclone in ntclones:
                        clonellhood = rcommon.ntclone_likelihood(ntclone,
                                                                 model)
                        # Expected count = total * P(ntclone), in log10.
                        logmu = log10(total) + clonellhood
                        # NOTE(review): scipy's logsf(k, mu) is the natural
                        # log of P(X > k), so this is ln P(X >= 2), whereas
                        # the original comment said ">=1 ntclone" and the
                        # other quantities are log10 -- confirm intent.
                        prob_observed = poisson.logsf(1, 10 ** logmu)
                        llhoods.append(prob_observed)

                        # Count each distinct event once per clone.
                        if not rcommon.visited_event(events, ntclone):
                            events.append(ntclone)
                            event_llhs.append(clonellhood)

                    llhoods_str = ",".join(["%f" % llh for llh in llhoods])
                    f.write("%s\t%d\t%s\n" % (sam, len(ntclones),
                                              llhoods_str))

            # Probability of observing the aa clone: sum over all nt events.
            total_prob = sum(10 ** llh for llh in event_llhs)
            if total_prob > 0:
                aa_llh = log10(total_prob)
                # Divide by 2.0 so the average is not truncated by Python 2
                # integer division when both totals are ints.
                avr_total = (sam2total[ingroup] + sam2total[outgroup]) / 2.0
                avr_logmu = aa_llh + log10(avr_total)
                avr_aa_llh = poisson.logsf(1, 10 ** avr_logmu)
                f.write("#Clone_log_likelihood: %f, %f\n"
                        % (aa_llh, avr_aa_llh))

                ingroup_llh = get_group_likelihood(aa_llh,
                                                   group2sams[ingroup],
                                                   insams, sam2total)
                outgroup_llh = get_group_likelihood(aa_llh,
                                                    group2sams[outgroup],
                                                    outsams, sam2total)
                f.write("#Ingrp vs Outgrp: %f vs %f\n#\n"
                        % (ingroup_llh, outgroup_llh))
Beispiel #5
0
 def run(self):
     """Sum the likelihoods over all VD x DJ insertion combinations.

     Loads the model from ``self.modelfile`` and the candidate VD and DJ
     insertion strings from ``self.vfile`` and ``self.jfile`` (all gzipped
     pickles).  For every pair, a CDR3 nucleotide sequence is assembled and
     scored with ``ntclone_likelihood`` (a log10 value).  The sum of
     ``10 ** llh`` is pickled into the gzip file ``self.outfile``.

     Raises:
         AssertionError: if an assembled CDR3 nucleotide sequence does not
             translate to ``self.cdr3_aa``.
     """
     # Load each input inside a context manager so no gzip handle leaks.
     with gzip.open(self.modelfile, 'rb') as model_fh:
         model = pickle.load(model_fh)
     with gzip.open(self.vfile, 'rb') as v_fh:
         vd_ins_nts = pickle.load(v_fh)
     with gzip.open(self.jfile, 'rb') as j_fh:
         dj_ins_nts = pickle.load(j_fh)

     total_llh = 0.0
     for vd_ins in vd_ins_nts:
         for dj_ins in dj_ins_nts:
             # The first base of vd_ins and the last base of dj_ins are
             # dropped when joining.
             # NOTE(review): presumably they duplicate the flanking V / J
             # bases -- confirm against the producer of these files.
             cdr3_nt = (self.v_cdr3_nt + vd_ins[1:] + self.d_cdr3_nt +
                        dj_ins[:-1] + self.j_cdr3_nt)
             assert lcommon.nt2aa(cdr3_nt) == self.cdr3_aa
             event = lclone.Cdr3Clone(1, cdr3_nt, self.v, self.j, d=self.d,
                                      aa=self.cdr3_aa, vdel=self.vdel,
                                      jdel=self.jdel, d5del=self.d5del,
                                      d3del=self.d3del, vdins=vd_ins,
                                      djins=dj_ins)
             llh = ntclone_likelihood(event, model)
             total_llh += 10 ** llh

     # Context manager guarantees the gzip stream is flushed and closed.
     with gzip.open(self.outfile, 'wb') as out_fh:
         pickle.dump(total_llh, out_fh)