def run(self):
    """Sum the likelihoods of all nucleotide-level recombination events
    consistent with this clone's CDR3, assuming the D segment lies inside
    the V--J insertion string, and pickle the total to ``self.outfile``.

    For each candidate VJ-insertion sequence, every occurrence of the D
    nucleotides is considered a potential D placement; positions failing
    the frame/offset checks below are skipped.  Writes a single float
    (sum of 10**log-likelihood over events).
    """
    total_llh = 0.0
    for vjins in self.vjins_nts:
        # vjins carries the last V nt and the first J nt at its ends,
        # hence must be at least 2 long.
        assert len(vjins) >= 2
        for mobj in re.finditer(self.d_nts, vjins):
            # Check special case: offset of the D match relative to the
            # V overhang.  NOTE(review): the %3 / !=1 conditions encode
            # which D placements are invalid for 3nt/4nt remaining D --
            # domain rule, confirm against the model definition.
            v_hang_right = 0 if self.v_hang == 0 else 3 - self.v_hang
            if len(self.d_nts) == 3:
                if (mobj.start() - v_hang_right - 1) % 3 == 0:
                    continue
            elif len(self.d_nts) == 4:
                if (mobj.start() - v_hang_right - 1) != 1:
                    continue
            vdins = vjins[: mobj.start()]   # V-D insertion (left of D match)
            djins = vjins[mobj.end():]      # D-J insertion (right of D match)
            # Full CDR3 nt: V part + insertion interior + J part (the first
            # and last characters of vjins duplicate v_cdr3_nt/j_cdr3_nt ends).
            cdr3_nt = self.v_cdr3_nt + vjins[1:-1] + self.j_cdr3_nt
            assert lcommon.nt2aa(cdr3_nt) == self.cdr3_aa
            event = lclone.Cdr3Clone(1, cdr3_nt, self.v, self.j, d=self.d,
                                     aa=self.cdr3_aa, vdel=self.vdel,
                                     jdel=self.jdel, d5del=self.d5del,
                                     d3del=self.d3del, vdins=vdins,
                                     djins=djins)
            llh = ntclone_likelihood(event, self.model)
            # addjust empty D llh (TEMPORARY HACK): compensate for the
            # number of positions an empty D can occupy per gene.
            if self.d_nts == '':
                if self.d == 'TRBD1':
                    llh += log10(13)
                elif self.d == 'TRBD2':
                    llh += log10(17)
            total_llh += 10 ** llh
    # Close the gzip stream explicitly so the output is fully flushed
    # (the original relied on GC to close the handle).
    with gzip.open(self.outfile, 'wb') as fout:
        pickle.dump(total_llh, fout)
def run(self):
    """Load pickled clones from ``self.infile`` (gzip), score each with
    the model, and pickle the summed probability (sum of 10**llh) to
    ``self.outfile``.
    """
    # Use context managers so both gzip handles are closed/flushed
    # deterministically (the original relied on GC).
    with gzip.open(self.infile, 'rb') as fin:
        jclones = pickle.load(fin)
    llhs = [rcommon.ntclone_likelihood(c, self.model) for c in jclones]
    sumllh = sum(10 ** llh for llh in llhs)
    with gzip.open(self.outfile, 'wb') as fout:
        pickle.dump(sumllh, fout)
def run(self):
    """Aggregate clone likelihoods across all samples in ``self.indir``.

    Each sample directory may contain a pickled, gzipped clone list named
    ``self.j``.  Duplicate recombination events across samples are counted
    once (via ``rcommon.visited_event``).  The summed probability
    (sum of 10**llh) is pickled to ``self.outfile``.
    """
    llhs = []
    events = []  # events already scored, to avoid double-counting
    for sam in os.listdir(self.indir):
        jfile = os.path.join(self.indir, sam, self.j)
        if os.path.exists(jfile):
            with gzip.open(jfile, 'rb') as fin:
                jclones = pickle.load(fin)
            for c in jclones:
                # Score only events not seen in a previous sample.
                if not rcommon.visited_event(events, c):
                    events.append(c)
                    clonellh = rcommon.ntclone_likelihood(c, self.model)
                    llhs.append(clonellh)
    sumllh = sum(10 ** llh for llh in llhs)
    # Explicit close via `with` guarantees the gzip output is flushed.
    with gzip.open(self.outfile, 'wb') as fout:
        pickle.dump(sumllh, fout)
def aaclones_likelihood(clone2sams, model, db_dir, sam2total, group2sams,
                        outfile, ingroup, outgroup):
    """Write per-sample and per-group observation probabilities for each
    amino-acid clone to ``outfile`` (tab-separated, '#'-prefixed headers).

    Args:
        clone2sams: dict mapping aa clone -> (insams, outsams) sample lists.
        model: recombination model passed to ``rcommon.ntclone_likelihood``.
        db_dir: directory queried by ``get_ntclones`` for nt-level clones.
        sam2total: dict mapping sample (and group) name -> total clone count.
        group2sams: dict mapping group name -> its samples.
        outfile: path of the report file to write.
        ingroup, outgroup: names of the two groups being compared.
    """
    with open(outfile, 'w') as f:
        f.write("sample\tnum_ntclones\tprob_observed\n")
        for clone, (insams, outsams) in clone2sams.iteritems():
            f.write("#%s\n" % clone)
            events = []      # distinct nt events seen for this aa clone
            event_llhs = []  # one log-likelihood per distinct nt event
            for i, sams in enumerate([insams, outsams]):
                if not sams:
                    continue
                sam2ntclones = get_ntclones(clone, sams, db_dir)
                f.write("#Group_%d\n" % (i + 1))
                for sam, ntclones in sam2ntclones.iteritems():
                    total = sam2total[sam]
                    llhoods = []
                    for ntclone in ntclones:
                        clonellhood = rcommon.ntclone_likelihood(ntclone, model)
                        logmu = log10(total) + clonellhood
                        # prob. observing >=1 ntclone.  BUGFIX: sf(k) is
                        # P(X > k), so P(X >= 1) is logsf(0, mu) -- the
                        # original logsf(1, mu) computed P(X >= 2).
                        prob_observed = poisson.logsf(0, 10 ** logmu)
                        llhoods.append(prob_observed)
                        # Count each distinct nt event once across groups.
                        if not rcommon.visited_event(events, ntclone):
                            events.append(ntclone)
                            event_llhs.append(clonellhood)
                    llhoods_str = ",".join(["%f" % llh for llh in llhoods])
                    f.write("%s\t%d\t%s\n" % (sam, len(ntclones), llhoods_str))
            # calc prob to observe the aa clones (sum of all nt events)
            total_prob = sum([10 ** llh for llh in event_llhs])
            if total_prob > 0:
                aa_llh = log10(total_prob)
                avr_total = (sam2total[ingroup] + sam2total[outgroup]) / 2
                avr_logmu = aa_llh + log10(avr_total)
                # Same >=1 correction as above: logsf(0, .) == P(X >= 1).
                avr_aa_llh = poisson.logsf(0, 10 ** avr_logmu)
                f.write("#Clone_log_likelihood: %f, %f\n" % (aa_llh, avr_aa_llh))
                ingroup_llh = get_group_likelihood(aa_llh, group2sams[ingroup],
                                                   insams, sam2total)
                outgroup_llh = get_group_likelihood(aa_llh, group2sams[outgroup],
                                                    outsams, sam2total)
                f.write("#Ingrp vs Outgrp: %f vs %f\n#\n"
                        % (ingroup_llh, outgroup_llh))
def run(self):
    """Sum the likelihoods of all recombination events formed by pairing
    every candidate V-D insertion with every D-J insertion, and pickle
    the total (sum of 10**llh) to ``self.outfile``.

    Inputs are gzipped pickles: the model (``self.modelfile``), the V-D
    insertion candidates (``self.vfile``) and the D-J insertion
    candidates (``self.jfile``).
    """
    total_llh = 0.0
    with gzip.open(self.modelfile, 'rb') as fmodel:
        model = pickle.load(fmodel)
    with gzip.open(self.vfile, 'rb') as fv:
        vd_ins_nts = pickle.load(fv)
    with gzip.open(self.jfile, 'rb') as fj:
        dj_ins_nts = pickle.load(fj)
    for vd_ins in vd_ins_nts:
        for dj_ins in dj_ins_nts:
            # Trim the overlapping boundary nt from each insertion
            # (first nt of vd_ins and last nt of dj_ins duplicate the
            # flanking V/J sequence).
            cdr3_nt = (self.v_cdr3_nt + vd_ins[1:] + self.d_cdr3_nt +
                       dj_ins[:-1] + self.j_cdr3_nt)
            assert lcommon.nt2aa(cdr3_nt) == self.cdr3_aa
            event = lclone.Cdr3Clone(1, cdr3_nt, self.v, self.j, d=self.d,
                                     aa=self.cdr3_aa, vdel=self.vdel,
                                     jdel=self.jdel, d5del=self.d5del,
                                     d3del=self.d3del, vdins=vd_ins,
                                     djins=dj_ins)
            llh = ntclone_likelihood(event, model)
            total_llh += 10 ** llh
    # `with` ensures the gzip output is flushed and closed.
    with gzip.open(self.outfile, 'wb') as fout:
        pickle.dump(total_llh, fout)