def score_gene_pair(self, geneA, geneB, eventsA, eventsB, annotations):
    """Collect per-gene and joint statistics for the pair (geneA, geneB).

    Results are stored on ``self``:

    * ``hists`` / ``costs`` -- histories and costs merged over the events
      whose annotation contains both genes.
    * ``count`` -- number of merged joint histories.
    * ``countsA`` / ``countsB`` -- number of merged histories for each
      gene on its own.
    * ``scoreA`` / ``scoreB`` / ``likelihood`` -- results of
      ``histseg.compute_likelihood`` (called with a second argument of 1)
      on the costs of A, of B, and of the joint events respectively.
    * ``countsABplus`` -- incremented once for every joint event that is
      annotated with more than two genes.  It is only incremented here,
      so it must already exist on ``self`` before this method runs.

    Args:
        geneA: name of the first gene.
        geneB: name of the second gene.
        eventsA: events associated with geneA; each must expose ``id``.
        eventsB: events associated with geneB.
        annotations: mapping from an event id to the collection of gene
            names annotated on that event.
    """
    hists_a, costs_a = merge_histories_costs(eventsA)
    hists_b, costs_b = merge_histories_costs(eventsB)

    # Joint events are searched for in eventsA only: an event carrying
    # both genes is looked up through its own annotation list.
    shared_events = []
    for candidate in eventsA:
        event_genes = annotations[candidate.id]
        if geneA not in event_genes or geneB not in event_genes:
            continue
        shared_events.append(candidate)
        if len(event_genes) > 2:
            # The event also touches genes other than A and B.
            self.countsABplus += 1

    merged_shared = merge_histories_costs(shared_events)
    (self.hists, self.costs) = merged_shared

    self.count = len(self.hists)
    self.countsA = len(hists_a)
    self.countsB = len(hists_b)

    self.scoreA = histseg.compute_likelihood(costs_a, 1)
    self.scoreB = histseg.compute_likelihood(costs_b, 1)
    self.likelihood = histseg.compute_likelihood(self.costs, 1)
def main(allevents, annotationfn, totalp, outputfh):
    """Write one tab-separated summary line per annotated gene.

    For every gene, the events associated with it are merged with
    ``histseg.merge_events``.  The merged event's timing statistics,
    likelihood and history count are then computed, and a line with the
    columns

        gene, ordermean, ordersd, prevalmean, prevalsd, likelihood, numhists

    is written to ``outputfh`` and echoed to stderr.

    Args:
        allevents: indexable collection of events.
        annotationfn: passed to ``genemod.read_in_annotations``.
        totalp: total likelihood passed to ``histseg.compute_likelihood``.
            Any falsy value (``None``, ``0``, ...) triggers a
            recomputation via ``histseg.get_total_likelihood(allevents)``.
        outputfh: object with a ``write`` method that receives the rows.
    """
    row_format = "%s\t%f\t%s\t%f\t%s\t%s\t%d\n"

    annotations = genemod.read_in_annotations(annotationfn)
    sys.stderr.write("Read in all the annotations: %d\n" % (len(annotations)))

    # Maps a gene name to the list of indexes (into allevents) of its events.
    gene_to_indexes = genemod.create_gene_events_hash(allevents, annotations)
    sys.stderr.write("geneEvents: %d\n" % (len(gene_to_indexes)))

    if not totalp:
        totalp = histseg.get_total_likelihood(allevents)
    sys.stderr.write("totalp: %s\n" % (str(totalp)))

    for gene, event_indexes in gene_to_indexes.items():
        gene_events = [allevents[idx] for idx in event_indexes]
        sys.stderr.write(
            "Working on %s with %d events\n" % (gene, len(gene_events)))

        merged = histseg.merge_events(gene_events)
        merged.compute_timing_wmeansd()
        merged.likelihood = histseg.compute_likelihood(merged.costs, totalp)
        merged.numhists = len(merged.histories)

        columns = (
            gene,
            merged.ordermean,
            str(merged.ordersd),
            merged.prevalmean,
            str(merged.prevalsd),
            str(merged.likelihood),
            merged.numhists,
        )
        row = row_format % columns
        outputfh.write(row)
        sys.stderr.write(row)
def likelihood_score_gene_pairs(allevent, annotations, tabixfn):
    """Score every pair of genes that co-occur on an event and print them.

    For each event, every ordered combination ``(genes[i], genes[j])`` with
    ``i < j`` from that event's annotation list becomes a candidate pair,
    identified by the string ``"geneA,geneB"``.  The first time a pair id
    is seen, a ``GenePair`` is built from all events of each gene.  Once
    all events have been visited, a total likelihood is computed from the
    cost of each distinct history id.  Each pair is then adjusted with
    ``pair.adjust_likelihoods(totalp)`` and ``str(pair)`` is written to
    stdout.

    Args:
        allevent: indexable sequence of events.  Each event must expose
            ``id``, ``histories`` and ``costs`` (the latter two indexable
            in parallel).
        annotations: mapping from an event id to the sequence of gene
            names annotated on that event.  A first entry equal to the
            string ``"None"`` marks an event without genes.
        tabixfn: when truthy, passed to ``find_distance_between_genes`` and
            each pair gets a ``distance`` attribute; otherwise no
            distances are looked up.

    Returns:
        None.  Output goes to stdout, progress messages to stderr.
    """
    # The signature spells the parameter ``allevent`` while the body always
    # used ``allevents``.  Bind the local name so the function works on its
    # argument instead of failing with NameError (or silently picking up a
    # same-named module-level global).
    allevents = allevent

    # key: a gene name, value: a list of event indexes for that gene
    geneEvents = create_gene_events_hash(allevents, annotations)
    sys.stderr.write("Hashed in all the annotations: %d\n" % (len(geneEvents)))

    pair_ids = []            # ordered; handed to the distance lookup below
    seen_pair_ids = set()    # O(1) membership companion of pair_ids
    gene_pairs = []
    seen_history_ids = set() # assumes history ids are hashable
    allcosts = []            # one cost per distinct history id

    for eventi, myevent in enumerate(allevents):
        sys.stderr.write("working on event %d\n" % (eventi))

        # Record the cost of each history only the first time it is seen.
        for i, hid in enumerate(myevent.histories):
            if hid not in seen_history_ids:
                seen_history_ids.add(hid)
                allcosts.append(myevent.costs[i])

        genes = annotations[myevent.id]
        # Skip events without genes: an empty annotation list (which used
        # to raise IndexError) or the "None" placeholder.
        if len(genes) == 0 or genes[0] == "None":
            continue

        for ia, geneA in enumerate(genes):
            for geneB in genes[ia + 1:]:
                genepairID = "%s,%s" % (geneA, geneB)
                if genepairID in seen_pair_ids:
                    continue
                eventsA = [allevents[i] for i in geneEvents[geneA]]
                eventsB = [allevents[i] for i in geneEvents[geneB]]
                mypair = GenePair(geneA, geneB, eventsA, eventsB, annotations)
                seen_pair_ids.add(genepairID)
                pair_ids.append(genepairID)
                gene_pairs.append(mypair)

    totalp = histseg.compute_likelihood(allcosts, 1)
    sys.stderr.write("totalp: %s\n" % (str(totalp)))

    if tabixfn:
        # Use the function's own argument rather than the global ``args``.
        pair_distances = find_distance_between_genes(pair_ids, tabixfn)

    for pair in gene_pairs:
        if tabixfn:
            # NOTE(review): the lookup key is geneA+geneB with no separator,
            # whereas the pair ids passed in are "geneA,geneB" -- confirm
            # this matches the keys find_distance_between_genes returns.
            pair.distance = pair_distances[pair.geneA + pair.geneB]
        pair.adjust_likelihoods(totalp)
        sys.stdout.write(str(pair))