def Run(self): #{
    """Annotate every candidate group with breakpoint genes and write it out.

    Steps, in order:
      1. create the group coordinates file (self.CreateGroupCoordsFile),
      2. start the overlap code on the genes file and the coordinates file
         and keep its output stream in self.overlaps_stream,
      3. for each group from the parser, attach its breakpoint gene
         overlaps (while any overlaps remain) and write the group to
         self.options.out_path,
      4. remove the coordinates file unless self.options.keep_coords_file
         is set.

    The output file is closed even when annotating or writing a group
    raises; in that case the coordinates file is left in place.
    """
    LogMsg(self, "Adding breakpoint gene annotation...")
    start = time.time()
    group_parser = CandidateGroupParserCls(self.options.barnacle_path)
    self.CreateGroupCoordsFile(group_parser)
    self.overlaps_stream = RunOverlapCode(self.options.genes_path,
        self.options.group_coords_path, STREAM_OUT, dpt=self.options.dpt,
        log_info=self.log_info)
    self.output_file = FileBoxCls(self.options.out_path, "w",
        "cannot create output file")
    try: #{
        overlaps_remain = True
        for group in group_parser: #{
            # get the breakpoint gene annotation for the group
            if (overlaps_remain): #{
                try: #{
                    self.GetBreakpointOverlaps(group)
                except StopIteration:
                    # the overlap stream is exhausted: stop querying it, but
                    # keep writing the remaining groups
                    DebugMsg(self, "No overlaps remain")
                    overlaps_remain = False
                #} end try
            #} end if
            # write the group to the output file
            self.output_file.Write(group.FullDataString())
        #} end for
    finally:
        # close the output file on both the success and the error path
        self.output_file.Close()
    #} end try
    if (not self.options.keep_coords_file): #{
        os.remove(self.options.group_coords_path)
    #} end if
    LogMsg(self, "Time spent adding breakpoint gene annotation: %s" %
        TimeSpent(start))
#} end def
class BreakpointGeneAnnotatorCls: #{
    """Add breakpoint gene annotation to candidate groups.

    Group breakpoint coordinates are written to a BED-style coordinates
    file, the overlap code is run on that file and the genes file, and the
    overlaps it streams back are attached to the matching group members
    before each group is written to the output file.
    """

    def __init__(self, options): #{
        """Set up the annotator from the parsed options object."""
        SetupMainClass(self, options)
        # stream of overlap lines; opened by Run()
        self.overlaps_stream = None
        # most recently parsed overlap; set by GetNextOverlap()
        self.curr_overlap = None
        # output file; opened by Run()
        self.output_file = None
    #} end def

    def __del__(self): #{
        """Close the output file (if one was opened) and the log file."""
        # getattr guard: the attribute may be missing if __init__ failed
        # before it was assigned
        if (getattr(self, "output_file", None) is not None): #{
            self.output_file.Close()
        #} end if
        CloseLogFile(self)
    #} end def

    def Run(self): #{
        """Annotate every candidate group with breakpoint genes and write it.

        Creates the group coordinates file, starts the overlap code, then
        for each group from the parser attaches its overlaps (while any
        remain) and writes the group to self.options.out_path. Afterwards
        the coordinates file is removed unless
        self.options.keep_coords_file is set.

        The output file is closed even when annotating or writing a group
        raises; in that case the coordinates file is left in place.
        """
        LogMsg(self, "Adding breakpoint gene annotation...")
        start = time.time()
        group_parser = CandidateGroupParserCls(self.options.barnacle_path)
        self.CreateGroupCoordsFile(group_parser)
        self.overlaps_stream = RunOverlapCode(self.options.genes_path,
            self.options.group_coords_path, STREAM_OUT, dpt=self.options.dpt,
            log_info=self.log_info)
        self.output_file = FileBoxCls(self.options.out_path, "w",
            "cannot create output file")
        try: #{
            overlaps_remain = True
            for group in group_parser: #{
                # get the breakpoint gene annotation for the group
                # NOTE(review): once the stream is exhausted,
                # GetBreakpointOverlaps (and therefore group.ClearBPGenes)
                # is no longer called for later groups -- confirm that is
                # intended
                if (overlaps_remain): #{
                    try: #{
                        self.GetBreakpointOverlaps(group)
                    except StopIteration:
                        DebugMsg(self, "No overlaps remain")
                        overlaps_remain = False
                    #} end try
                #} end if
                # write the group to the output file
                self.output_file.Write(group.FullDataString())
            #} end for
        finally:
            # close the output file on both the success and the error path
            self.output_file.Close()
        #} end try
        if (not self.options.keep_coords_file): #{
            os.remove(self.options.group_coords_path)
        #} end if
        LogMsg(self, "Time spent adding breakpoint gene annotation: %s" %
            TimeSpent(start))
    #} end def

    def CreateGroupCoordsFile(self, group_parser): #{
        """Write the coordinates of every group to the coordinates file.

        Does nothing except log a message when
        self.options.use_existing_group_coords is set. Otherwise iterates
        group_parser to the end, closes it, and closes the coordinates file
        (the file is closed on error as well).
        """
        if (self.options.use_existing_group_coords): #{
            LogMsg(self, "Using existing group coordinates file.")
            return
        #} end if
        # check whether to use "chr" in chromosome names in coordinates file
        use_chr = ShouldChromUseChr(1, self.options.genes_path,
            "exon coordinates", self.log_info)
        # open the group coordinates file
        group_coords_file = FileBoxCls(self.options.group_coords_path, "w",
            "cannot create event coordinates file")
        try: #{
            for group in group_parser: #{
                ExtremeDebugMsg(self,
                    "Writing coordinates for group %i" % group.id)
                self.WriteGroupCoords(group, group_coords_file, use_chr)
            #} end for
            group_parser.Close()
        finally:
            group_coords_file.Close()
        #} end try
    #} end def

    def WriteGroupCoords(self, group, group_coords_file, use_chr): #{
        """Write coordinates for each member of group, by candidate kind."""
        for candidate in group.members: #{
            if (candidate.gap): #{
                # write gap group coordinates
                self.WriteGapGroupCoords(candidate, group_coords_file, use_chr)
            else:
                # write split group coordinates
                self.WriteSplitGroupCoords(candidate, group_coords_file,
                    use_chr)
            #} end if
        #} end for
    #} end def

    def WriteGapGroupCoords(self, candidate, group_coords_file, use_chr): #{
        """Write one coordinates line for a gap candidate.

        The line spans candidate.align_info_B from genome_start to
        genome_end and is named "<candidate id>A".
        """
        # NOTE(review): the coordinates come from align_info_B while the
        # region label is "A" -- confirm this pairing is intended
        gap_coords = ConstructBEDString(candidate.align_info_B.chrom, use_chr,
            candidate.align_info_B.genome_start,
            candidate.align_info_B.genome_end,
            "%sA" % candidate.IDString())
        group_coords_file.WriteLine(gap_coords)
    #} end def

    def WriteSplitGroupCoords(self, candidate, group_coords_file, use_chr): #{
        """Write two coordinates lines (regions "A" and "B") for a candidate.

        Each line is a window of self.options.event_buffer on either side
        of one alignment coordinate: genome_end of alignments[0] for region
        "A" and genome_start of alignments[1] for region "B".
        """
        for (i, region_id) in enumerate(("A", "B")): #{
            alignment = candidate.alignments[i]
            genome_coords = (alignment.genome_start, alignment.genome_end)
            # index 1-i picks the end for region A (i=0) and the start for
            # region B (i=1)
            center = genome_coords[1 - i]
            split_coords = ConstructBEDString(alignment.chrom, use_chr,
                center - self.options.event_buffer,
                center + self.options.event_buffer,
                "%s%s" % (candidate.IDString(), region_id))
            group_coords_file.WriteLine(split_coords)
        #} end for
    #} end def

    def GetBreakpointOverlaps(self, group): #{
        """Attach all overlaps for group to its members.

        Clears the group's previous breakpoint genes, advances the overlap
        stream past overlaps whose group_id is lower than group.id, then
        consumes every overlap whose group_id equals group.id, passing the
        matching member to self.AddBreakPointGene.

        StopIteration from the overlap stream is not caught here; Run()
        handles it.
        """
        # clear any previous breakpoint genes
        group.ClearBPGenes()
        # skip overlaps for groups that come before the current group
        # (presumably overlaps arrive ordered by group id -- TODO confirm)
        while (getattr(self, "curr_overlap", None) is None or
               self.curr_overlap.group_id < group.id): #{
            self.GetNextOverlap()
        #} end while
        # create a dictionary of the members of the current group
        candidates_dict = dict((candidate.candidate_id, candidate)
            for candidate in group.members)
        # get all overlaps for the current group
        while (self.curr_overlap.group_id == group.id): #{
            ExtremeDebugMsg(self, "Found overlap for %i%s" %
                (self.curr_overlap.group_id, self.curr_overlap.candidate_id))
            if (self.curr_overlap.candidate_id in candidates_dict): #{
                ExtremeDebugMsg(self, " candidate ID in dictionary!")
                self.AddBreakPointGene(
                    candidates_dict[self.curr_overlap.candidate_id])
            #} end if
            self.GetNextOverlap()
        #} end while
    #} end def

    def GetNextOverlap(self): #{
        """Read and parse the next overlap line into self.curr_overlap.

        Raises:
            BreakpointGeneAnnotatorError: if the overlap stream has not
                been opened, or if the line cannot be parsed.
            StopIteration: propagated from the stream when it is exhausted.
        """
        if (self.overlaps_stream is None): #{
            raise BreakpointGeneAnnotatorError("breakpoint gene overlap stream "
                "is not open!")
        #} end if
        # NOTE(review): .next() is the Python 2 iterator method; kept because
        # the stream type returned by RunOverlapCode is not visible here
        overlap_line = CleanLine(self.overlaps_stream.next())
        tokenizer = TokenizerCls(overlap_line, delimiter=" ",
            log_info=self.log_info)
        try:
            self.curr_overlap = FeatureOverlapCls(tokenizer, multi_target=True)
        except ValueError as e:
            raise BreakpointGeneAnnotatorError("error parsing overlap line: "
                "%s\n%s" % (overlap_line, e))
        # end try
        ParseFullEventID(self.curr_overlap, self.curr_overlap.query_id)