コード例 #1
0
ファイル: library_iterator.py プロジェクト: ptraverse/gsc
class LibIteratorCls:
  """Iterate over the libraries listed in a library list file.

  Every non-comment line of the list file is converted into a
  LibraryInfoCls object, which is then passed to the ProcessLibraryMethod
  callback supplied by the caller.
  """

  def __init__(self, lib_list_path, ProcessLibraryMethod,
      options, log_info=None):
    """Record the iteration settings (the list file is not opened here).

    lib_list_path:        path of the library list file
    ProcessLibraryMethod: callable invoked once per library with the
                          LibraryInfoCls object built for it
    options:              options object; attributes it lacks are given
                          defaults by CheckOptions()
    log_info:             logging information handed on to LibraryInfoCls
    """
    self.lib_list_path = lib_list_path
    self.ProcessLibrary = ProcessLibraryMethod
    self.options = options
    self.CheckOptions()
    self.log_info = log_info
    self.num_libs = 0
    self.list_of_paths = False
  # end def

  def __del__(self):
    """Close the library list file if it was left open."""
    # the attribute only exists once IterateOverAllLibs() has been called
    lib_list_file = getattr(self, "lib_list_file", None)
    if (lib_list_file is not None and not lib_list_file.closed):
      lib_list_file.close()
    # end if
  # end def

  def CheckOptions(self): #{
    """Give default values to any option attributes that are missing.

    "no_path_check" and "only_pass" default to False and "get_paths"
    defaults to True.  Attributes that are already present are left alone.
    """
    option_defaults = (
      ("no_path_check", False),
      ("only_pass", False),
      ("get_paths", True),
    )
    for opt, default in option_defaults: #{
      if (not hasattr(self.options, opt)): #{
        setattr(self.options, opt, default)
      #} end if
    #} end for
  #} end def

  def IterateOverAllLibs(self):
    """Call the ProcessLibrary callback for every library in the list file.

    Lines starting with "#" are skipped.  self.num_libs is reset to zero
    and then counts the libraries that were processed.

    Raises LibIteratorError when event paths are expected
    (options.get_paths or self.list_of_paths) but none were found for a
    library.
    """
    self.num_libs = 0
    self.lib_list_file = FileBoxCls(self.lib_list_path, "r",
      "could not open library list file")
    try:
      for lib_line in self.lib_list_file:
        # skip comment lines
        if (lib_line.startswith("#")):
          continue
        # end if
        lib_info = LibraryInfoCls(self.options, self.log_info,
          self.list_of_paths)
        lib_info.GetLibDir(lib_line)
        DebugMsg(self, "Lib Dir: %s" % lib_info.lib_dir)
        if (self.options.get_paths and not self.list_of_paths):
          lib_info.GetEventPaths()
        # end if
        if (1 > len(lib_info.event_paths) and
            (self.options.get_paths or self.list_of_paths)):
          raise LibIteratorError("could not get event path(s) from directory: "
            "%s" % lib_info.lib_dir)
        # end if
        self.ProcessLibrary(lib_info)
        self.num_libs += 1
      # end for
    finally:
      # close the list file even when a library fails, rather than
      # leaving it open until the iterator object is garbage collected
      self.lib_list_file.close()
    # end try
コード例 #2
0
ファイル: samtools.py プロジェクト: ptraverse/gsc
 def PrintErrors(self):  # {
     """Log every line of the samtools error file.

     Returns True when the error file contained at least one line and
     False when it was empty.
     """
     errors = False
     fail_msg = "could not open samtools error file"
     err_file = FileBoxCls(self.err_file_path, "r", fail_msg)
     try:  # {
         for line in err_file:  # {
             LogMsg(self, line)
             errors = True
         # } end for
     finally:
         # close the error file even if logging one of its lines fails
         err_file.close()
     # } end try
     return errors
コード例 #3
0
ファイル: filter_groups.py プロジェクト: ptraverse/gsc
 def PrintFilters(self): #{
   """Write one "description: value" line per filter to the filters file.

   The filters are written in sorted order of their names, and the file
   is closed even when writing fails.
   """
   fail_msg = "could not open filter file"
   filter_file = FileBoxCls(self.OutputPath("filters"), "w", fail_msg)
   try: #{
     for name in sorted(self.filters.keys()): #{
       current = self.filters[name]
       summary = "%s: %s" % (current.description, current.ValueString())
       filter_file.WriteLine(summary)
     #} end for
   finally:
     filter_file.close()
   #} end try
コード例 #4
0
ファイル: check_status.py プロジェクト: ptraverse/gsc
 def CheckStatus(self): #{
   """Set self.status from the contents of the job's output file.

   The status starts as "in progress".  The first line equal to
   R2C_SUCCESS makes it "complete"; the first empty line or line equal
   to R2C_FAIL makes it "failed".  Reading stops at that line.
   """
   job_output = FileBoxCls(self.output_path, "r",
     "cannot open output file for job number %s" % self.num)
   self.status = "in progress"
   for line in job_output: #{
     if (R2C_SUCCESS == line): #{
       self.status = "complete"
     elif ("" == line or R2C_FAIL == line): #{
       self.status = "failed"
     else:
       # this line does not decide anything; look at the next one
       continue
     #} end if
     break
   #} end for
   job_output.close()
コード例 #5
0
 def CreateAlignCoordsFile(self, aligns): #{
   """Write the block coordinates of each alignment to the coords file.

   aligns: sequence of alignments; each one is passed through FixAlign()
           and written with its position in the sequence as its id
   """
   DebugMsg(self, "Creating new alignment coordinates file...")
   fail_msg = "Cannot open alignment coordinates file"
   coords_file = FileBoxCls(self.paths['align_coords'], "w", fail_msg)
   align_index = 0
   for raw_align in aligns: #{
     # REMINDER: use alignment blocks instead!
     WriteBlockCoords(FixAlign(raw_align), align_index, coords_file,
       use_chr=True)
     align_index += 1
   #} end for
   coords_file.close()
コード例 #6
0
ファイル: check_status.py プロジェクト: ptraverse/gsc
 def CheckStatus(self): #{
   """Set self.status from the contents of the job's output file.

   The status starts as "in progress".  The first line equal to
   CID_SUCCESS makes it "complete"; the first empty line or line equal
   to CID_FAIL makes it "failed".  Reading stops at that line.
   """
   ExtremeDebugMsg(self, "Checking job status: %s" % self.output_path)
   job_output = FileBoxCls(self.output_path, "r",
     "cannot open output file for job number %s" % self.num)
   self.status = "in progress"
   for line in job_output: #{
     ExtremeDebugMsg(self, "  %s" % line)
     if (CID_SUCCESS == line): #{
       self.status = "complete"
     elif ("" == line or CID_FAIL == line): #{
       self.status = "failed"
     else:
       # this line does not decide anything; look at the next one
       continue
     #} end if
     break
   #} end for
   job_output.close()
コード例 #7
0
ファイル: integrate.py プロジェクト: ptraverse/gsc
 def IntegrateP2GFile(self, p2g_path): #{
   """Merge the support lines of a pair-to-genome file into the groups.

   Each line of the file at p2g_path is parsed into a P2GGroupCls object
   and matched by id against the groups produced by self.group_parser.
   Matched groups receive the support and are written out.

   Raises P2GIntegratorError when the groups file ends before the
   pair-to-genome file does, or when a support line refers to a group id
   that is larger than the current group's id after advancing.
   """
   DebugMsg(self, "Integrating pair-to-genome file: %s" % p2g_path)
   current_group = None
   open_fail_msg = "cannot open pair-to-genome results file"
   support_file = FileBoxCls(p2g_path, "r", open_fail_msg)
   for support_line in support_file: #{
     DebugMsg(self, "LINE: %s" % support_line)
     # every support line is counted, including ones skipped further down
     self.num_groups += 1
     support = P2GGroupCls(self.options, self.log_info)
     support.ParseSupportString(support_line)
     # keep track of groups that had no reads at all
     if (1 > support.num_reads): #{
       self.groups_without_reads.append("%i" % support.group_id)
     #} end if
     # step forward in the groups file for the first line, or once the
     # support id has moved past the current group
     if (None == current_group or
         support.group_id > current_group.id):
       try: #{
         DebugMsg(self, "Getting next group...")
         current_group = self.group_parser.GetNextGroup()
       except StopIteration:
         eof_msg = (
           "Unexpected end of groups file: %s\n  while integrating: %s" %
           (self.group_parser.data_file_path, p2g_path))
         raise P2GIntegratorError(eof_msg)
       #} end try
     #} end if
     # groups may have been removed from the groups file
     if (support.group_id < current_group.id): #{
       continue
     #} end if
     # anything left must refer to exactly the current group
     if (support.group_id != current_group.id): #{
       mismatch_msg = ("Inconsistent group ids: %i from %s, " %
         (support.group_id, p2g_path))
       mismatch_msg = mismatch_msg + ("%i from %s" %
         (current_group.id, self.options.barnacle_path))
       raise P2GIntegratorError(mismatch_msg)
     #} end if
     # attach the support and write the group to the output file(s)
     self.AddSupportToGroup(current_group, support)
     self.WriteGroup(current_group)
   #} end for
   support_file.close()
コード例 #8
0
 def WriteCounts(self): #{
   """Write the split (and, if checked, gapped) counts to the counts file.

   The file always ends with a "COMPLETE" line.
   """
   counts_file = FileBoxCls(self.paths['counts'], "w",
     "Cannot open counts file")
   # number of split alignments found
   counts_file.WriteLine("Split: %i" % len(self.candidate_contigs))
   # the gapped count is only reported when gapped alignments were checked
   if (self.options.check_gap): #{
     pieces = ["Gapped: "]
     if (self.more_than_99): #{
       pieces.append("at least ")
     #} end if
     pieces.append("%i" % self.num_gapped_aligns)
     counts_file.WriteLine("".join(pieces))
   #} end if
   counts_file.WriteLine("COMPLETE")
   counts_file.close()
コード例 #9
0
ファイル: with_tophat_fusion.py プロジェクト: ptraverse/gsc
class TopHatFileCls:  # {
    """Iterator over the events stored in a TopHat-Fusion results file.

    Each call to next() consumes six consecutive lines of the file and
    returns the TopHatEventCls object built from them.
    """

    def __init__(self, path, log_info=None):  # {
        """Open the results file at path for reading."""
        fail_msg = "cannot read TopHat-Fusion results file"
        self.file = FileBoxCls(path, "r", fail_msg)
        self.log_info = log_info

    # } end def

    def __del__(self):  # {
        """Make sure the results file is closed when the object goes away."""
        self.close()

    # } end def

    def __iter__(self):  # {
        """Return the object itself; it is its own iterator."""
        return self

    # } end def

    def next(self):  # {
        """Read the next event from the results file and return it."""
        results = self.file
        # line 1 should start with "allAtOnce" and holds the breakpoint
        # coordinates
        event = TopHatEventCls(results.next())
        # lines 2 and 3 should be "sequence" lines
        for _ in range(2):
            event.CheckSeqLine(results.next())
        # line 4 should hold... scores?
        event.CheckScoreLine(results.next())
        # line 5 should hold the gene ids
        event.ParseGenesLine(results.next())
        # line 6 is read and discarded
        results.next()
        return event

    # } end def

    def close(self):  # {
        """Close the results file unless it is missing or already closed."""
        if not hasattr(self, "file"):  # {
            return
        # } end if
        if None == self.file:  # {
            return
        # } end if
        if self.file.closed:  # {
            return
        # } end if
        self.file.close()
コード例 #10
0
ファイル: calculate.py プロジェクト: ptraverse/gsc
class P2GCalculatorCls: #{
  """Calculate pair-to-genome support for every group in a groups file.

  The groups are read from options.barnacle_path and one support line per
  group is written to the matching ".out" file.
  """

  def __init__(self, options): #{
    """Set up the main-class state and check the samtools configuration."""
    SetupMainClass(self, options)
    CheckConfigCommands(self, "samtools")
    self.groups_file = None
    self.output_file = None
    self.options.use_chr = False
  #} end def

  def __del__(self): #{
    # close input and output files, if they are not already closed
    self.CloseFiles()
    CloseLogFile(self)
  #} end def

  def CalculateSupport(self): #{
    """Compute and write the pair-to-genome support of every group.

    The input and output files are closed when the run finishes or fails.
    The temporary samtools output files are removed only after a
    successful run.
    """
    start = time.time()
    LogMsg(self, "Adding pair-to-genome support to groups...")
    try: #{
      # open the input and output files
      self.Setup()
      # for each group in the input file
      for group_line in self.groups_file: #{
        group_start = time.time()
        # create a group object from the line
        group = P2GGroupCls(self.options, self.log_info)
        group.ParseGroupLine(group_line)
        LogMsg(self, "Group: %i" % group.group_id)
        ExtremeDebugMsg(self, "  %s" % group.ToString())
        # get the pair-to-genome support for the current group
        group.GetPairToGenomeSupport()
        # write the pair-to-genome support for the current group
        self.WritePairToGenomeSupport(group.SupportString())
        ExtremeDebugMsg(self, "Time spent on group: %s" %
          TimeSpent(group_start))
      #} end for
    finally:
      # close the input and output files even when a group fails, so
      # that the partial output is flushed and no handle is left open
      self.CloseFiles()
    #} end try
    # remove the temporary samtools output files
    for end in ["", "_1", "_2"]: #{
      temp_sam_path = os.path.join(self.options.output_dir,
        "sam_out_tmp%s" % end)
      if (os.path.isfile(temp_sam_path)): #{
        os.remove(temp_sam_path)
      #} end if
      temp_sam_path += ".err"
      if (os.path.isfile(temp_sam_path)): #{
        os.remove(temp_sam_path)
      #} end if
    #} end for
    LogMsg(self, "Total time adding pair-to-genome support: %s" %
      TimeSpent(start))
  #} end def

  def Setup(self): #{
    """Open the groups and output files and set options.use_chr."""
    fail_msg = "cannot open groups file"
    self.groups_file = FileBoxCls(self.options.barnacle_path, "r", fail_msg)
    output_file_path = self.options.barnacle_path.replace(".data", ".out")
    fail_msg = "cannot create pair-to-genome support output file"
    self.output_file = FileBoxCls(output_file_path, "w", fail_msg)
    # create samtools object and check whether to use "chr" in chromosome IDs
    samtools = SAMToolsCls(self.options.p2g_path, self.options,
      log_info=self.log_info)
    self.options.use_chr = samtools.ShouldChromUseChr()
  #} end def

  def WritePairToGenomeSupport(self, support_string): #{
    """Write one support string as a line of the output file."""
    self.output_file.WriteLine("%s" % support_string)
  #} end def

  def CloseFiles(self): #{
    """Close the groups and output files if they are open.

    A file that gets closed here has its attribute reset to None.  The
    attributes are read with getattr() because __del__ calls this method
    even when __init__ failed before they were created.
    """
    groups_file = getattr(self, "groups_file", None)
    if (groups_file is not None and not groups_file.closed): #{
      groups_file.close()
      self.groups_file = None
    #} end if
    output_file = getattr(self, "output_file", None)
    if (output_file is not None and not output_file.closed): #{
      output_file.close()
      self.output_file = None
    #} end if
  #} end def
コード例 #11
0
class CandidateGroupParserCls: #{
  """Parse candidate groups from a data file, all at once or one by one."""

  def __init__(self, data_file_path, keep_lines=False, check_data=False): #{
    """Check the path, create the group parser and open the data file.

    data_file_path: path of the candidate group file
    keep_lines:     passed on to GroupParserCls; must be set to use
                    GroupLine() and MemberLines()
    check_data:     passed on to every ParseGroup() call
    """
    CheckFilePath(data_file_path, "candidate group file")
    self.group_parser = GroupParserCls(keep_lines=keep_lines)
    self.check_data = check_data
    self.data_file = FileBoxCls(data_file_path, "r", "cannot open data file")
    self.groups = list()
  #} end def

  def __del__(self): #{
    # the data file must not outlive the parser
    self.CloseDataFile()
  #} end def

  def __iter__(self): #{
    """Return the parser itself; it is its own iterator."""
    return self
  #} end def

  # Load the entire data file into memory
  # Do not mix with using GetNextGroup() method
  def ParseDataFile(self): #{
    """Parse every group in the data file, close it, return self.groups."""
    parse_group = self.group_parser.ParseGroup
    for first_line in self.data_file: #{
      parsed = parse_group(first_line, self.data_file,
        check_data=self.check_data)
      self.groups.append(parsed)
    #} end for
    self.CloseDataFile()
    return self.groups
  #} end def

  # Load a single group from the data file into memory
  # Do not mix with using ParseDataFile() method
  def GetNextGroup(self): #{
    """Return the next group in the data file (same as next())."""
    return self.next()
  #} end def

  def next(self): #{
    """Skip blank lines, then parse and return the next group."""
    first_line = self.data_file.next()
    while ("" == first_line): #{
      first_line = self.data_file.next()
    #} end while
    return self.group_parser.ParseGroup(first_line, self.data_file,
      check_data=self.check_data)
  #} end def

  def Close(self): #{
    """Close the data file."""
    self.CloseDataFile()
  #} end def

  def CloseDataFile(self): #{
    """Close the data file unless it is missing, None or already closed."""
    data_file = getattr(self, "data_file", None)
    if (None == data_file or data_file.closed): #{
      return
    #} end if
    data_file.close()
  #} end def

  def close(self): #{
    """Close the data file."""
    self.CloseDataFile()
  #} end def

  def GroupLine(self): #{
    """Return the group line kept by the group parser.

    Raises CandidateGroupParserError when keep_lines was not set.
    """
    if (self.group_parser.keep_lines): #{
      return self.group_parser.group_line
    #} end if
    raise CandidateGroupParserError(
      "cannot get group line when keep_lines flag was not set")
  #} end def

  def MemberLines(self): #{
    """Return the member lines kept by the group parser.

    Raises CandidateGroupParserError when keep_lines was not set.
    """
    if (self.group_parser.keep_lines): #{
      return self.group_parser.member_lines
    #} end if
    raise CandidateGroupParserError(
      "cannot get member lines when keep_lines flag was not set")
コード例 #12
0
 def Output(self, append): #{
   """Write the candidate contigs to the split alignment output file.

   append: when true the output files are opened in append mode,
           otherwise they are overwritten

   When params['output_psl'] is set and the first candidate contig was
   aligned with blat, the gapped psl lines and the psl lines of both
   alignments of every candidate are also written to the psl output
   file.  For any other method params['output_psl'] is switched off.
   """
   mode = "a" if append else "w"
   out = FileBoxCls(self.paths['split_out'], mode,
     "Cannot open split alignment output file")
   if (self.params['output_psl']): #{
     first_method = self.candidate_contigs[0].align1.method
     if (first_method == "blat"): #{
       psl_out = FileBoxCls(self.paths['psl_out'], mode,
         "Cannot open alignment psl output file")
       DebugMsg(self,
         "Writing alignment lines to %s" % self.paths['psl_out'])
       # the gapped alignment events go into the psl file first
       for gapped_line in self.gapped_psl_lines: #{
         psl_out.Write(gapped_line)
       #} end for
     else:
       # psl lines are only written for blat alignments
       self.params['output_psl'] = False
     #} end if
   #} end if
   # one details line per candidate contig
   for contig in self.candidate_contigs: #{
     first_align = contig.align1
     second_align = contig.align2
     first_align.target = AddChr(first_align.target)
     second_align.target = AddChr(second_align.target)
     ExtremeDebugMsg(self, "Writing line to %s:\n  %s" %
       (out.path, contig.Details()))
     out.WriteLine(contig.Details())
     if (self.params['output_psl']): #{
       psl_out.Write(contig.align1.psl())
       psl_out.Write(contig.align2.psl())
     #} end if
   #} end for
   out.close()
   if (self.params['output_psl']): #{
     psl_out.close()
   #} end if
コード例 #13
0
  def IdentifyCandidateContigs(self, aligns): #{
    """Group the alignments by contig and examine each contig's alignments.

    aligns: list of alignments; it is walked in runs of
            contig.num_aligns_to_contig entries, one run per contig
            (presumably sorted by query -- confirm against the caller)

    Updates the counters self.num_contigs, self.num_full_aligns and
    self.num_gapped_aligns, extends self.gapped_psl_lines, and hands every
    contig that has best alignments or alignment groups to one of the
    Examine...Pairwise() methods.
    """
    # TEMP # ExtremeDebugMsg(self, AlignListString(aligns)

    # open the contig sequences file if using the gap filter
    if (self.options.check_gap): #{
      fail_msg = "Cannot open contig sequence file"
      ctg_seq_file = FileBoxCls(self.paths['ctg_seq'], "r", fail_msg)
    else:
      ctg_seq_file = None
    #} end if

    # iterate over the alignments, grouping them by query (i.e. contig)
    contig_align_index = 0
    while (contig_align_index < len(aligns)): #{
      self.num_contigs += 1
      contig = ContigWithAlignmentsCls(contig_align_index, aligns,
        ctg_seq_file, self.paths['gap_out'], self.options, self.log_info)
      ExtremeDebugMsg(self, "-"*80)
      #DebugMsg(self, "Grouping alignments for "
      #  "%s (contig #%i)..." % (contig.id, self.num_contigs))
      DebugMsg(self, "%i) %s" % (self.num_contigs, contig.id))
      ExtremeDebugMsg(self, "  Contig length: %i" % contig.length)
      #LogMsg(self, "Contig align index: %i" % contig_align_index)

      # Select the alignments to consider for the current contig
      # and check for gapped alignments at the same time
      contig.SelectAlignments(aligns)
      if (contig.single_align_found): #{
        self.num_full_aligns += 1
      #} end if
      if (self.options.check_gap and not contig.perfect_align_found): #{
        contig.CheckGappedAlignments()
        self.gapped_psl_lines.extend(contig.gapped_psl_lines)
        self.num_gapped_aligns += contig.num_gapped_aligns
      #} end if
      if (self.params['check_split'] and
          not self.params['use_quick_chooser']):
        # pare down the alignment groups so that
        # only the best alignments remain
        contig.PareAlignmentGroups()
      #} end if

      #LogMsg(self, "# Gaps Found (Finder): %i" %
      #                         self.num_gapped_aligns)
      # advance to the first alignment of the next contig; this must happen
      # before the "continue" below so that the loop always makes progress
      contig_align_index += contig.num_aligns_to_contig

      if (0 < len(contig.best_aligns)): #{
        if (self.log_info['debug']): #{
          LogMsg(self, "%i best aligns: %s" %
            (len(contig.best_aligns), contig.id))
          ExtremeDebugMsg(self, AlignListString(contig.best_aligns))
        #} end if
      elif (0 < len(contig.align_groups)): #{
        if (self.log_info['debug']): #{
          ExtremeDebugMsg(self, "-"*40)
          LogMsg(self, "%i align groups: %s" %
            (len(contig.align_groups), contig.id))
          for i, group in enumerate(contig.align_groups): #{
            ExtremeDebugMsg(self, "\n".join(["Group %i" % i,
              "  %i) S:%i E:%i Aligns:%i" % (i, group.ctg_start,
              group.ctg_end, len(group.best_aligns)),
              AlignListString(group.best_aligns)]))
          #} end for
        #} end if
      else: # no best aligns or align groups found
        if (not contig.perfect_align_found and
            not contig.single_align_found): #{
          DebugMsg(self, "No partial aligns selected: %s" % contig.id)
        #} end if
        # nothing to examine pairwise for this contig
        continue
      #} end if

      # examine pairs of the chosen alignments
      if (self.params['use_quick_chooser']): #{
        self.ExamineBestAlignsPairwise(contig)
      else:
        self.ExamineAlignGroupsPairwise(contig)
      #} end if
    #} end while
    DebugMsg(self, "-"*80)
    # close the contig sequences file if using the gap filter
    # NOTE(review): the file stays open if an exception escapes the loop
    # above -- consider try/finally
    if (self.options.check_gap): #{
      ctg_seq_file.close()