Python GeneAnnotationParserCls Examples

Programming Language: Python

Namespace/Package Name: parsers.genes.annotation

Examples at hotexamples.com: 3

Python GeneAnnotationParserCls - 3 examples found. These are the top-rated real-world Python examples of parsers.genes.annotation.GeneAnnotationParserCls extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

close(1)

next(1)

Example #1

Show file

File: exon_bounds.py Project: ptraverse/gsc

 def LoadExonBoundaryCoordinatesFromFile(self, path): #{
   """Load exon boundary coordinates from a gene annotation file.

   Every transcript yielded by GeneAnnotationParserCls is visited, except
   those whose normalized chromosome name is rejected by NonStandardChr.
   For each exon, and for each side in SIDES whose coordinate is kept, the
   transcript id is passed to AddToMultiDict on self.exon_bound_coords
   with the key path [chrom, side, coord, other-side coord].

   The left bound of a transcript's first exon and the right bound of its
   last exon are set to None and therefore never used as the primary
   coordinate of an entry.

   Args:
     path: location of the gene annotation file.

   Raises:
     ExonBoundCounterError: if the exons returned by SortedExons() are not
       in non-decreasing order of their min coordinate.
   """
   DebugMsg(self, "Gene annotation file: %s" % path)
   # open the annotations file
   annotations_file = GeneAnnotationParserCls(path, log_info=self.log_info)
   skipped_chroms = set()
   try: #{
     # get the coordinates from the file
     for transcript in annotations_file: #{
       # fix chromosome names, if needed
       chrom = NormalizeChrID(transcript.chrom)
       if (NonStandardChr(chrom)): #{
         ExtremeDebugMsg(self, "Skipping transcript in strange chromosome: "
           "%s (%s)" % (chrom, transcript.chrom))
         skipped_chroms.add(chrom)
         continue
       #} end if
       prev_exon = None
       for (index, exon) in enumerate(transcript.SortedExons()): #{
         (exon.left, exon.right) = (exon.min, exon.max)
         # assume that exon list is sorted by left coordinate
         # (identity test: never routed through a custom __ne__ on exons)
         if (prev_exon is not None and prev_exon.min > exon.min): #{
           raise ExonBoundCounterError("Transcript %s exons are not in "
             "order: %s, %s" % (transcript.transcript_id,
             prev_exon.ToString(), exon.ToString()))
         #} end if
         prev_exon = exon
         # do not include the left side of the first exon
         if (0 == index): #{
           exon.left = None
         #} end if
         # do not include the right side of the last exon
         if (len(transcript.exons) == (index+1)): #{
           exon.right = None
         #} end if
         # exon_bound_coords[chrom][prime_side][coord1][coord2] = gene_list
         for side in SIDES: #{
           coord = getattr(exon, side)
           if (coord is not None): #{
             keys = [chrom, side, coord, getattr(exon, OtherSide(side))]
             AddToMultiDict(self.exon_bound_coords, keys,
               transcript.transcript_id)
           #} end if
         #} end for
       #} end for
     #} end for
     if (0 < len(skipped_chroms)): #{
       DebugMsg(self, "Skipped transcripts in chromosomes: %s" %
         ", ".join(sorted(skipped_chroms)))
     #} end if
   finally:
     # close the file on every exit path, including the out-of-order
     # exon error raised above, so the parser's handle is never leaked
     annotations_file.close()

Example #2

Show file

File: event_simulator.py Project: ptraverse/gsc

 def __init__(self, path, log_info=None): #{
   """Remember the logging settings and open the annotation file.

   Args:
     path: location of the gene annotation file handed to
       GeneAnnotationParserCls.
     log_info: stored on the instance and forwarded to the parser
       unchanged; defaults to None.
   """
   self.log_info = log_info
   # build the parser first, then publish it on the instance
   annotation_parser = GeneAnnotationParserCls(path, log_info=log_info)
   self.parser = annotation_parser

Example #3

Show file

File: event_simulator.py Project: ptraverse/gsc

class AnnotationsFileCls: #{
  """Iterator over the usable transcripts of a gene annotation file.

  Wraps a GeneAnnotationParserCls. Each call to next() pulls transcripts
  from the parser until one passes the filters in _SkipTranscript(), then
  annotates it (isoform, non_coding, split_exons, utr_flags,
  num_coding_exons) before returning it.
  """

  def __init__(self, path, log_info=None): #{
    """Open the annotation file at path.

    Args:
      path: location of the gene annotation file.
      log_info: stored on the instance and forwarded to the parser.
    """
    self.log_info = log_info
    # open the annotations file
    self.parser = GeneAnnotationParserCls(path, log_info=log_info)
  #} end def

  def __iter__(self): #{
    """Return self; the object acts as its own iterator."""
    return self
  #} end def

  def _SkipTranscript(self, transcript): #{
    """Return True when next() must discard the transcript.

    A transcript is discarded when NonStandardChr() rejects its chromosome,
    when its chromosome is "M", or when its lower-cased gene name starts
    with "trna_" or ends with "_rrna". The gene name is only inspected when
    the chromosome checks pass.
    """
    if (NonStandardChr(transcript.chrom) or "M" == transcript.chrom): #{
      return True
    #} end if
    gene_name = transcript.gene_name.lower()
    return (gene_name.startswith("trna_") or gene_name.endswith("_rrna"))
  #} end def

  def next(self): #{
    """Return the next transcript that passes the filters.

    Whatever self.parser.next() raises when it runs out of transcripts
    propagates to the caller unchanged.

    Returns:
      The transcript, with isoform set to 1, non_coding set to True when
      cdsStart >= cdsEnd, UTRs separated by SeparateUTRs(), and the exons,
      split_exons and utr_flags lists reversed for "-" strand transcripts.
    """
    while (True): #{
      # replace spaces in the alias and
      # get rid of any "chr" in the chromosome name
      transcript = FixAnnotation(self.parser.next(), use_chr=False)
      ExtremeDebugMsg(self, "T: %s" % transcript)
      # ensure that the transcript is from a "normal" chromosome,
      # not including mitochondrial DNA, and is not a tRNA or rRNA
      if (not self._SkipTranscript(transcript)): #{
        break
      #} end if
      ExtremeDebugMsg(self, "  Skipping...")
    #} end while
    transcript.isoform = 1
    # check whether the transcript is coding or non-coding
    # NOTE(review): non_coding is only ever assigned True here; whether the
    # parser pre-defines it for coding transcripts is not visible, so
    # SeparateUTRs() reads it defensively
    if (transcript.cdsStart >= transcript.cdsEnd): #{
      transcript.non_coding = True
    #} end if
    # separate the exons into UTRs and coding exons
    self.SeparateUTRs(transcript)
    # reverse the order of the exons if
    # the transcript is on the negative strand
    if ("-" == transcript.strand): #{
      transcript.exons.reverse()
      transcript.split_exons.reverse()
      transcript.utr_flags.reverse()
    #} end if
    return transcript
  #} end def

  def SeparateUTRs(self, transcript): #{
    """Split the transcript's exons into UTR and coding segments.

    Sets three attributes on the transcript:
      split_exons: list of [start, end] segments, in ascending order.
      utr_flags: list parallel to split_exons; True marks a UTR segment.
      num_coding_exons: number of segments flagged as coding.
    The split treats cdsStart and cdsEnd as the first and last coding
    positions: a leading UTR ends at cdsStart-1 and a trailing UTR starts
    at cdsEnd+1. For a non-coding transcript both lists are left empty.

    transcript.exons is reversed in place when its first exon starts after
    its last one.

    Raises:
      ExonCoordsError: if an exon overlapping the CDS ends up with
        start > end after its leading UTR has been trimmed.
      ChimeraSimulatorError: if split_exons and utr_flags differ in length.
    """
    # ensure that exons are ordered by start coordinate
    if (transcript.exons[0][0] > transcript.exons[-1][0]): #{
      transcript.exons.reverse()
    #} end if
    transcript.num_coding_exons = 0
    transcript.utr_flags = list()
    transcript.split_exons = list()
    # a transcript that was never marked non-coding may lack the attribute
    # altogether; treat that as "coding" instead of raising AttributeError
    if (getattr(transcript, "non_coding", False)): #{
      ExtremeDebugMsg(self, "Not separating UTRs for non-coding gene")
      return
    #} end if
    ExtremeDebugMsg(self, "Separating UTRs from coding exons...\n"
      "cdsStart: %i, cdsEnd: %i" % (transcript.cdsStart, transcript.cdsEnd))
    for (e_start, e_end) in transcript.exons: #{
      ExtremeDebugMsg(self, "Exon start: %i, end: %i" % (e_start, e_end))
      # if the exon ends before the CDS start or
      # the exon starts after the CDS end,
      # the full exon is a UTR
      if (e_end < transcript.cdsStart or transcript.cdsEnd < e_start): #{
        transcript.utr_flags.append(True)
        transcript.split_exons.append([e_start, e_end])
        ExtremeDebugMsg(self, "  full UTR")
      else:
        # if the exon starts before the CDS start and
        # ends after the CDS start,
        # the first part of the exon is a UTR
        if (e_start < transcript.cdsStart): #{
          transcript.utr_flags.append(True)
          transcript.split_exons.append([e_start, transcript.cdsStart-1])
          # the remainder of the exon now begins at the CDS start
          e_start = transcript.cdsStart
          ExtremeDebugMsg(self, "  UTR start: %i-%i\n  New start: %i" %
            (transcript.split_exons[-1][0],
            transcript.split_exons[-1][1], e_start))
        #} end if
        # if the exon starts before the CDS end and
        # ends after the CDS end,
        # the second part of the exon is a UTR
        if (transcript.cdsEnd < e_end): #{
          transcript.num_coding_exons += 1
          transcript.utr_flags.append(False)
          transcript.split_exons.append([e_start, transcript.cdsEnd])
          transcript.utr_flags.append(True)
          transcript.split_exons.append([transcript.cdsEnd+1, e_end])
          ExtremeDebugMsg(self, "  exon start: %i-%i\n  UTR end: %i-%i" %
            (transcript.split_exons[-2][0], transcript.split_exons[-2][1],
             transcript.split_exons[-1][0], transcript.split_exons[-1][1]))
        # if the exon starts after the CDS start and
        # ends before the CDS end,
        # the full exon is really an exon
        elif (e_start <= e_end):
          transcript.num_coding_exons += 1
          transcript.utr_flags.append(False)
          transcript.split_exons.append([e_start, e_end])
          ExtremeDebugMsg(self, "  full exon: %i-%i" % (e_start, e_end))
        else:
          raise ExonCoordsError("cannot determine exon type: "
            "%s: CDS:%i-%i, Exon:%i-%i" % (transcript.alias,
            transcript.cdsStart, transcript.cdsEnd, e_start, e_end))
        #} end if
      #} end if
    #} end for
    # sanity check: the two lists are built in lockstep and must stay
    # parallel; report the counts that are actually being compared
    if (len(transcript.split_exons) != len(transcript.utr_flags)): #{
      raise ChimeraSimulatorError("error loading transcript: "
        "# split exons (%i) not equal to # UTR flags (%i)" %
        (len(transcript.split_exons), len(transcript.utr_flags)))