Esempio n. 1
0
 def GetPTDPrimerInfo(self, primer_info): #{
   """Build the primer-design fields for a PTD event.

   Constructs the inferred chimeric sequence from the reference transcript
   (everything up to gene.right, any extra contig bases, then everything
   from gene.left onwards), stores it in self.transcripts under a generated
   chimera id, and returns the event fields as strings.

   Args:
     primer_info: event record providing EventID(), C2TAlignOverlap(),
       ctg_id, group_id and at least two contig-to-transcript alignments
       in c2t_aligns.

   Returns:
     map(str, [event_id, chimera.id, chimera.left, chimera.right,
     gene.id, gene.left, gene.right])

   Raises:
     PrimerDesignerError: if self.transcripts already holds a different
       sequence under the generated chimera id.
   """
   event_id = primer_info.EventID()
   contig = PrimerSeqCls(primer_info.ctg_id)
   align0 = primer_info.c2t_aligns[0]
   align1 = primer_info.c2t_aligns[1]
   # contig coordinates of the junction between the two alignments
   contig.left = min(align0.ctg_end, align1.ctg_start)
   contig.right = max(align0.ctg_end, align1.ctg_start)
   # transcript coordinates of the duplicated region
   gene = PrimerSeqCls(align0.transcript)
   gene.left = min(align0.transcript_end, align1.transcript_start)
   gene.right = max(align0.transcript_end, align1.transcript_start)
   dup_length = (gene.right - gene.left) + 1
   # construct inferred chimera sequence
   ctg_overlap = primer_info.C2TAlignOverlap()
   extra = ""
   if (0 > ctg_overlap): #{
     # Take the contig bases lying strictly between the two alignments.
     # The previous code referenced undefined names (contig_left and
     # contig_right) and so raised NameError whenever this branch ran.
     # NOTE(review): the slice bounds assume 1-based inclusive contig
     # coordinates, matching how ConstructFusionSeq slices the same
     # region -- confirm against the alignment parser.
     extra = self.contigs[primer_info.ctg_id][contig.left:contig.right-1]
   #} end if
   if ("-" == align0.strand): #{
     extra = ReverseComplement(extra)
   #} end if
   wt_seq = self.transcripts[gene.id]
   chimera = PrimerSeqCls("G%i_%s_%s" % (primer_info.group_id,
     primer_info.ctg_id, gene.id))
   # the region gene.left..gene.right appears twice in the result
   chimera.seq = (wt_seq[:gene.right] + extra + wt_seq[gene.left-1:])
   if (chimera.id in self.transcripts and
       self.transcripts[chimera.id] != chimera.seq): #{
     raise PrimerDesignerError("conflicting chimeric PTD sequences: %s" %
       chimera.id)
   #} end if
   self.transcripts[chimera.id] = chimera.seq
   chimera.left = gene.left
   chimera.right = gene.right + len(extra) + dup_length
   DebugMsg(self, "Overlap: %i, Extra: \"%s\", WT_Length: %i, Dup_Length: "
     "%i, CT_Length: %i, Calc_Length: %i" % (ctg_overlap, extra,
     len(wt_seq), dup_length, len(chimera.seq),
     len(wt_seq)+len(extra)+dup_length))
   return map(str, [event_id, chimera.id, chimera.left, chimera.right,
     gene.id, gene.left, gene.right])
Esempio n. 2
0
 def GetFusionPrimerInfo(self, primer_info): #{
   """Collect the primer-design fields for a fusion event.

   When the contig is shorter than MIN_CTG_LEN, a synthetic chimera is
   built with ConstructFusionSeq; if that chimera is longer than the
   contig it is stored in self.transcripts and reported in place of the
   contig.

   Returns:
     map(str, [event_id, contig_id, contig_left, contig_right, geneA_id,
     geneA_left, geneA_right, geneB_id, geneB_left, geneB_right])

   Raises:
     PrimerDesignerError: if self.transcripts already holds a different
       sequence under the synthetic chimera's id.
   """
   event_id = primer_info.EventID()
   contig = PrimerSeqCls(primer_info.ctg_id)
   first_align = primer_info.c2t_aligns[0]
   second_align = primer_info.c2t_aligns[1]
   # junction coordinates on the contig, ordered low to high
   junction = (first_align.ctg_end, second_align.ctg_start)
   contig.left = min(junction)
   contig.right = max(junction)
   ctg_overlap = primer_info.C2TAlignOverlap()
   # ensure that geneA is the 5' gene, if possible
   if ("+" == first_align.strand): #{
     geneA = self.GeneInfoFromAlign0(first_align, ctg_overlap)
     geneB = self.GeneInfoFromAlign1(second_align, ctg_overlap)
   else:
     geneA = self.GeneInfoFromAlign1(second_align, ctg_overlap)
     geneB = self.GeneInfoFromAlign0(first_align, ctg_overlap)
   #} end if
   contig_len = len(self.contigs[contig.id])
   if (contig_len < MIN_CTG_LEN): #{
     DebugMsg(self, "Contig %s too short (%i): attempting to construct "
       "synthetic chimera" % (contig.id, contig_len))
     chimera = self.ConstructFusionSeq(primer_info, contig, ctg_overlap)
     if (chimera.id in self.transcripts): #{
       if (self.transcripts[chimera.id] != chimera.seq): #{
         raise PrimerDesignerError(
           "conflicting chimeric fusion sequences: %s" % chimera.id)
       #} end if
     #} end if
     # only switch to the synthetic sequence when it is actually longer
     if (contig_len < len(chimera.seq)): #{
       DebugMsg(self, "Using synthetic chimera sequence")
       self.transcripts[chimera.id] = chimera.seq
       contig = chimera
     #} end if
   #} end if
   fields = [
     event_id,
     contig.id, contig.left, contig.right,
     geneA.id, geneA.left, geneA.right,
     geneB.id, geneB.left, geneB.right,
   ]
   return map(str, fields)
Esempio n. 3
0
 def ConstructFusionSeq(self, primer_info, contig, ctg_overlap): #{
   """Assemble a synthetic chimeric sequence for a fusion event.

   The result is the sequence from RefSeqFromAlign0 for the first
   alignment followed by the sequence from RefSeqFromAlign1 for the second.
   With a negative ctg_overlap, the contig bases between contig.left and
   contig.right are inserted between the two; otherwise the first
   ctg_overlap bases of the second sequence are dropped.

   Args:
     primer_info: event record providing group_id, ctg_id and c2t_aligns.
     contig: object whose left/right attributes give the junction
       coordinates on the contig.
     ctg_overlap: overlap value for the two alignments on the contig.

   Returns:
     A PrimerSeqCls with id, seq, left and right set.
   """
   align0 = primer_info.c2t_aligns[0]
   align1 = primer_info.c2t_aligns[1]
   chimera_id = "G%i_%s_%s/%s" % (primer_info.group_id, primer_info.ctg_id,
     align0.transcript, align1.transcript)
   chimera = PrimerSeqCls(chimera_id)
   head_seq = self.RefSeqFromAlign0(align0)
   tail_seq = self.RefSeqFromAlign1(align1)
   if (0 > ctg_overlap): #{
     middle = self.contigs[primer_info.ctg_id][contig.left:contig.right-1]
   else:
     middle = ""
     tail_seq = tail_seq[ctg_overlap:]
   #} end if
   chimera.seq = head_seq + middle + tail_seq
   head_len = len(head_seq)
   shifted = head_len + ctg_overlap
   # left/right bracket the end of the head sequence, whichever way the
   # overlap shifts it
   chimera.left = min(head_len, shifted)
   chimera.right = max(head_len, shifted)
   tail_len = len(tail_seq)
   middle_len = len(middle)
   DebugMsg(self, "Left: %s\nExtra: %s\nRight: %s\nOverlap: %i, "
     "WT_Length0: %i, WT_Length1: %i, Extra_len: %i, CT_Length: %i, "
     "Calc_Length: %i" % (head_seq, middle, tail_seq, ctg_overlap,
     head_len, tail_len+max(0,ctg_overlap), middle_len,
     len(chimera.seq), head_len+tail_len+middle_len))
   return chimera
Esempio n. 4
0
 def GetITDPrimerInfo(self, primer_info): #{
   """Build the primer-design fields for an ITD event.

   Locates the alignment block that contains the duplicated contig region,
   maps the duplication onto transcript coordinates, and checks that the
   contig and transcript copies of the duplicated sequence agree (a single
   difference is tolerated and logged).

   Side effect: if the first contig-to-transcript alignment has no
   query_blocks / target_blocks, they are filled in on that alignment
   object from its start/end coordinates.

   Args:
     primer_info: event record providing EventID(), ctg_id, c2g_aligns
       and c2t_aligns.

   Returns:
     map(str, [event_id, contig.id, contig.left, contig.right,
     gene.id, gene.left, gene.right])

   Raises:
     PrimerDesignerError: if no alignment block contains the duplication,
       or if the contig and transcript duplicated sequences differ by
       more than one base.
   """
   event_id = primer_info.EventID()
   DebugMsg(self, "Getting Primer Info for %s %s" %
     (event_id, primer_info.ctg_id))
   contig = PrimerSeqCls(primer_info.ctg_id)
   contig.left = min(primer_info.c2g_aligns[1][0],
     primer_info.c2g_aligns[2][0])
   contig.right = max(primer_info.c2g_aligns[1][1],
     primer_info.c2g_aligns[2][1])
   # alignment to transcript
   align = primer_info.c2t_aligns[0]
   # ensure that there are some blocks
   if (align.query_blocks is None): #{
     align.query_blocks = [[align.ctg_start, align.ctg_end]]
   #} end if
   if (align.target_blocks is None): #{
     align.target_blocks = [
       [align.transcript_start, align.transcript_end]]
   #} end if
   cdup_coords = self.GetContigDuplicationCoords(primer_info.c2g_aligns,
     align)
   DebugMsg(self, "CDup: %i-%i" % (cdup_coords[0], cdup_coords[1]))
   gene = PrimerSeqCls(align.transcript)
   # Find the block whose query range contains the whole duplication.
   # The for/else is required: a post-loop "num_blocks <= index" test can
   # never be true because the loop variable stops at num_blocks-1, so the
   # failure used to go undetected and the last block was used instead.
   for (index, query_block) in enumerate(align.query_blocks): #{
     target_block = align.target_blocks[index]
     DebugMsg(self, "Block: %i-%i::%i-%i" % (query_block[0],
       query_block[1], target_block[0], target_block[1]))
     if (query_block[0] <= cdup_coords[0] and
         cdup_coords[1] <= query_block[1]): #{
       DebugMsg(self, "Found block %i" % index)
       break
     #} end if
   else:
     raise PrimerDesignerError("could not find block containing duplication!")
   #} end for
   # offsets of the duplication within the block, measured from the
   # block's query start; negated for minus-strand alignments
   start_offset = (cdup_coords[0] - query_block[0])
   end_offset = (cdup_coords[1] - query_block[0])
   if ("-" == align.strand): #{
     start_offset = -start_offset
     end_offset = -end_offset
   #} end if
   tdup_start = target_block[0] + start_offset
   tdup_end = target_block[0] + end_offset
   DebugMsg(self, "In Block: %i-%i::%i-%i, offsets:%i,%i, tdup:%i-%i" % (
     query_block[0], query_block[1], target_block[0], target_block[1],
     start_offset, end_offset, tdup_start, tdup_end))
   gene.left = min(tdup_start, tdup_end)
   gene.right = max(tdup_start, tdup_end)
   # coordinates are converted from 1-based inclusive to Python slices
   cdup_seq = self.contigs[contig.id][cdup_coords[0]-1:cdup_coords[1]]
   tdup_seq = self.transcripts[gene.id][gene.left-1:gene.right]
   if ("-" == align.strand): #{
     tdup_seq = ReverseComplement(tdup_seq)
   #} end if
   DebugMsg(self, "Align: %s\nDup-coords: %s-%s;%s-%s\n  %s\n  %s" % (
     align.ToString(), cdup_coords[0], cdup_coords[1], tdup_start,
     tdup_end, cdup_seq, tdup_seq))
   cdup_upper = cdup_seq.upper()
   tdup_upper = tdup_seq.upper()
   if (cdup_upper != tdup_upper): #{
     # Count positional mismatches; a difference in length counts as that
     # many extra mismatches, so truncated slices raise the designer error
     # below rather than an IndexError.
     diffs = abs(len(cdup_upper) - len(tdup_upper))
     diffs += sum(1 for (cbase, tbase) in zip(cdup_upper, tdup_upper)
       if cbase != tbase)
     if (1 < diffs): #{
       raise PrimerDesignerError("Contig duplicated-sequence is not the "
         "same as transcript duplicated-sequence!")
     else:
       LogMsg(self, "single-base mismatch from ref in duplicated seq")
     #} end if
   #} end if
   return map(str, [event_id, contig.id, contig.left, contig.right,
     gene.id, gene.left, gene.right])