Ejemplo n.º 1
0
# Scan the tab-separated file named by ECfilename and, for every row whose
# first column (up to the first '.') equals `id`, build an ISOFORM record and
# a per-variant dict of EXON objects keyed by exon number.
# NOTE(review): handle2 is not closed within the visible lines — confirm it
# is closed further down, or switch to a `with` block.
handle2=open(ECfilename ,'r')
# Default strand; overwritten with row[2] of each matching row.
strand="+"
for line in handle2:
    row=line.strip().split("\t")
    # Gene ID = column 0 with everything from the first '.' onwards removed.
    gene=row[0].split('.')[0]
    # NOTE(review): `id` must be bound by the surrounding code; if it is not,
    # this compares against the builtin `id` function and never matches.
    if gene==id:
        # Reset the running exon-length total for this variant.
        transcript_length=0
        variant_exons[row[0]]={}
        # Columns as consumed here: 0=variant ID, 1=chr, 2=strand,
        # 3/4=chr_start/chr_end, 5/6=cds_start/cds_end, 7=passed as `exon`
        # (presumably the exon count — TODO confirm), 8/9=comma-separated
        # exon start/end lists.
        variant=ISOFORM(id=row[0],chr=row[1],strand=row[2],chr_start=int(row[3]),chr_end=int(row[4]),cds_start=int(row[5]),cds_end=int(row[6]),exon=int(row[7]))
        variant.chr_span=int(row[4])-int(row[3])
        strand=row[2]
        
        startlist=row[8].split(',')
        endlist=row[9].split(',')
        # The last element of endlist is skipped — presumably the empty string
        # left by a trailing comma in the column; TODO confirm the file format.
        for i in range(len(endlist[:-1])):
            exon=EXON(variant=row[0],chr=row[1],strand=row[2])
            
            # Assumes ISOFORM stores its `strand` argument as `.strand`.
            if variant.strand=="+":
                # "+" strand: exons are numbered 1..N in file order.
                exon.number=i+1
                exon.start=int(startlist[i])
                exon.end=int(endlist[i])
            else:
                # Any other strand: numbering runs N..1 and start/end are
                # swapped, so exon.start holds the value from the end list.
                exon.number=len(endlist[:-1])-i
                exon.start=int(endlist[i])
                exon.end=int(startlist[i])
            
            # abs() keeps the length positive regardless of the swap above.
            exon.length=abs(exon.end-exon.start)
            transcript_length+=exon.length
            variant_exons[row[0]][exon.number]=exon
    
        # Total of all exon lengths accumulated in the loop above.
        variant.transcript_length=transcript_length
Ejemplo n.º 2
0
        # Fragment: `row`, `gene_variant`, `variant_dic` and `EXON` come from
        # code outside the visible lines.
        variantID = row[0]
        # Gene ID = variant ID with everything from the first "." removed.
        geneID = row[0].split(".")[0]
        # Record the variant under its gene, but only for genes that already
        # have an entry in gene_variant.
        if geneID in gene_variant:
            gene_variant[geneID].append(row[0])
        # Only variants already present in variant_dic are filled in.
        if variantID in variant_dic:
            # [:-1] drops the final character of each column before splitting —
            # presumably a trailing comma; TODO confirm the file format.
            chr_start_list = row[8][:-1].split(",")
            chr_end_list = row[9][:-1].split(",")
            variant_dic[variantID].chr = row[1]
            variant_dic[variantID].strand = row[2]
            variant_dic[variantID].cdsStart = int(row[5])
            variant_dic[variantID].cdsEnd = int(row[6])

            variant_dic[variantID].exon = []
            # Running 1-based offset: each exon handled below is assigned
            # [trans_start, trans_end] = [tx, tx + exonlen - 1], after which tx
            # advances by exonlen.
            tx = 1
            for i in range(len(chr_start_list)):
                exon = EXON(variant=row[0], chr=row[1], strand=row[2])
                exon.number = i + 1
                # Case 1: the exon lies entirely within [cdsStart, cdsEnd] —
                # keep its full coordinates.
                if int(chr_start_list[i]
                       ) >= variant_dic[variantID].cdsStart and int(
                           chr_end_list[i]) <= variant_dic[variantID].cdsEnd:
                    exon.start = int(chr_start_list[i])
                    exon.end = int(chr_end_list[i])
                    exonlen = exon.end - exon.start
                    exon.trans_start = tx
                    exon.trans_end = tx + exonlen - 1
                    tx += exonlen
                # Case 2: the exon spans cdsStart (start <= cdsStart <= end) —
                # its start is clipped to cdsStart.
                elif int(
                        chr_end_list[i]
                ) >= variant_dic[variantID].cdsStart and int(
                        chr_start_list[i]) <= variant_dic[variantID].cdsStart:
                    exon.start = variant_dic[variantID].cdsStart
Ejemplo n.º 3
0
 # Fragment: `row`, `variantID`, `gene_variant`, `variant_dic` and `EXON` are
 # defined outside the visible lines.
 # Gene ID = column 0 with everything from the first "." removed.
 geneID=row[0].split(".")[0]
 # Record the variant under its gene, but only for genes that already have an
 # entry in gene_variant.
 if geneID in gene_variant:
     gene_variant[geneID].append(row[0])
 # Only variants already present in variant_dic are filled in.
 if variantID in variant_dic:
     # [:-1] drops the final character of each column before splitting —
     # presumably a trailing comma; TODO confirm the file format.
     chr_start_list=row[8][:-1].split(",")
     chr_end_list=row[9][:-1].split(",")
     variant_dic[variantID].chr=row[1]
     variant_dic[variantID].strand=row[2]
     variant_dic[variantID].cdsStart=int(row[5])
     variant_dic[variantID].cdsEnd=int(row[6])
     
     
     variant_dic[variantID].exon=[]
     # Running 1-based offset: each exon handled below is assigned
     # [trans_start, trans_end] = [tx, tx+exonlen-1], after which tx advances
     # by exonlen.
     tx=1
     for i in range(len(chr_start_list)):
         exon=EXON(variant=row[0],chr=row[1],strand=row[2])
         exon.number=i+1
         # Case 1: the exon lies entirely within [cdsStart, cdsEnd] — keep its
         # full coordinates.
         if int(chr_start_list[i])>=variant_dic[variantID].cdsStart and int(chr_end_list[i])<=variant_dic[variantID].cdsEnd:
             exon.start=int(chr_start_list[i])
             exon.end=int(chr_end_list[i])
             exonlen=exon.end-exon.start
             exon.trans_start=tx
             exon.trans_end=tx+exonlen-1
             tx+=exonlen
         # Case 2: the exon spans cdsStart (start <= cdsStart <= end) — its
         # start is clipped to cdsStart and the end is kept as-is.
         elif int(chr_end_list[i])>=variant_dic[variantID].cdsStart and int(chr_start_list[i])<=variant_dic[variantID].cdsStart:
             exon.start=variant_dic[variantID].cdsStart
             exon.end=int(chr_end_list[i])
             exonlen=exon.end-exon.start
             exon.trans_start=tx
             exon.trans_end=tx+exonlen-1
             tx+=exonlen
Ejemplo n.º 4
0
        # Fragment: `row`, `variant_exons`, `transcript_length`, `ISOFORM` and
        # `EXON` are defined or initialised outside the visible lines.
        # Start an empty exon table for this variant, keyed by exon number.
        variant_exons[row[0]] = {}
        # Columns as consumed here: 0=variant ID, 1=chr, 2=strand,
        # 3/4=chr_start/chr_end, 5/6=cds_start/cds_end, 7=passed as `exon`
        # (presumably the exon count — TODO confirm).
        variant = ISOFORM(id=row[0],
                          chr=row[1],
                          strand=row[2],
                          chr_start=int(row[3]),
                          chr_end=int(row[4]),
                          cds_start=int(row[5]),
                          cds_end=int(row[6]),
                          exon=int(row[7]))
        variant.chr_span = int(row[4]) - int(row[3])
        strand = row[2]

        # Columns 8 and 9 hold comma-separated exon start and end lists.
        startlist = row[8].split(',')
        endlist = row[9].split(',')
        # The last element of endlist is skipped — presumably the empty string
        # left by a trailing comma in the column; TODO confirm the file format.
        for i in range(len(endlist[:-1])):
            exon = EXON(variant=row[0], chr=row[1], strand=row[2])

            # Assumes ISOFORM stores its `strand` argument as `.strand`.
            if variant.strand == "+":
                # "+" strand: exons are numbered 1..N in file order.
                exon.number = i + 1
                exon.start = int(startlist[i])
                exon.end = int(endlist[i])
            else:
                # Any other strand: numbering runs N..1 and start/end are
                # swapped, so exon.start holds the value from the end list.
                exon.number = len(endlist[:-1]) - i
                exon.start = int(endlist[i])
                exon.end = int(startlist[i])

            # abs() keeps the length positive regardless of the swap above.
            exon.length = abs(exon.end - exon.start)
            transcript_length += exon.length
            variant_exons[row[0]][exon.number] = exon

        # Total of all exon lengths accumulated in the loop above.
        variant.transcript_length = transcript_length