# Scan the tab-separated file ECfilename for transcript variants whose gene
# prefix (the text of column 0 before the first '.') equals `id`.  For every
# matching row an ISOFORM record is built and its exons are stored in
# variant_exons[<variant id>] keyed by exon number.
#
# Columns read: 0 variant id, 1 chr, 2 strand, 3/4 chr start/end,
# 5/6 cds start/end, 7 exon value passed to ISOFORM, 8/9 comma-separated
# exon start/end lists.
#
# The file is opened in a `with` block so the handle is closed even when a
# malformed row raises part-way through the scan.
with open(ECfilename, 'r') as handle2:
    strand = "+"  # value kept when no row matches
    for line in handle2:
        row = line.strip().split("\t")
        gene = row[0].split('.')[0]
        if gene != id:
            continue

        transcript_length = 0
        variant_exons[row[0]] = {}
        variant = ISOFORM(
            id=row[0],
            chr=row[1],
            strand=row[2],
            chr_start=int(row[3]),
            chr_end=int(row[4]),
            cds_start=int(row[5]),
            cds_end=int(row[6]),
            exon=int(row[7]),
        )
        variant.chr_span = int(row[4]) - int(row[3])
        strand = row[2]
        startlist = row[8].split(',')
        endlist = row[9].split(',')

        # The last element of endlist is ignored (presumably the empty string
        # left by a trailing comma -- confirm against the file format).
        exon_total = len(endlist[:-1])
        for i in range(exon_total):
            exon = EXON(variant=row[0], chr=row[1], strand=row[2])
            if variant.strand == "+":
                exon.number = i + 1
                exon.start = int(startlist[i])
                exon.end = int(endlist[i])
            else:
                # Any other strand: number exons in reverse file order and
                # swap the coordinates so `start` takes the end-list value.
                exon.number = exon_total - i
                exon.start = int(endlist[i])
                exon.end = int(startlist[i])
            exon.length = abs(exon.end - exon.start)
            transcript_length += exon.length
            variant_exons[row[0]][exon.number] = exon

        # Sum of the lengths of all exons of this variant.
        variant.transcript_length = transcript_length
# Fragment (whitespace-collapsed; the statement sequence continues past the end
# of this line).  What the visible code does:
#   * takes row[0] as the variant ID and its text before the first '.' as the
#     gene ID; if that gene ID is already a key of gene_variant, the variant ID
#     is appended to its list;
#   * if the variant ID is a key of variant_dic, copies chr (row[1]), strand
#     (row[2]), cdsStart (row[5]) and cdsEnd (row[6]) onto that entry, resets
#     its `exon` list, and walks the comma-separated exon start/end lists from
#     row[8]/row[9] (the last character of each field is dropped before
#     splitting -- presumably a trailing comma, confirm against the input);
#   * tx is a running 1-based position: an exon lying entirely inside
#     [cdsStart, cdsEnd] keeps its own bounds and gets trans_start = tx,
#     trans_end = tx + exonlen - 1, after which tx advances by exonlen;
#   * an exon that straddles cdsStart has its start set to cdsStart (the rest
#     of that branch is cut off here).
# NOTE(review): the original indentation is lost, so whether the variant_dic
# check is nested under the gene_variant check cannot be told from this line.
variantID = row[0] geneID = row[0].split(".")[0] if geneID in gene_variant: gene_variant[geneID].append(row[0]) if variantID in variant_dic: chr_start_list = row[8][:-1].split(",") chr_end_list = row[9][:-1].split(",") variant_dic[variantID].chr = row[1] variant_dic[variantID].strand = row[2] variant_dic[variantID].cdsStart = int(row[5]) variant_dic[variantID].cdsEnd = int(row[6]) variant_dic[variantID].exon = [] tx = 1 for i in range(len(chr_start_list)): exon = EXON(variant=row[0], chr=row[1], strand=row[2]) exon.number = i + 1 if int(chr_start_list[i] ) >= variant_dic[variantID].cdsStart and int( chr_end_list[i]) <= variant_dic[variantID].cdsEnd: exon.start = int(chr_start_list[i]) exon.end = int(chr_end_list[i]) exonlen = exon.end - exon.start exon.trans_start = tx exon.trans_end = tx + exonlen - 1 tx += exonlen elif int( chr_end_list[i] ) >= variant_dic[variantID].cdsStart and int( chr_start_list[i]) <= variant_dic[variantID].cdsStart: exon.start = variant_dic[variantID].cdsStart
# Fragment (whitespace-collapsed; begins after variantID has been assigned
# elsewhere and ends before any code that stores `exon`).  What the visible
# code does:
#   * derives the gene ID as the text of row[0] before the first '.'; if it is
#     already a key of gene_variant, row[0] is appended to its list;
#   * if variantID is a key of variant_dic, copies chr (row[1]), strand
#     (row[2]), cdsStart (row[5]) and cdsEnd (row[6]) onto that entry, resets
#     its `exon` list, and walks the comma-separated exon start/end lists from
#     row[8]/row[9] (the last character of each field is dropped before
#     splitting -- presumably a trailing comma, confirm against the input);
#   * tx is a running 1-based position.  In both visible branches the exon gets
#     trans_start = tx and trans_end = tx + exonlen - 1, then tx advances by
#     exonlen, where exonlen = exon.end - exon.start;
#   * branch 1: the exon lies entirely inside [cdsStart, cdsEnd] and keeps its
#     own start/end;
#   * branch 2: the exon straddles cdsStart, so its start is set to cdsStart
#     and its end stays at the exon's own end.
# NOTE(review): the original indentation is lost, so whether the variant_dic
# check is nested under the gene_variant check cannot be told from this line.
geneID=row[0].split(".")[0] if geneID in gene_variant: gene_variant[geneID].append(row[0]) if variantID in variant_dic: chr_start_list=row[8][:-1].split(",") chr_end_list=row[9][:-1].split(",") variant_dic[variantID].chr=row[1] variant_dic[variantID].strand=row[2] variant_dic[variantID].cdsStart=int(row[5]) variant_dic[variantID].cdsEnd=int(row[6]) variant_dic[variantID].exon=[] tx=1 for i in range(len(chr_start_list)): exon=EXON(variant=row[0],chr=row[1],strand=row[2]) exon.number=i+1 if int(chr_start_list[i])>=variant_dic[variantID].cdsStart and int(chr_end_list[i])<=variant_dic[variantID].cdsEnd: exon.start=int(chr_start_list[i]) exon.end=int(chr_end_list[i]) exonlen=exon.end-exon.start exon.trans_start=tx exon.trans_end=tx+exonlen-1 tx+=exonlen elif int(chr_end_list[i])>=variant_dic[variantID].cdsStart and int(chr_start_list[i])<=variant_dic[variantID].cdsStart: exon.start=variant_dic[variantID].cdsStart exon.end=int(chr_end_list[i]) exonlen=exon.end-exon.start exon.trans_start=tx exon.trans_end=tx+exonlen-1 tx+=exonlen
# Build the ISOFORM record for the variant described by `row` and fill
# variant_exons[row[0]] with one EXON per entry of the exon coordinate lists,
# keyed by exon number.  `transcript_length` must already hold the running
# total before this fragment runs; each exon length is added to it.
variant_exons[row[0]] = {}
variant = ISOFORM(
    id=row[0],
    chr=row[1],
    strand=row[2],
    chr_start=int(row[3]),
    chr_end=int(row[4]),
    cds_start=int(row[5]),
    cds_end=int(row[6]),
    exon=int(row[7]),
)
variant.chr_span = int(row[4]) - int(row[3])
strand = row[2]
startlist = row[8].split(',')
endlist = row[9].split(',')

# The final element of endlist is not treated as an exon.
exon_total = len(endlist[:-1])
for i in range(exon_total):
    exon = EXON(variant=row[0], chr=row[1], strand=row[2])
    if variant.strand != "+":
        # Reverse numbering; `start` takes the end-list coordinate.
        exon.number = exon_total - i
        exon.start = int(endlist[i])
        exon.end = int(startlist[i])
    else:
        exon.number = i + 1
        exon.start = int(startlist[i])
        exon.end = int(endlist[i])
    exon.length = abs(exon.end - exon.start)
    transcript_length += exon.length
    variant_exons[row[0]][exon.number] = exon

variant.transcript_length = transcript_length