# Collapsed fragment: the tail of a GFF3 line-rewriting routine (its start is
# outside this view) followed by the script entry point.
#   Tail: appends an attribute column (ID, Parent, Name, Type) to `lin` and
#     prints it. The `else` branch takes the first `ID=` value found in `line`,
#     removes 'exon' from its last dot-separated piece to get `variant_no`,
#     drops the last tab-separated column of `line`, and appends
#     ID=<mRNA_ID>.<token[2]>.<variant_no>;Parent=<mRNA_ID>;Type=<gene_type>;
#   Entry point: options(sys.argv[1:]) yields (gff3, Ns); Ns is passed to
#     A_hash_file.hash_file; modifyGeneNames(gff3, hash, chr) is called once per
#     key of E_get_chr_size_gff3.get_size(gff3), in sorted order; o.close() last.
# NOTE(review): as laid out here, everything after the first `###` is a comment
#   and `print lin` is a Python 2 print statement. Presumably the line breaks
#   were lost; restore them from the original file before running. TODO confirm.
# NOTE(review): `hash` and `chr` shadow builtins; `o` is not defined in this view.
lin += '\t'+"ID="+mRNA_ID+';Parent='+chr_name[token[0]]+str(count).zfill(7)+";Name="+mRNA_ID+';Type='+gene_type+';' print lin else: ### print rest match = re.search(r'ID=.+',line) match = match.group().split(';')[0].replace('ID=','') variant_no = match.split('.')[-1].replace('exon','') ### Get the mRNA variant number lin = '\t'.join(line.split('\t')[0:len(line.split('\t'))-1]) ### all information except ID lin += '\t'+"ID="+mRNA_ID+'.'+token[2]+'.'+variant_no+';Parent='+mRNA_ID+';Type='+gene_type+';' print lin if __name__ == "__main__": gff3,Ns = options(sys.argv[1:]) ### hash the N counts from the file hash = A_hash_file.hash_file(Ns) ### run it by chromosome size = E_get_chr_size_gff3.get_size(gff3) for chr in sorted(size): ### modify gene names modifyGeneNames(gff3,hash,chr) ### close the logfile o.close()
# Collapsed fragment: the tail of a GFF3 line-rewriting routine (its start is
# outside this view) followed by the script entry point.
#   Tail: prints `lin`. The `else` branch takes the first `ID=` value found in
#     `line`, removes 'exon' from its last dot-separated piece to get
#     `variant_no`, drops the last tab-separated column of `line`, and appends
#     ID=<mRNA_ID>.<token[2]>.<variant_no>;Parent=<mRNA_ID>;
#   Entry point: options(sys.argv[1:]) yields (gff3, N_counts, specie);
#     N_counts is passed to hash_Ns; modifyGeneNames(gff3, chr, hash, specie) is
#     called once per key of E_get_chr_size_gff3.get_size(gff3), in sorted
#     order; o.close() last.
# NOTE(review): as laid out here, everything after the first `###` is a comment
#   and `print lin` is a Python 2 print statement. Presumably the line breaks
#   were lost; restore them from the original file before running. TODO confirm.
# NOTE(review): `hash` and `chr` shadow builtins; `o` is not defined in this view.
print lin else: ### print rest match = re.search(r'ID=.+', line) match = match.group().split(';')[0].replace('ID=', '') variant_no = match.split('.')[-1].replace( 'exon', '') ### Get the mRNA variant number lin = '\t'.join( line.split('\t')[0:len(line.split('\t')) - 1]) ### all information except ID lin += '\t' + "ID=" + mRNA_ID + '.' + token[ 2] + '.' + variant_no + ';Parent=' + mRNA_ID + ';' print lin if __name__ == "__main__": gff3, N_counts, specie = options(sys.argv[1:]) ### hash the N counts from the file hash = hash_Ns(N_counts) ### run it by chromosome size = E_get_chr_size_gff3.get_size(gff3) for chr in sorted(size): ### modify gene names modifyGeneNames(gff3, chr, hash, specie) ### close the logfile o.close()
# Collapsed fragment: the tail of an exon/CDS counting routine (its start is
# outside this view) followed by the script entry point.
#   Tail: increments cds_call_len when `i` is a key of hash_call; prints one
#     tab-separated row of id, exon_len, exon_call_len, cds_len, cds_call_len;
#     then empties hash_exon / hash_cds and sets id = str(obj). For "exon" and
#     "CDS" records, every integer in range(int(obj.starts()), int(obj.ends()))
#     becomes a key of hash_exon / hash_cds. A final row is printed afterwards.
#   Entry point: options(sys.argv[1:]) is called and its return value is not
#     used; for each key of E_get_chr_size_gff3.get_size(ifile), hash_call =
#     parseCall(chrom) is passed to get_exon_fraction(chrom, hash_call);
#     o.close() last.
# NOTE(review): range() excludes obj.ends(); if ends() is an inclusive end
#   coordinate, the last position of each feature is not counted. Verify.
# NOTE(review): as laid out here, the first `###` comments out only the trailing
#   o.close(), and `print ...` is a Python 2 print statement. Presumably the
#   line breaks were lost; restore them before running. TODO confirm.
# NOTE(review): `id` shadows a builtin; `ifile` and `o` are not defined in this view.
if i in hash_call: cds_call_len += 1 print id + '\t' + str(exon_len) + '\t' + str( exon_call_len) + '\t' + str(cds_len) + '\t' + str( cds_call_len) first_transcript = False hash_exon = {} hash_cds = {} id = str(obj) elif obj.types() == "exon": for i in range(int(obj.starts()), int(obj.ends())): hash_exon[i] = '' elif obj.types() == "CDS": for i in range(int(obj.starts()), int(obj.ends())): hash_cds[i] = '' print id + '\t' + str(exon_len) + '\t' + str(exon_call_len) + '\t' + str( cds_len) + '\t' + str(cds_call_len) if __name__ == "__main__": options(sys.argv[1:]) size = E_get_chr_size_gff3.get_size(ifile) for chrom in size: hash_call = parseCall(chrom) get_exon_fraction(chrom, hash_call) ### close the logfile o.close()
# Collapsed fragment: the tail of an exon/CDS counting routine (its start is
# outside this view) followed by the script entry point.
#   Tail: increments cds_call_len when `i` is a key of hash_call; prints one
#     tab-separated row of id, exon_len, exon_call_len, cds_len, cds_call_len;
#     then empties hash_exon / hash_cds and sets id = str(obj). For "exon" and
#     "CDS" records, every integer in range(int(obj.starts()), int(obj.ends()))
#     becomes a key of hash_exon / hash_cds. A final row is printed afterwards.
#   Entry point: options(sys.argv[1:]) is called and its return value is not
#     used; for each key of E_get_chr_size_gff3.get_size(ifile), hash_call =
#     parseCall(chrom) is passed to get_exon_fraction(chrom, hash_call);
#     o.close() last.
# NOTE(review): range() excludes obj.ends(); if ends() is an inclusive end
#   coordinate, the last position of each feature is not counted. Verify.
# NOTE(review): as laid out here, the first `###` comments out only the trailing
#   o.close(), and `print ...` is a Python 2 print statement. Presumably the
#   line breaks were lost; restore them before running. TODO confirm.
# NOTE(review): `id` shadows a builtin; `ifile` and `o` are not defined in this view.
if i in hash_call: cds_call_len += 1 print id + "\t" + str(exon_len) + "\t" + str(exon_call_len) + "\t" + str(cds_len) + "\t" + str( cds_call_len ) first_transcript = False hash_exon = {} hash_cds = {} id = str(obj) elif obj.types() == "exon": for i in range(int(obj.starts()), int(obj.ends())): hash_exon[i] = "" elif obj.types() == "CDS": for i in range(int(obj.starts()), int(obj.ends())): hash_cds[i] = "" print id + "\t" + str(exon_len) + "\t" + str(exon_call_len) + "\t" + str(cds_len) + "\t" + str(cds_call_len) if __name__ == "__main__": options(sys.argv[1:]) size = E_get_chr_size_gff3.get_size(ifile) for chrom in size: hash_call = parseCall(chrom) get_exon_fraction(chrom, hash_call) ### close the logfile o.close()