Esempio n. 1
0
                        lin += '\t'+"ID="+mRNA_ID+';Parent='+chr_name[token[0]]+str(count).zfill(7)+";Name="+mRNA_ID+';Type='+gene_type+';'
                        print lin
                    
                    else:
                        ### print rest
                        match = re.search(r'ID=.+',line)
                        match = match.group().split(';')[0].replace('ID=','')       
                        variant_no = match.split('.')[-1].replace('exon','')                           ### Get the mRNA variant number
                        lin = '\t'.join(line.split('\t')[0:len(line.split('\t'))-1]) ### all information except ID
                        lin += '\t'+"ID="+mRNA_ID+'.'+token[2]+'.'+variant_no+';Parent='+mRNA_ID+';Type='+gene_type+';'
                        print lin
                    

                            
        

if __name__ == "__main__":
    ### Script entry point: parse the command line, hash the N counts and
    ### rewrite the gene names one chromosome at a time.

    ### options() is defined elsewhere in this file; it returns the two values
    ### used below (presumably the GFF3 file and the N-count file -- confirm)
    gff3,Ns = options(sys.argv[1:])

    ### NOTE(review): `hash` and `chr` shadow builtins. The names are kept
    ### because they are module-level globals that code outside this block
    ### may read.
    try:
        ### hash the N counts from the file
        hash = A_hash_file.hash_file(Ns)

        ### run it by chromosome, visiting the keys of `size` in sorted order
        size = E_get_chr_size_gff3.get_size(gff3)
        for chr in sorted(size):
            ### modify gene names
            modifyGeneNames(gff3,hash,chr)
    finally:
        ### close the logfile even when a step above raises; the exception
        ### still propagates after the close.
        ### NOTE(review): `o` is not defined in this block -- presumably the
        ### log handle opened before/inside options(); confirm.
        o.close()
Esempio n. 2
0
                        print lin

                    else:
                        ### print rest
                        match = re.search(r'ID=.+', line)
                        match = match.group().split(';')[0].replace('ID=', '')
                        variant_no = match.split('.')[-1].replace(
                            'exon', '')  ### Get the mRNA variant number
                        lin = '\t'.join(
                            line.split('\t')[0:len(line.split('\t')) -
                                             1])  ### all information except ID
                        lin += '\t' + "ID=" + mRNA_ID + '.' + token[
                            2] + '.' + variant_no + ';Parent=' + mRNA_ID + ';'
                        print lin


if __name__ == "__main__":
    ### Script entry point: parse the command line, hash the N counts and
    ### rewrite the gene names one chromosome at a time.

    ### options() is defined elsewhere in this file; it returns the three
    ### values used below (presumably the GFF3 file, the N-count file and a
    ### species label -- confirm)
    gff3, N_counts, specie = options(sys.argv[1:])

    ### NOTE(review): `hash` and `chr` shadow builtins. The names are kept
    ### because they are module-level globals that code outside this block
    ### may read.
    try:
        ### hash the N counts from the file
        hash = hash_Ns(N_counts)

        ### run it by chromosome, visiting the keys of `size` in sorted order
        size = E_get_chr_size_gff3.get_size(gff3)
        for chr in sorted(size):
            ### modify gene names
            modifyGeneNames(gff3, chr, hash, specie)
    finally:
        ### close the logfile even when a step above raises; the exception
        ### still propagates after the close.
        ### NOTE(review): `o` is not defined in this block -- presumably the
        ### log handle opened before/inside options(); confirm.
        o.close()
Esempio n. 3
0
                            if i in hash_call:
                                cds_call_len += 1
                        print id + '\t' + str(exon_len) + '\t' + str(
                            exon_call_len) + '\t' + str(cds_len) + '\t' + str(
                                cds_call_len)
                    first_transcript = False
                    hash_exon = {}
                    hash_cds = {}
                    id = str(obj)
                elif obj.types() == "exon":
                    for i in range(int(obj.starts()), int(obj.ends())):
                        hash_exon[i] = ''
                elif obj.types() == "CDS":
                    for i in range(int(obj.starts()), int(obj.ends())):
                        hash_cds[i] = ''
    print id + '\t' + str(exon_len) + '\t' + str(exon_call_len) + '\t' + str(
        cds_len) + '\t' + str(cds_call_len)


if __name__ == "__main__":
    ### Script entry point: parse the command line, then for every chromosome
    ### load its calls and report the exon/CDS fractions.

    ### options() is defined elsewhere in this file and its return value is
    ### ignored here. NOTE(review): `ifile` is not assigned in this block --
    ### presumably a global set by options(); confirm.
    options(sys.argv[1:])

    try:
        size = E_get_chr_size_gff3.get_size(ifile)

        ### chromosomes are visited in the native iteration order of `size`
        ### (not sorted)
        for chrom in size:
            hash_call = parseCall(chrom)
            get_exon_fraction(chrom, hash_call)
    finally:
        ### close the logfile even when a step above raises; the exception
        ### still propagates after the close.
        ### NOTE(review): `o` is not defined in this block -- presumably the
        ### log handle opened before/inside options(); confirm.
        o.close()
Esempio n. 4
0
                            if i in hash_call:
                                cds_call_len += 1
                        print id + "\t" + str(exon_len) + "\t" + str(exon_call_len) + "\t" + str(cds_len) + "\t" + str(
                            cds_call_len
                        )
                    first_transcript = False
                    hash_exon = {}
                    hash_cds = {}
                    id = str(obj)
                elif obj.types() == "exon":
                    for i in range(int(obj.starts()), int(obj.ends())):
                        hash_exon[i] = ""
                elif obj.types() == "CDS":
                    for i in range(int(obj.starts()), int(obj.ends())):
                        hash_cds[i] = ""
    print id + "\t" + str(exon_len) + "\t" + str(exon_call_len) + "\t" + str(cds_len) + "\t" + str(cds_call_len)


if __name__ == "__main__":
    ### Script entry point: parse the command line, then for every chromosome
    ### load its calls and report the exon/CDS fractions.

    ### options() is defined elsewhere in this file and its return value is
    ### ignored here. NOTE(review): `ifile` is not assigned in this block --
    ### presumably a global set by options(); confirm.
    options(sys.argv[1:])

    try:
        size = E_get_chr_size_gff3.get_size(ifile)

        ### chromosomes are visited in the native iteration order of `size`
        ### (not sorted)
        for chrom in size:
            hash_call = parseCall(chrom)
            get_exon_fraction(chrom, hash_call)
    finally:
        ### close the logfile even when a step above raises; the exception
        ### still propagates after the close.
        ### NOTE(review): `o` is not defined in this block -- presumably the
        ### log handle opened before/inside options(); confirm.
        o.close()