Beispiel #1
0
def write_gene_to_gtf(ofp, gene):
    """Write every transcript of *gene* to *ofp* as GTF text.

    Each transcript is rendered with ``transcript.build_gtf_lines`` (empty
    metadata dict, ``source="grit"``) and terminated with a newline. All
    rendered chunks are emitted with a single ``ofp.write`` call.

    Side effect: when ``config.FIX_CHRM_NAMES_FOR_UCSC`` is truthy, each
    transcript's ``chrm`` attribute is overwritten in place with the result
    of ``fix_chrm_name_for_ucsc``.

    Args:
        ofp: Object providing a ``write(str)`` method.
        gene: Object exposing an iterable ``transcripts`` attribute.

    Raises:
        AssertionError: If a transcript's ``gene_id`` is ``None``.
    """
    chunks = []
    for transcript in gene.transcripts:
        if config.FIX_CHRM_NAMES_FOR_UCSC:
            # NOTE: this mutates the transcript, not just the output.
            transcript.chrm = fix_chrm_name_for_ucsc(transcript.chrm)
        assert transcript.gene_id is not None, "transcript has no gene_id"
        # A fresh metadata dict is passed for every transcript.
        chunks.append(transcript.build_gtf_lines({}, source="grit") + "\n")

    ofp.write("".join(chunks))
Beispiel #2
0
def write_gene_to_gtf(ofp, gene):
    """Write every transcript of *gene* to *ofp* as GTF text.

    Each transcript is rendered with ``transcript.build_gtf_lines`` (empty
    metadata dict, ``source="grit"``) and terminated with a newline. All
    rendered chunks are emitted with a single ``ofp.write`` call.

    Side effect: when ``config.FIX_CHRM_NAMES_FOR_UCSC`` is truthy, each
    transcript's ``chrm`` attribute is overwritten in place with the result
    of ``fix_chrm_name_for_ucsc``.

    Args:
        ofp: Object providing a ``write(str)`` method.
        gene: Object exposing an iterable ``transcripts`` attribute.

    Raises:
        AssertionError: If a transcript's ``gene_id`` is ``None``.
    """
    chunks = []
    for transcript in gene.transcripts:
        if config.FIX_CHRM_NAMES_FOR_UCSC:
            # NOTE: this mutates the transcript, not just the output.
            transcript.chrm = fix_chrm_name_for_ucsc(transcript.chrm)
        assert transcript.gene_id is not None, "transcript has no gene_id"
        # A fresh metadata dict is passed for every transcript.
        chunks.append(transcript.build_gtf_lines({}, source="grit") + "\n")

    ofp.write("".join(chunks))
Beispiel #3
0
 def writeGff( self, ofp, contig_len=None, filter=None ):
     """Write one GFF line per bin in this container to *ofp*.

     For a '-' strand container the bins are first passed through
     ``self.reverse_strand(contig_len)``; otherwise the container itself
     is iterated. Each bin becomes a ``GenomicInterval`` that is rendered
     with ``create_gff_line`` and written followed by a newline.

     Args:
         ofp: Object providing a ``write(str)`` method.
         contig_len: Forwarded to ``self.reverse_strand``; required when
             ``self.strand`` is '-'.
         filter: When not ``None``, only bins whose ``type`` equals this
             value are written. The name shadows the builtin on purpose:
             it is the identifier the body already referenced.

     Raises:
         ValueError: If ``self.strand`` is '-' and ``contig_len`` is None.
     """
     if self.strand == '-':
         if contig_len is None:
             raise ValueError(
                 "contig_len is required to write bins on the '-' strand")
         writetable_bins = self.reverse_strand( contig_len )
     else:
         writetable_bins = self

     # The contig name is the same for every bin, so resolve it once.
     chrm = self.chrm
     if config.FIX_CHRM_NAMES_FOR_UCSC:
         chrm = fix_chrm_name_for_ucsc(chrm)

     for region_bin in writetable_bins:
         if filter is not None and region_bin.type != filter:
             continue
         # add 1 because gffs are 1-based
         # NOTE(review): stop is shifted as well as start -- confirm that
         # this matches the coordinate convention of the bins.
         region = GenomicInterval(chrm, self.strand,
                                  region_bin.start+1, region_bin.stop+1)
         # Assumes GenomicInterval unpacks like a 4-tuple
         # (chrm, strand, start, stop) -- TODO confirm.
         grp_id = "%s_%s_%i_%i" % region
         ofp.write( create_gff_line(region, grp_id) + "\n" )

     return
Beispiel #4
0
def write_gene_to_tracking_file(ofp, gene):
    """Write one tab-separated tracking line per transcript of *gene*.

    Every column except the last is left-justified to a fixed width. The
    columns are: tracking ID, class code, nearest reference ID, gene
    unique ID, gene short name, TSS ID (always '-'), locus
    (``contig:strand:start-stop``) and transcript length. Missing class
    code or reference transcript values are written as '-'.

    The gene short name is the transcript's ``gene_name`` if set, else its
    ``ref_gene`` if set, else '-'.

    The contig name comes from ``gene.chrm`` and is passed through
    ``fix_chrm_name_for_ucsc`` when ``config.FIX_CHRM_NAMES_FOR_UCSC`` is
    truthy. A gene without transcripts produces no output.

    Args:
        ofp: Object providing a ``write(str)`` method.
        gene: Object exposing ``chrm`` and an iterable ``transcripts``.
    """
    contig_name = gene.chrm
    if config.FIX_CHRM_NAMES_FOR_UCSC:
        contig_name = fix_chrm_name_for_ucsc(contig_name)

    lines = []
    for t in gene.transcripts:
        # Prefer the transcript's own gene name, then the reference gene.
        if t.gene_name is not None:
            gene_short_name = t.gene_name
        elif t.ref_gene is not None:
            gene_short_name = t.ref_gene
        else:
            gene_short_name = '-'

        class_code = (
            '-' if t.ref_match_class_code is None else t.ref_match_class_code)
        nearest_ref_id = '-' if t.ref_trans is None else t.ref_trans
        locus = "%s:%s:%i-%i" % (contig_name, t.strand, t.start, t.stop)

        fields = [
            # tracking ID
            t.id.ljust(20),
            # class code
            class_code.ljust(10),
            # nearest ref id
            nearest_ref_id.ljust(20),
            # gene unique id
            t.gene_id.ljust(20),
            # gene short name
            gene_short_name.ljust(20),
            # TSS ID
            '-'.ljust(10),
            # locus
            locus.ljust(30),
            # transcript length
            str(t.calc_length()),
        ]
        # Terminate each line here so an empty gene writes nothing at all.
        lines.append("\t".join(fields) + "\n")

    ofp.write("".join(lines))
Beispiel #5
0
def write_gene_to_tracking_file(ofp, gene):
    """Write one tab-separated tracking line per transcript of *gene*.

    Every column except the last is left-justified to a fixed width. The
    columns are: tracking ID, class code, nearest reference ID, gene
    unique ID, gene short name, TSS ID (always '-'), locus
    (``contig:strand:start-stop``) and transcript length. Missing class
    code or reference transcript values are written as '-'.

    The gene short name is the transcript's ``gene_name`` if set, else its
    ``ref_gene`` if set, else '-'.

    The contig name comes from ``gene.chrm`` and is passed through
    ``fix_chrm_name_for_ucsc`` when ``config.FIX_CHRM_NAMES_FOR_UCSC`` is
    truthy. A gene without transcripts produces no output.

    Args:
        ofp: Object providing a ``write(str)`` method.
        gene: Object exposing ``chrm`` and an iterable ``transcripts``.
    """
    contig_name = gene.chrm
    if config.FIX_CHRM_NAMES_FOR_UCSC:
        contig_name = fix_chrm_name_for_ucsc(contig_name)

    lines = []
    for t in gene.transcripts:
        # Prefer the transcript's own gene name, then the reference gene.
        if t.gene_name is not None:
            gene_short_name = t.gene_name
        elif t.ref_gene is not None:
            gene_short_name = t.ref_gene
        else:
            gene_short_name = '-'

        class_code = (
            '-' if t.ref_match_class_code is None else t.ref_match_class_code)
        nearest_ref_id = '-' if t.ref_trans is None else t.ref_trans
        locus = "%s:%s:%i-%i" % (contig_name, t.strand, t.start, t.stop)

        fields = [
            # tracking ID
            t.id.ljust(20),
            # class code
            class_code.ljust(10),
            # nearest ref id
            nearest_ref_id.ljust(20),
            # gene unique id
            t.gene_id.ljust(20),
            # gene short name
            gene_short_name.ljust(20),
            # TSS ID
            '-'.ljust(10),
            # locus
            locus.ljust(30),
            # transcript length
            str(t.calc_length()),
        ]
        # Terminate each line here so an empty gene writes nothing at all.
        lines.append("\t".join(fields) + "\n")

    ofp.write("".join(lines))
Beispiel #6
0
    def write_elements_bed( self, ofp ):
        """Write this gene and its elements to *ofp* as BED lines.

        The first line describes the gene itself, with one block per entry
        in ``self.regions``. It is followed by one line per entry in
        ``self.elements`` whose ``type`` is not ``None``; untyped elements
        are skipped. Every line is produced by ``create_bed_line`` and
        written with a trailing newline.

        All lines carry a fixed score of 1000. Introns are drawn with thin
        lines, every other element type with thick lines.

        The contig name comes from ``self.chrm`` and is passed through
        ``fix_chrm_name_for_ucsc`` when ``config.FIX_CHRM_NAMES_FOR_UCSC``
        is truthy.

        Args:
            ofp: Object providing a ``write(str)`` method.

        Raises:
            KeyError: If an element's type is missing from the feature or
                color mapping.
        """
        # Element type -> feature name written to the BED line.
        feature_mapping = {
            'GENE': 'gene',
            'CAGE_PEAK': 'promoter',
            'SE_GENE': 'single_exon_gene',
            'TSS_EXON': 'tss_exon',
            'EXON': 'internal_exon',
            'TES_EXON': 'tes_exon',
            'INTRON': 'intron',
            'POLYA': 'polya',
            'INTERGENIC_SPACE': 'intergenic',
            'RETAINED_INTRON': 'retained_intron',
            'UNKNOWN': 'UNKNOWN'
        }

        # Element type -> "R,G,B" color string for the BED line.
        color_mapping = {
            'GENE': '200,200,200',
            'CAGE_PEAK': '153,255,000',
            'SE_GENE': '000,000,200',
            'TSS_EXON': '140,195,59',
            'EXON': '000,000,000',
            'TES_EXON': '255,51,255',
            'INTRON': '100,100,100',
            'POLYA': '255,0,0',
            'INTERGENIC_SPACE': '254,254,34',
            'RETAINED_INTRON': '255,255,153',
            'UNKNOWN': '0,0,0'
        }

        chrm = self.chrm
        if config.FIX_CHRM_NAMES_FOR_UCSC:
            chrm = fix_chrm_name_for_ucsc(chrm)

        # write the gene line
        bed_line = create_bed_line( chrm, self.strand,
                                    self.start, self.stop+1,
                                    feature_mapping['GENE'],
                                    score=1000,
                                    color=color_mapping['GENE'],
                                    use_thick_lines=True,
                                    blocks=[(x.start, x.stop) for x in self.regions])
        ofp.write( bed_line + "\n"  )

        for element in self.elements:
            element_type = element.type
            # Elements without a type are not written.
            if element_type is None:
                continue

            # The score is a constant 1000: an fpkm-scaled score used to be
            # computed here but was always overwritten before use.
            # also, add 1 to stop because beds are open-closed ( which means
            # no net change for the stop coordinate )
            bed_line = create_bed_line( chrm, self.strand,
                                        element.start, element.stop+1,
                                        feature_mapping[element_type],
                                        score=1000,
                                        color=color_mapping[element_type],
                                        use_thick_lines=(element_type != 'INTRON'),
                                        blocks=[])
            ofp.write( bed_line + "\n"  )

        return