Ejemplo n.º 1
0
def gene_patches(tx, tx_dict, ax, arrow=False):
    """Draw every isoform whose key in ``tx_dict`` contains ``tx`` on ``ax``.

    Each matching isoform is drawn 0.15 lower than the previous one. The
    transcript span is drawn either as a thin rectangle (``arrow`` is
    ``False``) or as an arrow pointing in the direction of the strand; arrows
    are always placed at y = 0.9. Exons are drawn as thicker rectangles; when
    ``SP.tx_info`` reports no exons, a single rectangle spanning the CDS is
    drawn instead. Y-axis ticks are removed.

    Args:
        tx: Value tested with ``in`` against every key of ``tx_dict``
            (presumably a transcript/gene name substring -- confirm with
            callers).
        tx_dict: Transcript annotation container passed on to ``SP.tx_info``.
        ax: Axes-like object providing ``add_patch``, ``arrow`` and
            ``get_yaxis``.
        arrow: Anything other than the literal ``False`` selects arrow mode.

    Returns:
        The strand of the last isoform drawn, or ``None`` when no key
        matches ``tx``.
    """
    matching = [name for name in tx_dict if tx in name]
    if not matching:
        return None

    fill = {'edgecolor': '0.1', 'facecolor': '0.1'}

    for row, isoform in enumerate(matching):
        (start, end, strand,
         CDS_start, CDS_end, exons, chrom) = SP.tx_info(isoform, tx_dict)
        drop = row * 0.15  # vertical offset of this isoform's track
        span = end - start

        if arrow is not False:
            # Shorten the shaft by the head length so the tip lands exactly on
            # the transcript boundary.
            if strand == '+':
                ax.arrow(start, 0.9, span - 0.02 * span, 0, linewidth=2,
                         head_width=0.1, head_length=0.02 * span,
                         fc='k', ec='k')
            elif strand == '-':
                back = start - end
                ax.arrow(end, 0.9, back - 0.02 * back, 0, linewidth=2,
                         head_width=0.1, head_length=0.02 * span,
                         fc='k', ec='k')
        else:
            ax.add_patch(patches.Rectangle((start, 0.8 - drop), span, 0.04,
                                           **fill))

        if exons is None:
            # No exon annotation: fall back to one box covering the CDS.
            ax.add_patch(patches.Rectangle((CDS_start, 0.75 - drop),
                                           CDS_end - CDS_start, 0.10, **fill))
        else:
            boxes = [patches.Rectangle((left, 0.775 - drop), right - left,
                                       0.10, **fill)
                     for left, right in exons]
            for box in boxes:
                ax.add_patch(box)

        ax.get_yaxis().set_ticks([])

    return strand
Ejemplo n.º 2
0
def gene_patches(tx, tx_dict, ax, arrow=False):
    """Add transcript, exon and CDS glyphs for the isoforms of ``tx`` to ``ax``.

    All keys of ``tx_dict`` for which ``tx in key`` holds are treated as
    isoforms and stacked downwards in steps of 0.15. When ``arrow`` is the
    literal ``False`` the transcript span is a thin rectangle; otherwise it is
    an arrow at y = 0.9 pointing along the strand. Exons (or, when
    ``SP.tx_info`` returns ``None`` for them, the CDS) are drawn as thicker
    rectangles, and the y-axis ticks are cleared.

    Args:
        tx: Value matched with ``in`` against the keys of ``tx_dict``.
        tx_dict: Annotation container handed to ``SP.tx_info``.
        ax: Object exposing ``add_patch``, ``arrow`` and ``get_yaxis``.
        arrow: Pass anything but ``False`` to draw arrows instead of bars.

    Returns:
        ``None`` if nothing matches, else the strand of the last isoform
        processed.
    """
    isoforms = [key for key in tx_dict if tx in key]
    if not isoforms:
        return None

    colour = '0.1'
    exon_height = 0.10

    for level, isoform in enumerate(isoforms):
        info = SP.tx_info(isoform, tx_dict)
        start, end, strand, CDS_start, CDS_end, exons, chrom = info
        shift = level * 0.15
        length = end - start
        head = 0.02 * length

        if arrow is False:
            body = patches.Rectangle((start, 0.8 - shift), length, 0.04,
                                     edgecolor=colour, facecolor=colour)
            ax.add_patch(body)
        elif strand == '+':
            ax.arrow(start, 0.9, length - 0.02 * length, 0,
                     linewidth=2, head_width=0.1, head_length=head,
                     fc='k', ec='k')
        elif strand == '-':
            reverse = start - end
            ax.arrow(end, 0.9, reverse - 0.02 * reverse, 0,
                     linewidth=2, head_width=0.1, head_length=head,
                     fc='k', ec='k')

        if exons is not None:
            # Build every exon rectangle first, then attach them to the axes.
            rects = []
            for exon_left, exon_right in exons:
                rects.append(
                    patches.Rectangle((exon_left, 0.775 - shift),
                                      exon_right - exon_left, exon_height,
                                      edgecolor=colour, facecolor=colour))
            for rect in rects:
                ax.add_patch(rect)
        else:
            cds = patches.Rectangle((CDS_start, 0.75 - shift),
                                    CDS_end - CDS_start, exon_height,
                                    edgecolor=colour, facecolor=colour)
            ax.add_patch(cds)

        ax.get_yaxis().set_ticks([])

    return strand
Ejemplo n.º 3
0
def count_reads_in_transcript(bam_files, df, gff3, organism=None):
    """Compute transcript and intron read densities for each BAM file.

    For every transcript named in ``df['transcript']`` the reads overlapping
    the transcript span are fetched. Only reads on the opposite strand are
    counted: reverse reads for '+' transcripts, forward reads for '-'
    transcripts (presumably a reverse-stranded library -- confirm). A counted
    read is credited to an intron when its ``reference_end`` ('+') or
    ``reference_start`` ('-') lies inside that intron's window:

    * '+' strand: ``position`` .. ``position + intron size`` (inclusive)
    * '-' strand: ``position - intron size`` .. ``position`` (inclusive)

    Args:
        bam_files: Iterable of BAM file paths.
        df: DataFrame with ``'transcript'``, ``'position'`` and
            ``'intron size'`` columns; its index labels identify introns.
        gff3: Annotation passed to ``SP.build_transcript_dict``.
        organism: ``'pombe'`` appends ``'.1'`` to transcript names and maps
            chr1/chr2/chr3 to I/II/III before querying the BAM; any other
            value appends ``'T0'``.

    Returns:
        dict mapping each BAM path to a DataFrame indexed like ``df`` with
        columns ``'total'`` (counted reads per 1000 bases of transcript span)
        and ``'intron'`` (reads per intron base divided by reads per
        transcript base). Rows whose computation divides by zero (no counted
        reads, zero-length span or zero intron size) hold NaN in both columns
        and their index label is printed.
    """
    tx_dict = SP.build_transcript_dict(gff3, organism=organism)
    is_pombe = organism == 'pombe'
    suffix = '.1' if is_pombe else 'T0'
    lat_rom = {'chr1': 'I', 'chr2': 'II', 'chr3': 'III'}

    readers = {}
    all_reads = {}
    try:
        # Open every file up front so a bad path fails before any counting.
        for bam_file in bam_files:
            if bam_file not in readers:
                readers[bam_file] = pysam.Samfile(bam_file)

        for bam, reader in readers.items():
            counts = pd.DataFrame(index=df.index, columns=['total', 'intron'])
            all_reads[bam] = counts

            for tx in set(df['transcript']):
                tx_df = df[df['transcript'] == tx]
                start, end, strand, _, _, _, chrom = SP.tx_info(tx + suffix,
                                                                tx_dict)
                if is_pombe:
                    chrom = lat_rom[chrom]

                # Half-open [lo, hi) window per intron, keyed by index label.
                windows = {}
                for ix, row in tx_df.iterrows():
                    if strand == '+':
                        lo = int(row['position'])
                        hi = int(row['position'] + row['intron size']) + 1
                    elif strand == '-':
                        lo = int(row['position'] - row['intron size'])
                        hi = int(row['position']) + 1
                    else:
                        # Unknown strand: no read is counted below, so the
                        # rows end up as NaN; use an empty window.
                        lo = hi = 0
                    windows[ix] = (lo, hi)
                hits = dict.fromkeys(windows, 0)

                reads = 0
                for read in reader.fetch(chrom, start, end):
                    if strand == '+' and read.is_reverse:
                        pos = read.reference_end
                    elif strand == '-' and not read.is_reverse:
                        pos = read.reference_start
                    else:
                        continue
                    reads += 1
                    if pos is None:
                        # Read without an aligned end coordinate: it counts
                        # toward the transcript total but hits no intron.
                        continue
                    for ix, (lo, hi) in windows.items():
                        if lo <= pos < hi:
                            hits[ix] += 1

                for ix in windows:
                    try:
                        density = reads / float(end - start)
                        counts.loc[ix, 'total'] = density * 1000
                        counts.loc[ix, 'intron'] = (
                            (hits[ix] / float(tx_df.loc[ix, 'intron size'])) /
                            density)
                    except ZeroDivisionError:
                        counts.loc[ix, 'total'] = np.nan
                        counts.loc[ix, 'intron'] = np.nan
                        print(ix)
    finally:
        # Release the BAM handles even when counting fails part-way.
        for reader in readers.values():
            reader.close()

    return all_reads
Ejemplo n.º 4
0
def count_reads_in_transcript(bam_files, df, gff3, organism=None):
    """Compute transcript and intron read densities for each BAM file.

    For every transcript named in ``df['transcript']`` the reads overlapping
    the transcript span are fetched. Only reads on the opposite strand are
    counted: reverse reads for '+' transcripts, forward reads for '-'
    transcripts (presumably a reverse-stranded library -- confirm). A counted
    read is credited to an intron when its ``reference_end`` ('+') or
    ``reference_start`` ('-') lies inside that intron's window:

    * '+' strand: ``position`` .. ``position + intron size`` (inclusive)
    * '-' strand: ``position - intron size`` .. ``position`` (inclusive)

    Args:
        bam_files: Iterable of BAM file paths.
        df: DataFrame with ``'transcript'``, ``'position'`` and
            ``'intron size'`` columns; its index labels identify introns.
        gff3: Annotation passed to ``SP.build_transcript_dict``.
        organism: ``'pombe'`` appends ``'.1'`` to transcript names and maps
            chr1/chr2/chr3 to I/II/III before querying the BAM; any other
            value appends ``'T0'``.

    Returns:
        dict mapping each BAM path to a DataFrame indexed like ``df`` with
        columns ``'total'`` (counted reads per 1000 bases of transcript span)
        and ``'intron'`` (reads per intron base divided by reads per
        transcript base). Rows whose computation divides by zero (no counted
        reads, zero-length span or zero intron size) hold NaN in both columns
        and their index label is printed.
    """
    tx_dict = SP.build_transcript_dict(gff3, organism=organism)
    is_pombe = organism == 'pombe'
    suffix = '.1' if is_pombe else 'T0'
    lat_rom = {'chr1': 'I', 'chr2': 'II', 'chr3': 'III'}

    readers = {}
    all_reads = {}
    try:
        # Open every file up front so a bad path fails before any counting.
        for bam_file in bam_files:
            if bam_file not in readers:
                readers[bam_file] = pysam.Samfile(bam_file)

        for bam, reader in readers.items():
            counts = pd.DataFrame(index=df.index,
                                  columns=['total', 'intron'])
            all_reads[bam] = counts

            for tx in set(df['transcript']):
                tx_df = df[df['transcript'] == tx]
                start, end, strand, _, _, _, chrom = SP.tx_info(
                    tx + suffix, tx_dict)
                if is_pombe:
                    chrom = lat_rom[chrom]

                # Half-open [lo, hi) window per intron, keyed by index label.
                windows = {}
                for ix, row in tx_df.iterrows():
                    if strand == '+':
                        lo = int(row['position'])
                        hi = int(row['position'] + row['intron size']) + 1
                    elif strand == '-':
                        lo = int(row['position'] - row['intron size'])
                        hi = int(row['position']) + 1
                    else:
                        # Unknown strand: no read is counted below, so the
                        # rows end up as NaN; use an empty window.
                        lo = hi = 0
                    windows[ix] = (lo, hi)
                hits = dict.fromkeys(windows, 0)

                reads = 0
                for read in reader.fetch(chrom, start, end):
                    if strand == '+' and read.is_reverse:
                        pos = read.reference_end
                    elif strand == '-' and not read.is_reverse:
                        pos = read.reference_start
                    else:
                        continue
                    reads += 1
                    if pos is None:
                        # Read without an aligned end coordinate: it counts
                        # toward the transcript total but hits no intron.
                        continue
                    for ix, (lo, hi) in windows.items():
                        if lo <= pos < hi:
                            hits[ix] += 1

                for ix in windows:
                    try:
                        density = reads / float(end - start)
                        counts.loc[ix, 'total'] = density * 1000
                        counts.loc[ix, 'intron'] = (
                            (hits[ix] /
                             float(tx_df.loc[ix, 'intron size'])) / density)
                    except ZeroDivisionError:
                        counts.loc[ix, 'total'] = np.nan
                        counts.loc[ix, 'intron'] = np.nan
                        print(ix)
    finally:
        # Release the BAM handles even when counting fails part-way.
        for reader in readers.values():
            reader.close()

    return all_reads