def gene_patches(tx, tx_dict, ax, arrow=False):
    """Draw a gene model for every isoform of ``tx`` onto ``ax``.

    Isoforms are the keys of ``tx_dict`` that contain ``tx`` as a substring.
    Isoform number ``n`` (in ``tx_dict`` iteration order) has its boxes
    shifted down by ``n * 0.15`` y-units.  For each isoform the transcript
    span is drawn either as a thin bar or, when ``arrow`` is set, as an arrow
    pointing in the direction of the strand; on top of that one box per exon
    is drawn, or a single CDS box when the isoform has no exon list.
    The y-axis ticks of ``ax`` are removed.

    Args:
        tx: Identifier to search for among the keys of ``tx_dict``.
        tx_dict: Transcript annotation mapping, passed through to
            ``SP.tx_info`` for each matching key.
        ax: Axes-like object; ``add_patch``, ``arrow`` and ``get_yaxis`` are
            called on it.
        arrow: When truthy, draw the transcript span as a strand-oriented
            arrow instead of a thin bar.

    Returns:
        The strand reported by ``SP.tx_info`` for the last isoform drawn, or
        ``None`` when no key of ``tx_dict`` contains ``tx``.
    """
    isoforms = [name for name in tx_dict if tx in name]
    if not isoforms:
        return None

    box_style = {'edgecolor': '0.1', 'facecolor': '0.1'}
    arrow_style = {'linewidth': 2, 'head_width': 0.1, 'fc': 'k', 'ec': 'k'}
    strand = None

    for row, iso in enumerate(isoforms):
        start, end, strand, CDS_start, CDS_end, exons, _chrom = SP.tx_info(
            iso, tx_dict)
        y_shift = row * 0.15
        span = end - start

        # Truthiness (not ``arrow is False``) so that falsy flags such as 0
        # or numpy.False_ select the bar as the caller intended.
        if not arrow:
            ax.add_patch(patches.Rectangle((start, 0.8 - y_shift), span, 0.04,
                                           **box_style))
        else:
            # The head takes 2% of the span; the shaft is shortened by the
            # same amount so the arrow tip lands on the transcript end.
            # NOTE(review): arrows sit at y=0.9 for every isoform, so they
            # overlap when several isoforms match -- confirm this is intended.
            head = 0.02 * span
            shaft = span - head
            if strand == '+':
                ax.arrow(start, 0.9, shaft, 0, head_length=head,
                         **arrow_style)
            elif strand == '-':
                ax.arrow(end, 0.9, -shaft, 0, head_length=head,
                         **arrow_style)

        if exons is not None:
            for exon_start, exon_stop in exons:
                ax.add_patch(patches.Rectangle(
                    (exon_start, 0.775 - y_shift), exon_stop - exon_start,
                    0.10, **box_style))
        else:
            ax.add_patch(patches.Rectangle(
                (CDS_start, 0.75 - y_shift), CDS_end - CDS_start, 0.10,
                **box_style))

    # At least one isoform was drawn (empty case returned above).
    ax.get_yaxis().set_ticks([])
    return strand
def gene_patches(tx, tx_dict, ax, arrow=False):
    """Draw a gene model for every isoform of ``tx`` onto ``ax``.

    Isoforms are the keys of ``tx_dict`` that contain ``tx`` as a substring.
    Isoform number ``n`` (in ``tx_dict`` iteration order) has its boxes
    shifted down by ``n * 0.15`` y-units.  For each isoform the transcript
    span is drawn either as a thin bar or, when ``arrow`` is set, as an arrow
    pointing in the direction of the strand; on top of that one box per exon
    is drawn, or a single CDS box when the isoform has no exon list.
    The y-axis ticks of ``ax`` are removed.

    Args:
        tx: Identifier to search for among the keys of ``tx_dict``.
        tx_dict: Transcript annotation mapping, passed through to
            ``SP.tx_info`` for each matching key.
        ax: Axes-like object; ``add_patch``, ``arrow`` and ``get_yaxis`` are
            called on it.
        arrow: When truthy, draw the transcript span as a strand-oriented
            arrow instead of a thin bar.

    Returns:
        The strand reported by ``SP.tx_info`` for the last isoform drawn, or
        ``None`` when no key of ``tx_dict`` contains ``tx``.
    """
    isoforms = [name for name in tx_dict if tx in name]
    if not isoforms:
        return None

    box_style = {'edgecolor': '0.1', 'facecolor': '0.1'}
    arrow_style = {'linewidth': 2, 'head_width': 0.1, 'fc': 'k', 'ec': 'k'}
    strand = None

    for row, iso in enumerate(isoforms):
        start, end, strand, CDS_start, CDS_end, exons, _chrom = SP.tx_info(
            iso, tx_dict)
        y_shift = row * 0.15
        span = end - start

        # Truthiness (not ``arrow is False``) so that falsy flags such as 0
        # or numpy.False_ select the bar as the caller intended.
        if not arrow:
            ax.add_patch(patches.Rectangle((start, 0.8 - y_shift), span, 0.04,
                                           **box_style))
        else:
            # The head takes 2% of the span; the shaft is shortened by the
            # same amount so the arrow tip lands on the transcript end.
            # NOTE(review): arrows sit at y=0.9 for every isoform, so they
            # overlap when several isoforms match -- confirm this is intended.
            head = 0.02 * span
            shaft = span - head
            if strand == '+':
                ax.arrow(start, 0.9, shaft, 0, head_length=head,
                         **arrow_style)
            elif strand == '-':
                ax.arrow(end, 0.9, -shaft, 0, head_length=head,
                         **arrow_style)

        if exons is not None:
            for exon_start, exon_stop in exons:
                ax.add_patch(patches.Rectangle(
                    (exon_start, 0.775 - y_shift), exon_stop - exon_start,
                    0.10, **box_style))
        else:
            ax.add_patch(patches.Rectangle(
                (CDS_start, 0.75 - y_shift), CDS_end - CDS_start, 0.10,
                **box_style))

    # At least one isoform was drawn (empty case returned above).
    ax.get_yaxis().set_ticks([])
    return strand
def count_reads_in_transcript(bam_files, df, gff3, organism=None):
    """Compute per-intron read densities for each BAM file.

    For every transcript named in ``df['transcript']`` the reads overlapping
    the transcript span are fetched from each BAM file.  Only reads whose
    orientation is opposite to the annotated strand are counted (reverse
    reads for '+' transcripts, forward reads for '-' transcripts).  A counted
    read is additionally attributed to an intron when its anchor coordinate
    falls inside that intron's window:

    * '+' strand: ``read.reference_end`` in
      ``[position, position + intron size]``
    * '-' strand: ``read.reference_start`` in
      ``[position - intron size, position]``

    Args:
        bam_files: Iterable of BAM file paths.
        df: DataFrame with the columns ``'transcript'``, ``'position'`` and
            ``'intron size'``; its index labels identify the introns.
        gff3: Annotation source handed to ``SP.build_transcript_dict``.
        organism: Forwarded to ``SP.build_transcript_dict``.  ``'pombe'``
            selects the ``'.1'`` transcript suffix and the chr1-3 -> I-III
            chromosome renaming; anything else uses the ``'T0'`` suffix.

    Returns:
        dict mapping each BAM path to a DataFrame indexed like ``df`` with
        columns ``'total'`` (counted reads per 1000 positions of transcript
        span) and ``'intron'`` (intron hits per intron position divided by
        counted reads per transcript position).  Both are NaN for an intron
        whenever the computation divides by zero (no counted reads, zero
        intron size or zero-length span); the index label of such introns is
        printed.
    """
    tx_dict = SP.build_transcript_dict(gff3, organism=organism)

    # Loop invariants, hoisted out of the per-transcript loop.
    is_pombe = organism == 'pombe'
    tx_suffix = '.1' if is_pombe else 'T0'
    lat_rom = {'chr1': 'I', 'chr2': 'II', 'chr3': 'III'}

    bams = {}
    all_reads = {}
    try:
        # Open everything up front so an unreadable file fails before any
        # counting starts; duplicates are opened only once.
        for bam_file in bam_files:
            if bam_file not in bams:
                bams[bam_file] = pysam.Samfile(bam_file)

        for bam, reader in bams.items():
            counts = pd.DataFrame(index=df.index, columns=['total', 'intron'])
            all_reads[bam] = counts

            for tx in set(df['transcript']):
                tx_df = df[df['transcript'] == tx]
                start, end, strand, _cds_start, _cds_end, _exons, chrom = \
                    SP.tx_info(tx + tx_suffix, tx_dict)
                if is_pombe:
                    chrom = lat_rom[chrom]

                # ix -> [window_start, window_stop (exclusive), size, hits]
                introns = {}
                for ix, row in tx_df.iterrows():
                    position = row['position']
                    size = row['intron size']
                    if strand == '+':
                        lo = int(position)
                        hi = int(position + size) + 1
                    elif strand == '-':
                        lo = int(position - size)
                        hi = int(position) + 1
                    else:
                        # Unknown strand: no read is counted below, so an
                        # empty window is enough and the result becomes NaN.
                        lo = hi = 0
                    introns[ix] = [lo, hi, size, 0]

                reads = 0
                for read in reader.fetch(chrom, start, end):
                    if strand == '+' and read.is_reverse:
                        anchor = read.reference_end
                    elif strand == '-' and not read.is_reverse:
                        anchor = read.reference_start
                    else:
                        continue
                    reads += 1
                    if anchor is None:
                        # No aligned coordinate: cannot fall in any intron.
                        continue
                    # Plain bounds comparison instead of ``in range(...)``,
                    # which is a linear list scan on Python 2.
                    for entry in introns.values():
                        if entry[0] <= anchor < entry[1]:
                            entry[3] += 1

                tx_length = float(end - start)
                for ix, (_lo, _hi, size, hits) in introns.items():
                    try:
                        density = reads / tx_length
                        counts.loc[ix, 'total'] = density * 1000
                        counts.loc[ix, 'intron'] = (
                            (hits / float(size)) / density)
                    except ZeroDivisionError:
                        # np.nan rather than np.NaN: the capitalised alias
                        # was removed in NumPy 2.0.
                        counts.loc[ix, 'total'] = np.nan
                        counts.loc[ix, 'intron'] = np.nan
                        print(ix)
    finally:
        # Release the file handles even when counting fails part-way.
        for reader in bams.values():
            reader.close()

    return all_reads
def count_reads_in_transcript(bam_files, df, gff3, organism=None):
    """Compute per-intron read densities for each BAM file.

    For every transcript named in ``df['transcript']`` the reads overlapping
    the transcript span are fetched from each BAM file.  Only reads whose
    orientation is opposite to the annotated strand are counted (reverse
    reads for '+' transcripts, forward reads for '-' transcripts).  A counted
    read is additionally attributed to an intron when its anchor coordinate
    falls inside that intron's window:

    * '+' strand: ``read.reference_end`` in
      ``[position, position + intron size]``
    * '-' strand: ``read.reference_start`` in
      ``[position - intron size, position]``

    Args:
        bam_files: Iterable of BAM file paths.
        df: DataFrame with the columns ``'transcript'``, ``'position'`` and
            ``'intron size'``; its index labels identify the introns.
        gff3: Annotation source handed to ``SP.build_transcript_dict``.
        organism: Forwarded to ``SP.build_transcript_dict``.  ``'pombe'``
            selects the ``'.1'`` transcript suffix and the chr1-3 -> I-III
            chromosome renaming; anything else uses the ``'T0'`` suffix.

    Returns:
        dict mapping each BAM path to a DataFrame indexed like ``df`` with
        columns ``'total'`` (counted reads per 1000 positions of transcript
        span) and ``'intron'`` (intron hits per intron position divided by
        counted reads per transcript position).  Both are NaN for an intron
        whenever the computation divides by zero (no counted reads, zero
        intron size or zero-length span); the index label of such introns is
        printed.
    """
    tx_dict = SP.build_transcript_dict(gff3, organism=organism)

    # Loop invariants, hoisted out of the per-transcript loop.
    is_pombe = organism == 'pombe'
    tx_suffix = '.1' if is_pombe else 'T0'
    lat_rom = {'chr1': 'I', 'chr2': 'II', 'chr3': 'III'}

    bams = {}
    all_reads = {}
    try:
        # Open everything up front so an unreadable file fails before any
        # counting starts; duplicates are opened only once.
        for bam_file in bam_files:
            if bam_file not in bams:
                bams[bam_file] = pysam.Samfile(bam_file)

        for bam, reader in bams.items():
            counts = pd.DataFrame(index=df.index, columns=['total', 'intron'])
            all_reads[bam] = counts

            for tx in set(df['transcript']):
                tx_df = df[df['transcript'] == tx]
                start, end, strand, _cds_start, _cds_end, _exons, chrom = \
                    SP.tx_info(tx + tx_suffix, tx_dict)
                if is_pombe:
                    chrom = lat_rom[chrom]

                # ix -> [window_start, window_stop (exclusive), size, hits]
                introns = {}
                for ix, row in tx_df.iterrows():
                    position = row['position']
                    size = row['intron size']
                    if strand == '+':
                        lo = int(position)
                        hi = int(position + size) + 1
                    elif strand == '-':
                        lo = int(position - size)
                        hi = int(position) + 1
                    else:
                        # Unknown strand: no read is counted below, so an
                        # empty window is enough and the result becomes NaN.
                        lo = hi = 0
                    introns[ix] = [lo, hi, size, 0]

                reads = 0
                for read in reader.fetch(chrom, start, end):
                    if strand == '+' and read.is_reverse:
                        anchor = read.reference_end
                    elif strand == '-' and not read.is_reverse:
                        anchor = read.reference_start
                    else:
                        continue
                    reads += 1
                    if anchor is None:
                        # No aligned coordinate: cannot fall in any intron.
                        continue
                    # Plain bounds comparison instead of ``in range(...)``,
                    # which is a linear list scan on Python 2.
                    for entry in introns.values():
                        if entry[0] <= anchor < entry[1]:
                            entry[3] += 1

                tx_length = float(end - start)
                for ix, (_lo, _hi, size, hits) in introns.items():
                    try:
                        density = reads / tx_length
                        counts.loc[ix, 'total'] = density * 1000
                        counts.loc[ix, 'intron'] = (
                            (hits / float(size)) / density)
                    except ZeroDivisionError:
                        # np.nan rather than np.NaN: the capitalised alias
                        # was removed in NumPy 2.0.
                        counts.loc[ix, 'total'] = np.nan
                        counts.loc[ix, 'intron'] = np.nan
                        print(ix)
    finally:
        # Release the file handles even when counting fails part-way.
        for reader in bams.values():
            reader.close()

    return all_reads