def plot(self, plot_num, data, xlabel, ylabel, txt_position=None, center_xticks=False):
    """Create histogram for statistic.

    Adds a subplot to ``self.fig`` (stored as ``self.axis``), draws a
    histogram of `data` in which every observation is weighted by
    ``1 / len(data)`` so bar heights are fractions of the data set, and
    optionally annotates the axes with the median, mean and standard
    deviation of `data`.

    Parameters
    ----------
    plot_num : int
        Subplot index passed to ``self.fig.add_subplot`` together with
        ``self.rows`` and ``self.cols``.
    data : sequence of numbers
        Values to plot.
    xlabel : str
        Label for the x-axis.
    ylabel : str
        Label for the y-axis.
    txt_position : str, optional
        'left' or 'right' places the summary statistics in the
        corresponding upper corner of the axes; any other value omits
        the summary text.
    center_xticks : bool, optional
        If True, bars are aligned 'left' instead of 'mid'.
    """
    self.axis = self.fig.add_subplot(self.rows, self.cols, plot_num)

    # not intuitive, but setting the alignment to left
    # puts labels in the middle of each bar
    align = 'left' if center_xticks else 'mid'

    weights = np_ones_like(data) / float(len(data))

    # one bin fewer than the number of distinct values, capped at 20;
    # never fewer than one bin, since hist() rejects a non-positive
    # bin count (data with fewer than two distinct values)
    num_bins = max(1, min(20, len(set(data)) - 1))

    self.axis.hist(data,
                   bins=num_bins,
                   rwidth=0.9,
                   weights=weights,
                   color='#fdae6b',
                   align=align)

    self.axis.set_xlabel(xlabel)
    self.axis.set_ylabel(ylabel)

    self.axis.xaxis.set_major_locator(MaxNLocator(integer=True))
    self.axis.yaxis.set_major_formatter(
        FuncFormatter(lambda y, _: '{:.1%}'.format(y)))

    # report summary statistics
    stat_txt = f'median = {np_median(data):.1f}\n'
    stat_txt += f'mean = {np_mean(data):.1f}\n'
    stat_txt += f'std = {np_std(data):.1f}'
    if txt_position == 'left':
        self.axis.text(0.05, 0.95, stat_txt,
                       transform=self.axis.transAxes,
                       fontsize=self.options.tick_font_size,
                       verticalalignment='top')
    elif txt_position == 'right':
        self.axis.text(0.95, 0.95, stat_txt,
                       transform=self.axis.transAxes,
                       fontsize=self.options.tick_font_size,
                       verticalalignment='top',
                       horizontalalignment='right')

    self.prettify(self.axis)

    # hide the right and top borders of the axes
    for loc, spine in self.axis.spines.items():
        if loc in ('right', 'top'):
            spine.set_color('none')

    self.fig.tight_layout(pad=0.1, w_pad=1.0, h_pad=1.0)
    self.draw()
def _distribution_plot(self, rel_dists, taxa_for_dist_inference, distribution_table, plot_file):
    """Create plot showing the distribution of taxa at each taxonomic rank.

    For every rank the 10th, 50th and 90th percentiles of the taxa used
    for inference are drawn, together with boundary lines at the median
    +/- 0.1 (orange) and +/- 0.2 (red). Every taxon is added to a scatter
    plot and written as one row of the distribution table. The figure is
    saved to `plot_file` and an interactive HTML version is written next
    to it (same name up to the last '.', with a '.html' suffix).

    Parameters
    ----------
    rel_dists: d[rank_index][taxon] -> relative divergence
        Relative divergence of taxa at each rank.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    distribution_table : str
        Desired name of output table with distribution information.
    plot_file : str
        Desired name of output plot.
    """
    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    ranks = sorted(rel_dists.keys())

    # create percentile and classification boundary lines
    percentiles = {}
    for i, rank in enumerate(ranks):
        v = [dist for taxon, dist in rel_dists[rank].items()
             if taxon in taxa_for_dist_inference]
        if not v:
            # no taxa at this rank suitable for inferring percentiles
            continue

        p10, p50, p90 = np_percentile(v, [10, 50, 90])
        ax.plot((p10, p10), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p50, p50), (i, i + 0.5), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p90, p90), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)

        for b in [-0.2, -0.1, 0.1, 0.2]:
            boundary = p50 + b
            if 0.0 < boundary < 1.0:
                if abs(b) == 0.1:
                    boundary_color = (1.0, 0.65, 0.0)  # orange
                else:
                    boundary_color = (1.0, 0.0, 0.0)  # red
                ax.plot((boundary, boundary), (i, i + 0.5),
                        c=boundary_color, lw=2, zorder=2)

        percentiles[i] = [p10, p50, p90]

    # create scatter plot and results table
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []

    # the histogram bins are identical for every rank
    binwidth = 0.025
    bins = np_arange(0, 1.0 + binwidth, binwidth)

    # the context manager closes the table even if plotting raises
    with open(distribution_table, 'w') as fout:
        fout.write('Taxa\tRelative Distance\tP10\tMedian\tP90\tPercentile outlier\n')

        for i, rank in enumerate(ranks):
            rank_label = Taxonomy.rank_labels[rank]
            rank_labels.append(rank_label + ' (%d)' % len(rel_dists[rank]))

            mono = []
            poly = []
            no_inference = []
            for clade_label, dist in rel_dists[rank].items():
                x.append(dist)
                y.append(i)
                labels.append(clade_label)

                if is_integer(clade_label.split('^')[-1]):
                    # taxa with a numerical suffix after a caret indicate
                    # polyphyletic groups when decorated with tax2tree
                    c.append((1.0, 0.0, 0.0))
                    poly.append(dist)
                elif clade_label not in taxa_for_dist_inference:
                    c.append((0.3, 0.3, 0.3))
                    no_inference.append(dist)
                else:
                    c.append((0.0, 0.0, 1.0))
                    mono.append(dist)

                # report results
                v = [clade_label, dist]
                if i in percentiles:
                    p10, p50, p90 = percentiles[i]
                    percentile_outlier = not (dist >= p10 and dist <= p90)
                    v += percentiles[i] + [str(percentile_outlier)]
                else:
                    percentile_outlier = 'Insufficent data to calculate percentiles'
                    v += [-1, -1, -1] + [str(percentile_outlier)]

                fout.write('%s\t%.2f\t%.2f\t%.2f\t%.2f\t%s\n' % tuple(v))

            # histogram for each rank
            d = len(mono) + len(poly) + len(no_inference)
            if d == 0:
                # nothing to draw for this rank; keep processing the
                # remaining ranks so that their rows and tick labels
                # are still produced
                continue

            mono = np_array(mono)
            no_inference = np_array(no_inference)
            poly = np_array(poly)

            # fraction of the taxa at this rank that are monophyletic
            w = float(len(mono)) / d

            n = 0
            if len(mono) > 0:
                mono_max_count = max(np_histogram(mono, bins=bins)[0])
                mono_weights = np_ones_like(mono) * (1.0 / mono_max_count)
                n, b, p = ax.hist(mono, bins=bins,
                                  color=(0.0, 0.0, 1.0),
                                  alpha=0.25,
                                  weights=0.9 * w * mono_weights,
                                  bottom=i,
                                  lw=0,
                                  zorder=0)

            # the remaining two histograms are both drawn on top of the
            # monophyletic one
            if len(no_inference) > 0:
                no_inference_max_count = max(np_histogram(no_inference, bins=bins)[0])
                no_inference_weights = np_ones_like(no_inference) * (1.0 / no_inference_max_count)
                ax.hist(no_inference, bins=bins,
                        color=(0.3, 0.3, 0.3),
                        alpha=0.25,
                        weights=0.9 * (1.0 - w) * no_inference_weights,
                        bottom=i + n,
                        lw=0,
                        zorder=0)

            if len(poly) > 0:
                poly_max_count = max(np_histogram(poly, bins=bins)[0])
                poly_weights = np_ones_like(poly) * (1.0 / poly_max_count)
                ax.hist(poly, bins=bins,
                        color=(1.0, 0.0, 0.0),
                        alpha=0.25,
                        weights=0.9 * (1.0 - w) * poly_weights,
                        bottom=i + n,
                        lw=0,
                        zorder=0)

    # overlay scatter plot elements
    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1)

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('relative distance')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.05, 1.05])

    ax.set_ylabel('rank (no. taxa)')
    ax.set_yticks(range(0, len(rel_dists)))
    ax.set_ylim([-0.2, len(rel_dists) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # make plot interactive
    mpld3.plugins.clear(self.fig)
    mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
    mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
    mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=self.dpi)
def _distribution_summary_plot(self, phylum_rel_dists, taxa_for_dist_inference, plot_file):
    """Summary plot showing the distribution of taxa at each taxonomic rank under different rootings.

    The per-taxon values come from ``self.taxa_median_rd(phylum_rel_dists)``;
    the median of each taxon's values is what gets plotted. For every
    rank the 10th, 50th and 90th percentiles of the taxa used for
    inference are drawn, together with boundary lines at the median
    +/- 0.1 (orange) and +/- 0.2 (red). The figure is saved to
    `plot_file` and an interactive HTML version is written next to it
    (same name up to the last '.', with a '.html' suffix).

    Parameters
    ----------
    phylum_rel_dists: phylum_rel_dists[phylum][rank_index][taxon] -> relative divergences
        Relative divergence of taxon at each rank for different phylum-level rootings.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    plot_file : str
        Desired name of output plot.
    """
    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    # determine median relative distance for each taxa
    medians_for_taxa = self.taxa_median_rd(phylum_rel_dists)
    ranks = sorted(medians_for_taxa.keys())

    # create percentile and classification boundary lines
    for i, rank in enumerate(ranks):
        v = [np_median(dists)
             for taxon, dists in medians_for_taxa[rank].items()
             if taxon in taxa_for_dist_inference]
        if not v:
            # no taxa at rank suitable for creating classification
            # boundaries
            continue

        p10, p50, p90 = np_percentile(v, [10, 50, 90])
        ax.plot((p10, p10), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p50, p50), (i, i + 0.5), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p90, p90), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)

        for b in [-0.2, -0.1, 0.1, 0.2]:
            boundary = p50 + b
            if 1.0 > boundary > 0.0:
                if abs(b) == 0.1:
                    boundary_color = (1.0, 0.65, 0.0)  # orange
                else:
                    boundary_color = (1.0, 0.0, 0.0)  # red
                ax.plot((boundary, boundary), (i, i + 0.5),
                        c=boundary_color, lw=2, zorder=2)

    # create scatter plot
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []

    # the histogram bins are identical for every rank
    binwidth = 0.025
    bins = np_arange(0, 1.0 + binwidth, binwidth)

    for i, rank in enumerate(ranks):
        rank_label = Taxonomy.rank_labels[rank]
        rank_labels.append(rank_label + ' (%d)' % len(medians_for_taxa[rank]))

        mono = []
        poly = []
        no_inference = []
        for clade_label, dists in medians_for_taxa[rank].items():
            md = np_median(dists)
            x.append(md)
            y.append(i)
            labels.append(clade_label)

            if self._is_integer(clade_label.split('^')[-1]):
                # taxa with a numerical suffix after a caret indicate
                # polyphyletic groups when decorated with tax2tree
                c.append((1.0, 0.0, 0.0))
                poly.append(md)
            elif clade_label not in taxa_for_dist_inference:
                c.append((0.3, 0.3, 0.3))
                no_inference.append(md)
            else:
                c.append((0.0, 0.0, 1.0))
                mono.append(md)

        # histogram for each rank
        num_taxa = len(mono) + len(poly) + len(no_inference)
        if num_taxa == 0:
            # nothing to draw for this rank
            continue

        mono = np_array(mono)
        no_inference = np_array(no_inference)
        poly = np_array(poly)

        # fraction of the taxa at this rank that are monophyletic; this
        # must be computed even when there are no monophyletic taxa
        # because the other two histograms are scaled by (1 - w)
        w = float(len(mono)) / num_taxa

        n = 0
        if len(mono) > 0:
            mono_max_count = max(np_histogram(mono, bins=bins)[0])
            mono_weights = np_ones_like(mono) * (1.0 / mono_max_count)
            n, b, p = ax.hist(mono, bins=bins,
                              color=(0.0, 0.0, 1.0),
                              alpha=0.25,
                              weights=0.9 * w * mono_weights,
                              bottom=i,
                              lw=0,
                              zorder=0)

        # the remaining two histograms are both drawn on top of the
        # monophyletic one
        if len(no_inference) > 0:
            no_inference_max_count = max(np_histogram(no_inference, bins=bins)[0])
            no_inference_weights = np_ones_like(no_inference) * (1.0 / no_inference_max_count)
            ax.hist(no_inference, bins=bins,
                    color=(0.3, 0.3, 0.3),
                    alpha=0.25,
                    weights=0.9 * (1.0 - w) * no_inference_weights,
                    bottom=i + n,
                    lw=0,
                    zorder=0)

        if len(poly) > 0:
            poly_max_count = max(np_histogram(poly, bins=bins)[0])
            poly_weights = np_ones_like(poly) * (1.0 / poly_max_count)
            ax.hist(poly, bins=bins,
                    color=(1.0, 0.0, 0.0),
                    alpha=0.25,
                    weights=0.9 * (1.0 - w) * poly_weights,
                    bottom=i + n,
                    lw=0,
                    zorder=0)

    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1)

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('relative distance')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.01, 1.01])

    ax.set_ylabel('rank (no. taxa)')
    ax.set_yticks(list(range(0, len(medians_for_taxa))))
    ax.set_ylim([-0.2, len(medians_for_taxa) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # make plot interactive
    mpld3.plugins.clear(self.fig)
    mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
    mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
    mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=self.dpi)
def _distribution_summary_plot(self, phylum_rel_dists, taxa_for_dist_inference, highlight_polyphyly, highlight_taxa, fmeasure, fmeasure_mono, plot_file):
    """Summary plot showing the distribution of taxa at each taxonomic rank under different rootings.

    The per-taxon values come from ``self.taxa_median_rd(phylum_rel_dists)``;
    the median of each taxon's values is what gets plotted. For every
    rank the median of the taxa used for inference is drawn in blue with
    black boundary lines at the median +/- 0.1. The figure is saved to
    `plot_file`, to an SVG file (`plot_file` with '.png' replaced by
    '.svg'), and as an interactive HTML file (same name up to the last
    '.', with a '.html' suffix).

    Parameters
    ----------
    phylum_rel_dists: phylum_rel_dists[phylum][rank_index][taxon] -> relative divergences
        Relative divergence of taxon at each rank for different phylum-level rootings.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    highlight_polyphyly : bool
        If True, taxa are coloured according to their F-measure.
    highlight_taxa : container
        Taxa that are always drawn in the polyphyletic (red) colour.
    fmeasure : d[taxon] -> F-measure
        Only consulted when `highlight_polyphyly` is True.
    fmeasure_mono : float
        Taxa with an F-measure below this value are drawn in the
        polyphyletic colour; other taxa with an F-measure different
        from 1.0 are drawn in the near-monophyletic colour.
    plot_file : str
        Desired name of output plot.
    """
    mono_color = (152.0 / 255, 223.0 / 255, 138.0 / 255)
    near_mono_color = (255.0 / 255, 187.0 / 255, 120.0 / 255)
    poly_color = (1.0, 0.0, 0.0)

    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    # determine median relative distance for each taxa
    medians_for_taxa = self.taxa_median_rd(phylum_rel_dists)
    ranks = sorted(medians_for_taxa.keys())

    # create median and classification boundary lines
    for i, rank in enumerate(ranks):
        v = [np_median(dists)
             for taxon, dists in medians_for_taxa[rank].items()
             if taxon in taxa_for_dist_inference]
        if not v:
            # no taxa at rank suitable for creating classification boundaries
            continue

        p50 = np_percentile(v, 50)
        ax.plot((p50, p50), (i, i + 0.5), c=(0.0, 0.0, 1.0), lw=2, zorder=2)

        for b in [-0.1, 0.1]:
            boundary = p50 + b
            if 0.0 < boundary < 1.0:
                ax.plot((boundary, boundary), (i, i + 0.5),
                        c=(0.0, 0.0, 0.0), lw=2, zorder=2)

    # create scatter plot
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []

    # the histogram bins are identical for every rank
    binwidth = 0.025
    bins = np_arange(0, 1.0 + binwidth, binwidth)

    for i, rank in enumerate(ranks):
        rank_label = Taxonomy.rank_labels[rank]
        rank_labels.append(rank_label.capitalize() + ' (%d)' % len(medians_for_taxa[rank]))

        mono = []
        poly = []
        near_mono = []
        for clade_label, dists in medians_for_taxa[rank].items():
            md = np_median(dists)
            x.append(md)
            y.append(i)
            labels.append(clade_label)

            if ((highlight_polyphyly and fmeasure[clade_label] < fmeasure_mono)
                    or clade_label in highlight_taxa):
                c.append(poly_color)
                poly.append(md)
            elif (highlight_polyphyly and fmeasure[clade_label] != 1.0):
                c.append(near_mono_color)
                near_mono.append(md)
            else:
                c.append(mono_color)
                mono.append(md)

        # histogram for each rank; all three histograms share one scale
        # so that the fullest bin of the stacked histogram has height 0.9
        max_bin_count = max(np_histogram(mono + near_mono + poly, bins=bins)[0])

        mono_bottom = 0
        near_mono_bottom = 0
        mono = np_array(mono)
        near_mono = np_array(near_mono)
        poly = np_array(poly)
        if len(mono) > 0:
            mono_bottom, b, p = ax.hist(mono, bins=bins,
                                        color=mono_color,
                                        alpha=0.5,
                                        weights=0.9 * (1.0 / max_bin_count) * np_ones_like(mono),
                                        bottom=i,
                                        lw=0,
                                        zorder=0)

        if len(near_mono) > 0:
            near_mono_bottom, b, p = ax.hist(near_mono, bins=bins,
                                             color=near_mono_color,
                                             alpha=0.5,
                                             weights=0.9 * (1.0 / max_bin_count) * np_ones_like(near_mono),
                                             bottom=i + mono_bottom,
                                             lw=0,
                                             zorder=0)

        if len(poly) > 0:
            ax.hist(poly, bins=bins,
                    color=poly_color,
                    alpha=0.5,
                    weights=0.9 * (1.0 / max_bin_count) * np_ones_like(poly),
                    bottom=i + mono_bottom + near_mono_bottom,
                    lw=0,
                    zorder=0)

    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1)

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('Relative Evolutionary Divergence')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.01, 1.01])

    ax.set_ylabel('Rank (no. taxa)')
    ax.set_yticks(range(0, len(medians_for_taxa)))
    ax.set_ylim([-0.2, len(medians_for_taxa) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # make plot interactive
    mpld3.plugins.clear(self.fig)
    mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
    mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
    mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=self.dpi)
    self.fig.savefig(plot_file.replace('.png', '.svg'), dpi=self.dpi)
def _distribution_plot(self, rel_dists, taxa_for_dist_inference, highlight_polyphyly, highlight_taxa, distribution_table, fmeasure, fmeasure_mono, plot_file, viral):
    """Create plot showing the distribution of taxa at each taxonomic rank.

    For every rank the median of the taxa used for inference is drawn
    with black boundary lines at the median +/- 0.1. Every taxon is
    added to a scatter plot and written as one row of the distribution
    table. The figure is saved to `plot_file` and to an SVG file
    (`plot_file` with '.png' replaced by '.svg'); unless
    ``self.skip_mpld3`` is set, an interactive HTML file is written as
    well (same name up to the last '.', with a '.html' suffix).

    Parameters
    ----------
    rel_dists: d[rank_index][taxon] -> relative divergence
        Relative divergence of taxa at each rank.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    highlight_polyphyly : bool
        If True, taxa are coloured according to their F-measure.
    highlight_taxa : container
        Taxa that are always drawn in the polyphyletic colour.
    distribution_table : str
        Desired name of output table with distribution information.
    fmeasure : d[taxon] -> F-measure
        Only consulted when `highlight_polyphyly` is True.
    fmeasure_mono : float
        Taxa with an F-measure below this value are drawn in the
        polyphyletic colour; other taxa with an F-measure different
        from 1.0 are drawn in the near-monophyletic colour.
    plot_file : str
        Desired name of output plot.
    viral : bool
        If True, rank labels are taken from VIRAL_RANK_LABELS instead
        of Taxonomy.rank_labels.
    """
    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    ranks = sorted(rel_dists.keys())

    # create percentile and classification boundary lines
    percentiles = {}
    for i, rank in enumerate(ranks):
        v = [dist for taxon, dist in rel_dists[rank].items()
             if taxon in taxa_for_dist_inference]
        if not v:
            # no taxa at this rank suitable for inferring percentiles
            continue

        p10, p50, p90 = np_percentile(v, [10, 50, 90])
        ax.plot((p50, p50), (i, i + 0.5), c=self.median_color, lw=2, zorder=2)

        for b in [-0.1, 0.1]:
            boundary = p50 + b
            if 0.0 < boundary < 1.0:
                ax.plot((boundary, boundary), (i, i + 0.25),
                        c=(0.0, 0.0, 0.0), lw=2, zorder=2)

        percentiles[i] = [p10, p50, p90]

    # create scatter plot and results table
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []

    # the histogram bins are identical for every rank
    binwidth = 0.025
    bins = np_arange(0, 1.0 + binwidth, binwidth)

    # the context manager closes the table even if plotting raises
    with open(distribution_table, 'w') as fout:
        fout.write('Taxa\tRelative Distance\tP10\tMedian\tP90\tPercentile outlier\n')

        for i, rank in enumerate(ranks):
            if viral:
                rank_label = VIRAL_RANK_LABELS[rank]
            else:
                rank_label = Taxonomy.rank_labels[rank]
            rank_labels.append(rank_label.capitalize() + ' ({:,})'.format(len(rel_dists[rank])))

            mono = []
            poly = []
            nearly_mono = []
            for clade_label, dist in rel_dists[rank].items():
                x.append(dist)
                y.append(i)
                labels.append(clade_label)

                if ((highlight_polyphyly and fmeasure[clade_label] < fmeasure_mono)
                        or clade_label in highlight_taxa):
                    c.append(self.poly_color)
                    poly.append(dist)
                elif (highlight_polyphyly and fmeasure[clade_label] != 1.0):
                    c.append(self.near_mono_color)
                    nearly_mono.append(dist)
                else:
                    c.append(self.mono_color)
                    mono.append(dist)

                # report results
                v = [clade_label, dist]
                if i in percentiles:
                    p10, p50, p90 = percentiles[i]
                    percentile_outlier = not (dist >= p10 and dist <= p90)
                    v += percentiles[i] + [str(percentile_outlier)]
                else:
                    percentile_outlier = 'Insufficent data to calculate percentiles'
                    v += [-1, -1, -1] + [str(percentile_outlier)]

                fout.write('%s\t%.2f\t%.2f\t%.2f\t%.2f\t%s\n' % tuple(v))

            # histogram for each rank
            num_taxa = len(mono) + len(poly) + len(nearly_mono)
            if num_taxa == 0:
                # nothing to draw for this rank; keep processing the
                # remaining ranks so that their rows and tick labels
                # are still produced
                continue

            # all three histograms share one scale so that the fullest
            # bin of the stacked histogram has height 0.9
            max_bin_count = max(np_histogram(mono + nearly_mono + poly, bins=bins)[0])

            mono = np_array(mono)
            nearly_mono = np_array(nearly_mono)
            poly = np_array(poly)

            bottom_mono = 0
            if len(mono) > 0:
                bottom_mono, b, p = ax.hist(mono, bins=bins,
                                            color=self.mono_color,
                                            alpha=0.5,
                                            weights=0.9 * (1.0 / max_bin_count) * np_ones_like(mono),
                                            bottom=i,
                                            lw=0,
                                            zorder=0)

            bottom_nearly_mono = 0
            if len(nearly_mono) > 0:
                bottom_nearly_mono, b, p = ax.hist(nearly_mono, bins=bins,
                                                   color=self.near_mono_color,
                                                   alpha=0.5,
                                                   weights=0.9 * (1.0 / max_bin_count) * np_ones_like(nearly_mono),
                                                   bottom=i + bottom_mono,
                                                   lw=0,
                                                   zorder=0)

            if len(poly) > 0:
                ax.hist(poly, bins=bins,
                        color=self.poly_color,
                        alpha=0.5,
                        weights=0.9 * (1.0 / max_bin_count) * np_ones_like(poly),
                        bottom=i + bottom_mono + bottom_nearly_mono,
                        lw=0,
                        zorder=0)

    # overlay scatter plot elements
    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1, lw=1, edgecolors='black')

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('Relative Evolutionary Divergence')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.05, 1.05])

    ax.set_ylabel('Rank (no. taxa)')
    ax.set_yticks(range(0, len(rel_dists)))
    ax.set_ylim([-0.2, len(rel_dists) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # make plot interactive
    if not self.skip_mpld3:
        mpld3.plugins.clear(self.fig)
        mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
        mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
        mpld3.plugins.connect(self.fig, AxisReplacer(rank_labels))
        mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=self.dpi)
    self.fig.savefig(plot_file.replace('.png', '.svg'), dpi=self.dpi)
def r3_dnn_apply_keras(target_dirname, old_stft_obj=None, cuda=False, saving_to_disk=True):
    """Denoise the original STFT with the neural network models.

    Takes an old_stft object (or, as a side effect, loads it from
    ``old_stft.mat`` in `target_dirname`) and produces a new_stft object.

    Parameters
    ----------
    target_dirname : str
        Directory holding ``old_stft.mat`` and receiving ``new_stft.mat``.
        The model directory is taken to be three levels above it.
    old_stft_obj : mapping, optional
        Object with 'old_stft_real' and 'old_stft_imag' entries. When
        None, both are read from ``old_stft.mat``.
    cuda : bool, optional
        Not used by this function.
    saving_to_disk : bool, optional
        The result is written to ``new_stft.mat`` only when this is
        exactly True.

    Returns
    -------
    dict
        'new_stft_real' and 'new_stft_imag' arrays.
    """
    LOGGER.info(
        '{}: r3: Denoising original stft with neural network model...'.format(
            target_dirname))

    # the model directory sits three levels above the target directory
    model_dirname = os_path_dirname(
        os_path_dirname(os_path_dirname(target_dirname)))

    # load stft data, joining the real and imaginary parts along axis 1
    if old_stft_obj is not None:
        parts = [old_stft_obj['old_stft_real'], old_stft_obj['old_stft_imag']]
        stft = np_concatenate(parts, axis=1)
    else:
        with h5py_File(os_path_join(target_dirname, 'old_stft.mat'), 'r') as f:
            parts = [f['old_stft_real'][:], f['old_stft_imag'][:]]
            stft = np_concatenate(parts, axis=1)

    # NOTE(review): axis meaning is taken from the variable names the
    # shape is unpacked into - confirm against the data producer
    N_beams, N_elements_2, N_segments, N_fft = stft.shape
    N_elements = N_elements_2 // 2

    # swap axes 1 and 2, then flatten the first two axes
    stft = np_moveaxis(stft, 1, 2)  # TODO: Duplicate?
    flat_shape = [N_beams * N_segments, N_elements_2, N_fft]
    stft = np_reshape(stft, flat_shape)  # TODO: Duplicate?

    # process stft with networks, one analysis frequency at a time
    k_mask = [3, 4, 5]
    for frequency in k_mask:
        process_each_frequency_keras(model_dirname, stft, frequency)

    # undo the flattening
    full_shape = [N_beams, N_segments, N_elements_2, N_fft]
    stft = np_reshape(stft, full_shape)  # TODO: Duplicate?

    # set zero outside analysis frequency range
    discard_mask = np_ones_like(stft, dtype=bool)
    discard_mask[..., k_mask] = False  # pylint: disable=E1137
    stft[discard_mask] = 0
    del discard_mask

    # mirror data to negative frequencies using conjugate symmetry:
    # copy the flipped positive bins, then negate the imaginary half
    end_index = N_fft // 2
    stft[..., end_index + 1:] = np_flip(stft[..., 1:end_index], axis=3)
    imag_negative = (slice(None), slice(None),
                     slice(N_elements, 2 * N_elements),
                     slice(end_index + 1, None))
    stft[imag_negative] = -1 * stft[imag_negative]

    # move element position axis back
    stft = np_moveaxis(stft, 1, 2)  # TODO: Duplicate?

    # split into real and imaginary halves and reverse the axis order
    new_stft_real = stft[:, :N_elements, :, :].transpose()
    new_stft_imag = stft[:, N_elements:, :, :].transpose()
    del stft

    # save new stft data
    new_stft_obj = {
        'new_stft_real': new_stft_real,
        'new_stft_imag': new_stft_imag
    }
    if saving_to_disk is True:
        savemat(os_path_join(target_dirname, 'new_stft.mat'), new_stft_obj)

    LOGGER.info('{}: r3 Done.'.format(target_dirname))
    return new_stft_obj
def _distribution_summary_plot(self, phylum_rel_dists, taxa_for_dist_inference, plot_file):
    """Summary plot showing the distribution of taxa at each taxonomic rank under different rootings.

    The per-taxon values come from ``self.taxa_median_rd(phylum_rel_dists)``;
    the median of each taxon's values is what gets plotted. For every
    rank the 10th, 50th and 90th percentiles of the taxa used for
    inference are drawn, together with boundary lines at the median
    +/- 0.1 (orange) and +/- 0.2 (red). The figure is saved to
    `plot_file` and an interactive HTML version is written next to it
    (same name up to the last '.', with a '.html' suffix).

    Parameters
    ----------
    phylum_rel_dists: phylum_rel_dists[phylum][rank_index][taxon] -> relative divergences
        Relative divergence of taxon at each rank for different phylum-level rootings.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    plot_file : str
        Desired name of output plot.
    """
    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    # determine median relative distance for each taxa
    medians_for_taxa = self.taxa_median_rd(phylum_rel_dists)
    ranks = sorted(medians_for_taxa.keys())

    # create percentile and classification boundary lines
    for i, rank in enumerate(ranks):
        v = [np_median(dists)
             for taxon, dists in medians_for_taxa[rank].items()
             if taxon in taxa_for_dist_inference]
        if not v:
            # no taxa at rank suitable for creating classification
            # boundaries; percentiles of an empty list are undefined
            continue

        p10, p50, p90 = np_percentile(v, [10, 50, 90])
        ax.plot((p10, p10), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p50, p50), (i, i + 0.5), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p90, p90), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)

        for b in [-0.2, -0.1, 0.1, 0.2]:
            boundary = p50 + b
            if 0.0 < boundary < 1.0:
                if abs(b) == 0.1:
                    boundary_color = (1.0, 0.65, 0.0)  # orange
                else:
                    boundary_color = (1.0, 0.0, 0.0)  # red
                ax.plot((boundary, boundary), (i, i + 0.5),
                        c=boundary_color, lw=2, zorder=2)

    # create scatter plot
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []

    # the histogram bins are identical for every rank
    binwidth = 0.025
    bins = np_arange(0, 1.0 + binwidth, binwidth)

    for i, rank in enumerate(ranks):
        rank_label = Taxonomy.rank_labels[rank]
        rank_labels.append(rank_label + ' (%d)' % len(medians_for_taxa[rank]))

        mono = []
        poly = []
        no_inference = []
        for clade_label, dists in medians_for_taxa[rank].items():
            md = np_median(dists)
            x.append(md)
            y.append(i)
            labels.append(clade_label)

            if is_integer(clade_label.split('^')[-1]):
                # taxa with a numerical suffix after a caret indicate
                # polyphyletic groups when decorated with tax2tree
                c.append((1.0, 0.0, 0.0))
                poly.append(md)
            elif clade_label not in taxa_for_dist_inference:
                c.append((0.3, 0.3, 0.3))
                no_inference.append(md)
            else:
                c.append((0.0, 0.0, 1.0))
                mono.append(md)

        # histogram for each rank
        num_taxa = len(mono) + len(poly) + len(no_inference)
        if num_taxa == 0:
            # nothing to draw for this rank (and the monophyletic
            # fraction below would divide by zero)
            continue

        mono = np_array(mono)
        no_inference = np_array(no_inference)
        poly = np_array(poly)

        # fraction of the taxa at this rank that are monophyletic
        w = float(len(mono)) / num_taxa

        n = 0
        if len(mono) > 0:
            # skipped for an empty group, whose maximum bin count of
            # zero cannot be used to normalise the weights
            mono_max_count = max(np_histogram(mono, bins=bins)[0])
            mono_weights = np_ones_like(mono) * (1.0 / mono_max_count)
            n, b, p = ax.hist(mono, bins=bins,
                              color=(0.0, 0.0, 1.0),
                              alpha=0.25,
                              weights=0.9 * w * mono_weights,
                              bottom=i,
                              lw=0,
                              zorder=0)

        # the remaining two histograms are both drawn on top of the
        # monophyletic one
        if len(no_inference) > 0:
            no_inference_max_count = max(np_histogram(no_inference, bins=bins)[0])
            no_inference_weights = np_ones_like(no_inference) * (1.0 / no_inference_max_count)
            ax.hist(no_inference, bins=bins,
                    color=(0.3, 0.3, 0.3),
                    alpha=0.25,
                    weights=0.9 * (1.0 - w) * no_inference_weights,
                    bottom=i + n,
                    lw=0,
                    zorder=0)

        if len(poly) > 0:
            poly_max_count = max(np_histogram(poly, bins=bins)[0])
            poly_weights = np_ones_like(poly) * (1.0 / poly_max_count)
            ax.hist(poly, bins=bins,
                    color=(1.0, 0.0, 0.0),
                    alpha=0.25,
                    weights=0.9 * (1.0 - w) * poly_weights,
                    bottom=i + n,
                    lw=0,
                    zorder=0)

    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1)

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('relative distance')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.01, 1.01])

    ax.set_ylabel('rank (no. taxa)')
    ax.set_yticks(range(0, len(medians_for_taxa)))
    ax.set_ylim([-0.2, len(medians_for_taxa) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # make plot interactive
    mpld3.plugins.clear(self.fig)
    mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
    mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
    mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=self.dpi)
def _distribution_plot(self, rel_dists, taxa_for_dist_inference, distribution_table, plot_file):
    """Create plot showing the distribution of taxa at each taxonomic rank.

    For every rank the 10th, 50th and 90th percentiles of the taxa used
    for inference are drawn, together with boundary lines at the median
    +/- 0.1 (orange) and +/- 0.2 (red). Every taxon is added to a scatter
    plot and written as one row of the distribution table. The figure is
    saved to `plot_file` and an interactive HTML version is written next
    to it (same name up to the last '.', with a '.html' suffix).

    Parameters
    ----------
    rel_dists: d[rank_index][taxon] -> relative divergence
        Relative divergence of taxa at each rank.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    distribution_table : str
        Desired name of output table with distribution information.
    plot_file : str
        Desired name of output plot.
    """
    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    ranks = sorted(rel_dists.keys())

    # create percentile and classification boundary lines
    percentiles = {}
    for i, rank in enumerate(ranks):
        v = [dist for taxon, dist in rel_dists[rank].items()
             if taxon in taxa_for_dist_inference]
        if not v:
            # no taxa at this rank suitable for inferring percentiles
            continue

        p10, p50, p90 = np_percentile(v, [10, 50, 90])
        ax.plot((p10, p10), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p50, p50), (i, i + 0.5), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p90, p90), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)

        for b in [-0.2, -0.1, 0.1, 0.2]:
            boundary = p50 + b
            if 0.0 < boundary < 1.0:
                if abs(b) == 0.1:
                    boundary_color = (1.0, 0.65, 0.0)  # orange
                else:
                    boundary_color = (1.0, 0.0, 0.0)  # red
                ax.plot((boundary, boundary), (i, i + 0.5),
                        c=boundary_color, lw=2, zorder=2)

        percentiles[i] = [p10, p50, p90]

    # create scatter plot and results table
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []

    # the histogram bins are identical for every rank
    binwidth = 0.025
    bins = np_arange(0, 1.0 + binwidth, binwidth)

    # the context manager closes the table even if plotting raises
    with open(distribution_table, 'w') as fout:
        fout.write('Taxa\tRelative Distance\tP10\tMedian\tP90\tPercentile outlier\n')

        for i, rank in enumerate(ranks):
            rank_label = Taxonomy.rank_labels[rank]
            rank_labels.append(rank_label + ' (%d)' % len(rel_dists[rank]))

            mono = []
            poly = []
            no_inference = []
            for clade_label, dist in rel_dists[rank].items():
                x.append(dist)
                y.append(i)
                labels.append(clade_label)

                if is_integer(clade_label.split('^')[-1]):
                    # taxa with a numerical suffix after a caret indicate
                    # polyphyletic groups when decorated with tax2tree
                    c.append((1.0, 0.0, 0.0))
                    poly.append(dist)
                elif clade_label not in taxa_for_dist_inference:
                    c.append((0.3, 0.3, 0.3))
                    no_inference.append(dist)
                else:
                    c.append((0.0, 0.0, 1.0))
                    mono.append(dist)

                # report results
                v = [clade_label, dist]
                if i in percentiles:
                    p10, p50, p90 = percentiles[i]
                    percentile_outlier = not (dist >= p10 and dist <= p90)
                    v += percentiles[i] + [str(percentile_outlier)]
                else:
                    percentile_outlier = 'Insufficent data to calculate percentiles'
                    v += [-1, -1, -1] + [str(percentile_outlier)]

                fout.write('%s\t%.2f\t%.2f\t%.2f\t%.2f\t%s\n' % tuple(v))

            # histogram for each rank
            num_taxa = len(mono) + len(poly) + len(no_inference)
            if num_taxa == 0:
                # nothing to draw for this rank (and the monophyletic
                # fraction below would divide by zero)
                continue

            mono = np_array(mono)
            no_inference = np_array(no_inference)
            poly = np_array(poly)

            # fraction of the taxa at this rank that are monophyletic
            w = float(len(mono)) / num_taxa

            n = 0
            if len(mono) > 0:
                mono_max_count = max(np_histogram(mono, bins=bins)[0])
                mono_weights = np_ones_like(mono) * (1.0 / mono_max_count)
                n, b, p = ax.hist(mono, bins=bins,
                                  color=(0.0, 0.0, 1.0),
                                  alpha=0.25,
                                  weights=0.9 * w * mono_weights,
                                  bottom=i,
                                  lw=0,
                                  zorder=0)

            # the remaining two histograms are both drawn on top of the
            # monophyletic one
            if len(no_inference) > 0:
                no_inference_max_count = max(np_histogram(no_inference, bins=bins)[0])
                no_inference_weights = np_ones_like(no_inference) * (1.0 / no_inference_max_count)
                ax.hist(no_inference, bins=bins,
                        color=(0.3, 0.3, 0.3),
                        alpha=0.25,
                        weights=0.9 * (1.0 - w) * no_inference_weights,
                        bottom=i + n,
                        lw=0,
                        zorder=0)

            if len(poly) > 0:
                poly_max_count = max(np_histogram(poly, bins=bins)[0])
                poly_weights = np_ones_like(poly) * (1.0 / poly_max_count)
                ax.hist(poly, bins=bins,
                        color=(1.0, 0.0, 0.0),
                        alpha=0.25,
                        weights=0.9 * (1.0 - w) * poly_weights,
                        bottom=i + n,
                        lw=0,
                        zorder=0)

    # overlay scatter plot elements
    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1)

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('relative distance')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.05, 1.05])

    ax.set_ylabel('rank (no. taxa)')
    ax.set_yticks(range(0, len(rel_dists)))
    ax.set_ylim([-0.2, len(rel_dists) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # make plot interactive
    mpld3.plugins.clear(self.fig)
    mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
    mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
    mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=self.dpi)