def add_reference_surface(self, xmin, xmax, ymin, ymax, image):
    # Build a textured reference plane at z = 0 from ``image`` and add it to the view.
    cx = np_linspace(xmin / self.kx, xmax / self.kx, image.shape[1])
    cy = np_linspace(ymin / self.ky, ymax / self.ky, image.shape[0])
    cz = np_zeros((image.shape[1], image.shape[0]))
    ref_tex = pg_makeRGBA(np_rot90(image, k=3))[0] / 255.
    self.ref_surf = gl.GLSurfacePlotItem(x=cx, y=cy, z=cz, colors=ref_tex, shader='balloon')
    self.ref_surf.translate(-self.xoff, -self.yoff, self.zoff)
    self.addItem(self.ref_surf)
    return self.ref_surf
def InterpFun1D(self, ChaPhyValue):
    # For a FIX_AXIS, rebuild the original x-axis points from offset, shift and count.
    if self.XaxisType == 'FIX_AXIS':
        TempList = []
        for i in range(ChaPhyValue[2][2]):
            XaxisValue = ChaPhyValue[2][0] + i * pow(2, ChaPhyValue[2][1])
            TempList.append(XaxisValue)
        ChaPhyValue[2] = TempList

    # Cubic interpolation of the curve values over the original axis.
    ValueArray = np_array(ChaPhyValue[1])
    XaxisArray = np_array(ChaPhyValue[2])
    CurveFun = interpolate.interp1d(XaxisArray, ValueArray, kind='cubic')

    # New x-axis: either the FIX_AXIS grid or a linspace over the old range.
    if self.XaxisType == 'FIX_AXIS':
        TempList = []
        for i in range(self.XaxisPNum):
            XaxisValue = self.XaxisBegin + i * pow(2, self.XaxisShift)
            TempList.append(XaxisValue)
        x = np_array(TempList)
    else:
        x = np_linspace(min(ChaPhyValue[2]), max(ChaPhyValue[2]), self.XaxisPNum)

    ValueArrayNew = CurveFun(x)
    ChaPhyValue[1] = list(ValueArrayNew)
    ChaPhyValue[2] = list(x)
    return ChaPhyValue
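# The core of InterpFun1D above is cubic resampling of a 1-D curve onto a new axis.
# A minimal, self-contained sketch of that technique (the sample data is made up for
# illustration only):
from numpy import array as np_array, linspace as np_linspace
from scipy import interpolate

x_old = np_array([0.0, 1.0, 2.0, 3.0, 4.0])
y_old = np_array([0.0, 0.8, 0.9, 0.1, -0.7])

curve_fun = interpolate.interp1d(x_old, y_old, kind='cubic')
x_new = np_linspace(x_old.min(), x_old.max(), 17)   # resample onto 17 evenly spaced points
y_new = curve_fun(x_new)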
def translate_episode_data(episode_data):
    """Convert episode data into data that can be used in a graph.

    Given data from multiple episodes, make it such that it can be plotted by
    tsplot, i.e. the mean plus the confidence bounds.
    """
    times, units, values = [], [], []
    for index, (ep_len, ep_rew) in enumerate(episode_data):
        # Smooth out the data
        ep_rew = pd_Series(ep_rew).ewm(span=1000).mean()
        # Sample for faster plotting
        x, y = sample(bins=np_linspace(0, MAX_TSTEPS, NSAMPLES + 1), time=ep_len, value=ep_rew)
        # Convert to tsplot format
        times.extend(x)
        values.extend(y)
        units.extend([index] * len(x))
    return pd_DataFrame({
        'Frame': times,
        'run_id': units,
        'Average Episode Reward': values,
    })
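# The frame returned above is in "long" format: one row per sampled frame per run, with
# run_id distinguishing runs. A minimal self-contained sketch of how such a frame plots
# as a mean plus confidence band (made-up numbers; seaborn.lineplot aggregates repeated
# x values and is the current replacement for the older tsplot API the docstring names):
from pandas import DataFrame as pd_DataFrame
import seaborn as sns

df = pd_DataFrame({
    'Frame': [0, 1, 2, 0, 1, 2],
    'run_id': [0, 0, 0, 1, 1, 1],
    'Average Episode Reward': [1.0, 1.5, 2.0, 0.8, 1.7, 2.3],
})
sns.lineplot(data=df, x='Frame', y='Average Episode Reward')   # mean over runs + CI band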
def generic_path(self, height, scale, document, start=False):
    t_min, t_max = self.curve.get_u_bounds()
    ts = np_linspace(t_min, t_max, num=self.node.curve_samples, dtype=np_float64)
    verts = self.curve.evaluate_array(ts).tolist()
    svg = draw_path_linear_mode(verts, height, scale, start)
    return svg
def upd_freq_scale_off(self, i_dataset, zero_value):
    """Update the starting value of the frequency scale of a dataset (RUDIMENTAL)

    i_dataset: index of the dataset table
    zero_value: offset
    """
    self.datasets[i_dataset].freq_scale = np_linspace(
        zero_value,
        zero_value + self.datasets[i_dataset].bandwidth,
        self.datasets[i_dataset].n_channels + 1)
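# Sketch of the edge convention used above: n_channels + 1 points give one frequency
# value per channel boundary (the numbers below are illustrative only).
from numpy import linspace as np_linspace

n_channels, bandwidth, zero_value = 4, 100.0, 1000.0
freq_scale = np_linspace(zero_value, zero_value + bandwidth, n_channels + 1)
# -> [1000., 1025., 1050., 1075., 1100.]: channel k spans freq_scale[k] .. freq_scale[k+1]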
def linspace(self):
    if type(self.ran) is tuple:
        try:
            logging.debug(f"using ran: {self.ran}, type {type(self.ran[0])} for linspace")
            self._linspace = np_round(np_linspace(self.ran[0], self.ran[-1], self.n), decimals=3)
            logging.debug(f"running linspace for {self.name}, if linspace: {type(self._linspace)}")
        except Exception as exc:
            raise ValueError("Could not assign linspace") from exc
        return self._linspace
    else:
        self._linspace = np_array(self.val)
        return self._linspace
def execute(self):
    """Execute the command"""
    if self.file_type == 'hdf':
        self.data.dw_io = dwio.HdfIO()
    elif self.file_type == 'hdf_pola':
        self.data.dw_io = dwio.HdfPolaIO()
    elif self.file_type == 'fits':
        self.data.dw_io = dwio.FitsIO()
    else:
        raise NotImplementedError

    self.data.fileh = self.data.dw_io.data_open(self.file_name)
    try:
        self.data.data_type_dict = self.data.dw_io.check_data_type(self.data.fileh)
    except:
        pass
    self.data.datasets = self.data.dw_io.get_datasets(self.data.fileh)
    for i_dataset in xrange(len(self.data.datasets)):
        time = integration_to_seconds(
            np_cumsum(self.data.dw_io.get_integration(self.data.datasets[i_dataset].th)),
            self.file_type)
        self.data.datasets[i_dataset].time_scale = np_insert(time, 0, 0)
        self.data.datasets[i_dataset].local_osc = self.data.dw_io.get_first_osc(self.data.datasets[i_dataset].th)
        self.data.datasets[i_dataset].frequency = self.data.dw_io.get_frequency(self.data.datasets[i_dataset].th)
        self.data.datasets[i_dataset].freq_scale = np_linspace(
            self.data.datasets[i_dataset].frequency,
            self.data.datasets[i_dataset].frequency + self.data.datasets[i_dataset].bandwidth,
            self.data.datasets[i_dataset].n_channels + 1)
        self.data.datasets[i_dataset].feed_section = self.data.dw_io.get_feed_section(
            self.data.fileh, self.data.datasets[i_dataset].th)
        try:
            self.data.datasets[i_dataset].t = ("Feed" + str(self.data.datasets[i_dataset].feed_section[0])
                                               + " - Section" + str(self.data.datasets[i_dataset].feed_section[1]))
        except:
            pass
        self.data.datasets[i_dataset].flagsets = {}
def vrang_list_faster(self, sub_query: np_array, ntss_tmp: np_ndarray):
    """
    Get the vrang list for the ``sub_query`` sequence in the tree.
    Necessary for the calculation of the approximation.
    This method is the fast version of the method
    :func:`~pyCFOFiSAX._tree_iSAX.TreeISAX.vrang_list`.

    .. note::
        This method does not traverse the tree, but directly prunes the leaf nodes.
        Preserved (uncut) leaves will be used by the approximation function.

    :param sub_query: The sequence to be evaluated
    :param ntss_tmp: Reference sequences (i.e. reference history) in PAA format

    :returns: The vrang list of ``sub_query``
    :rtype: list(float)
    """
    num_ts_by_node = []
    for i, node in enumerate(self.get_list_nodes_leaf()):
        num_ts_by_node.append(node.get_nb_sequences())
    num_ts_by_node = np_array(num_ts_by_node)

    if not hasattr(self, 'index_cdf_bin'):
        self.index_cdf_bin = np_linspace(-4.0, 4.0, num=1000)
    if not hasattr(self, 'cdf_bins'):
        self.cdf_bins = scipy_norm.cdf(self.index_cdf_bin, 0, 1)

    q_paa = self.isax.transform_paa([sub_query])[0]
    distance_q_p = cdist([q_paa.reshape(q_paa.shape[:-1])], ntss_tmp)[0]

    return vrang_list_for_all_seq_ref(len(ntss_tmp), distance_q_p,
                                      self.max_array_leaf, self.min_array_leaf,
                                      self.cdf_mean, self.cdf_std,
                                      num_ts_by_node,
                                      self.index_cdf_bin, self.cdf_bins)
def _percent_correct_plot(self, rel_dists, taxa_for_dist_inference, output_prefix):
    """Create plots showing correctly classified taxa for different relative distance values.

    Parameters
    ----------
    rel_dists : d[rank_index][taxon] -> relative divergence
        Relative divergence of taxa at each rank.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring relative divergence thresholds.
    output_prefix : str
        Prefix for plots.
    """

    print ''
    print '  Relative divergence thresholds (rank, threshold, parent taxa, child taxa):'

    ranks = sorted(rel_dists.keys())
    rel_dist_thresholds = []
    for i in xrange(ranks[0], ranks[-1]):
        parent_rank = i
        child_rank = i + 1

        # determine classification results for relative divergence
        # values between the medians of adjacent taxonomic ranks
        parent_rds = []
        for taxa, rd in rel_dists[parent_rank].iteritems():
            if taxa in taxa_for_dist_inference:
                parent_rds.append(rd)
        parent_p50 = np_percentile(parent_rds, 50)

        child_rds = []
        for taxa, rd in rel_dists[child_rank].iteritems():
            if taxa in taxa_for_dist_inference:
                child_rds.append(rd)
        child_p50 = np_percentile(child_rds, 50)

        r = []
        y_parent = []
        y_child = []
        y_mean_corr = []
        for test_r in np_linspace(parent_p50, child_p50, 100):
            parent_cor = float(sum([1 for rd in parent_rds if rd <= test_r])) / len(parent_rds)
            child_cor = float(sum([1 for rd in child_rds if rd > test_r])) / len(child_rds)

            r.append(test_r)
            y_parent.append(parent_cor)
            y_child.append(child_cor)
            y_mean_corr.append(0.5 * parent_cor + 0.5 * child_cor)

        # create plot of correctly classified taxa
        self.fig.clear()
        self.fig.set_size_inches(6, 6)
        ax = self.fig.add_subplot(111)
        ax.plot(r, y_parent, 'k--', label=Taxonomy.rank_labels[i])
        ax.plot(r, y_child, 'k:', label=Taxonomy.rank_labels[i + 1])
        ax.plot(r, y_mean_corr, 'r-', label='mean')
        legend = ax.legend(loc='upper left')
        legend.draw_frame(False)

        # find maximum of mean correct classification
        max_mean = max(y_mean_corr)
        r_max_values = [r[i] for i, rd in enumerate(y_mean_corr) if rd == max_mean]
        r_max_value = np_mean(r_max_values)  # Note: this will fail if there are multiple local maxima
        print '    %s\t%.3f\t%d\t%d' % (Taxonomy.rank_labels[parent_rank], r_max_value, len(parent_rds), len(child_rds))

        # check that there is a single local maximum
        rd_indices = [i for i, rd in enumerate(y_mean_corr) if rd == max_mean]
        for rd_index in xrange(0, len(rd_indices) - 1):
            if rd_indices[rd_index] != rd_indices[rd_index + 1] - 1:
                print '[Warning] There are multiple local maxima, so estimated relative divergence threshold will be invalid.'

        rel_dist_thresholds.append(r_max_value)

        y_min, _y_max = ax.get_ylim()
        ax.axvline(x=r_max_value, ymin=0, ymax=1, color='r', ls='--')
        ax.text(r_max_value + 0.001, y_min + 0.01, '%.3f' % r_max_value, horizontalalignment='left')

        ax.set_xlabel('relative distance')
        ax.set_ylabel('% taxa correctly classified')

        self.prettify(ax)

        self.fig.tight_layout(pad=1)
        self.fig.savefig(output_prefix + '.%s_%s.png' % (Taxonomy.rank_labels[parent_rank], Taxonomy.rank_labels[child_rank]), dpi=96)

    print ''

    return rel_dist_thresholds
def _distribution_plot(self, rel_dists, rel_dist_thresholds, taxa_for_dist_inference, distribution_table, plot_file):
    """Create plot showing the distribution of taxa at each taxonomic rank.

    Parameters
    ----------
    rel_dists : d[rank_index][taxon] -> relative divergence
        Relative divergence of taxa at each rank.
    rel_dist_thresholds : list
        Relative distance cutoffs for defining ranks.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    distribution_table : str
        Desired name of output table with distribution information.
    plot_file : str
        Desired name of output plot.
    """

    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    # create normal distributions
    for i, rank in enumerate(sorted(rel_dists.keys())):
        v = [dist for taxa, dist in rel_dists[rank].iteritems() if taxa in taxa_for_dist_inference]
        u = np_mean(v)
        rv = norm(loc=u, scale=np_std(v))
        x = np_linspace(rv.ppf(0.001), rv.ppf(0.999), 1000)
        nd = rv.pdf(x)
        ax.plot(x, 0.75 * (nd / max(nd)) + i, 'b-', alpha=0.6, zorder=2)
        ax.plot((u, u), (i, i + 0.5), 'b-', zorder=2)

    # create percentile lines
    percentiles = {}
    for i, rank in enumerate(sorted(rel_dists.keys())):
        v = [dist for taxa, dist in rel_dists[rank].iteritems() if taxa in taxa_for_dist_inference]
        p10, p50, p90 = np_percentile(v, [10, 50, 90])
        ax.plot((p10, p10), (i, i + 0.5), 'r-', zorder=2)
        ax.plot((p50, p50), (i, i + 0.5), 'r-', zorder=2)
        ax.plot((p90, p90), (i, i + 0.5), 'r-', zorder=2)
        percentiles[i] = [p10, p50, p90]

    # create scatter plot and results table
    fout = open(distribution_table, 'w')
    fout.write('Taxa\tRelative Distance\tRank cutoff\tRank outlier\tP10\tMedian\tP90\tPercentile outlier\n')
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []
    rel_dist_thresholds += [1.0]  # append boundary for species
    for i, rank in enumerate(sorted(rel_dists.keys())):
        rank_label = Taxonomy.rank_labels[rank]
        rank_labels.append(rank_label + ' (%d)' % len(rel_dists[rank]))

        for clade_label, dist in rel_dists[rank].iteritems():
            x.append(dist)
            y.append(i)
            labels.append(clade_label)

            if clade_label in taxa_for_dist_inference:
                c.append((0.0, 0.0, 0.5))
            else:
                c.append((0.5, 0.5, 0.5))

            p10, p50, p90 = percentiles[i]
            percentile_outlier = not (dist >= p10 and dist <= p90)

            if i == 0:
                rank_cutoff = rel_dist_thresholds[i]
                rank_outlier = dist > rank_cutoff
            else:
                rank_cutoff = rel_dist_thresholds[i]
                upper_rank_cutoff = rel_dist_thresholds[i - 1]
                rank_outlier = not (dist >= upper_rank_cutoff and dist <= rank_cutoff)

            v = [clade_label, dist, rank_cutoff, str(rank_outlier)]
            v += percentiles[i] + [str(percentile_outlier)]
            fout.write('%s\t%.2f\t%.2f\t%s\t%.2f\t%.2f\t%.2f\t%s\n' % tuple(v))
    fout.close()

    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1)

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('relative distance')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.05, 1.05])

    ax.set_ylabel('rank (no. taxa)')
    ax.set_yticks(xrange(0, len(rel_dists)))
    ax.set_ylim([-0.2, len(rel_dists) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # plot relative divergence threshold lines
    y_min, y_max = ax.get_ylim()
    for threshold in rel_dist_thresholds[0:-1]:  # don't draw species boundary
        ax.plot((threshold, threshold), (y_min, y_max), color='r', ls='--')
        ax.text(threshold + 0.001, y_max, '%.3f' % threshold, horizontalalignment='center')

    # make plot interactive
    mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
    mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
    mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=96)
def process(self):
    n_id = node_id(self)
    nvBGL.callback_disable(n_id)
    inputs = self.inputs
    # end early
    if not self.activate:
        return

    if self.mode == 'Number':
        if not inputs['Number'].is_linked:
            return
        numbers = inputs['Number'].sv_get(default=[[]])
    elif self.mode == 'Curve':
        if not inputs['Curve'].is_linked:
            return
        curves = inputs['Curve'].sv_get(default=[[]])
    else:
        if not inputs['Vecs'].is_linked:
            return
        vecs = inputs['Vecs'].sv_get(default=[[]])

    edges = inputs['Edges'].sv_get(default=[[]])
    polygons = inputs['Polygons'].sv_get(default=[[]])
    vector_color = inputs['Vector Color'].sv_get(default=[[self.vector_color]])
    edge_color = inputs['Edge Color'].sv_get(default=[[self.edge_color]])
    poly_color = inputs['Polygon Color'].sv_get(default=[[self.polygon_color]])
    seed_set(self.random_seed)

    x, y, config = self.create_config()

    config.vector_color = vector_color
    config.edge_color = edge_color
    config.poly_color = poly_color
    config.edges = edges

    if self.mode == 'Number':
        config.size = self.drawing_size
        geom = generate_number_geom(config, numbers)
    elif self.mode == 'Path':
        geom = generate_graph_geom(config, vecs)
    elif self.mode == 'Curve':
        paths = []
        for curve in curves:
            t_min, t_max = curve.get_u_bounds()
            ts = np_linspace(t_min, t_max, num=self.curve_samples, dtype=np_float64)
            paths.append(curve.evaluate_array(ts).tolist())
        geom = generate_graph_geom(config, paths)
    else:
        config.polygons = polygons
        if not inputs['Edges'].is_linked and self.edge_toggle:
            config.edges = polygons_to_edges(polygons, unique_edges=True)
        geom = generate_mesh_geom(config, vecs)

    draw_data = {
        'mode': 'custom_function',
        'tree_name': self.id_data.name[:],
        'loc': (x, y),
        'custom_function': view_2d_geom,
        'args': (geom, config)
    }
    nvBGL.callback_enable(n_id, draw_data)
def vrang_list(self, sub_query: np_array, ntss_tmp: np_ndarray):
    """
    Get the vrang list for the ``sub_query`` sequence in the tree.
    Necessary for the calculation of the approximation.

    The same method, faster but without the tree traversal:
    :func:`~pyCFOFiSAX._tree_iSAX.TreeISAX.vrang_list_faster`.

    :param sub_query: The sequence to be evaluated
    :param ntss_tmp: Reference sequences (i.e. reference history)

    :returns: The vrang list of ``sub_query``
    :rtype: list(float)
    """
    if not hasattr(self, 'index_cdf_bin'):
        self.index_cdf_bin = np_linspace(-4.0, 4.0, num=1000)
    if not hasattr(self, 'cdf_bins'):
        self.cdf_bins = scipy_norm.cdf(self.index_cdf_bin, 0, 1)

    # List of vrang
    # TODO np_array
    k_list_result = []

    q_paa = self.isax.transform_paa([sub_query])[0]
    distance_q_p = cdist([q_paa.reshape(q_paa.shape[:-1])], ntss_tmp)[0]

    # For any object P
    for stop_ite, p_paa in enumerate(ntss_tmp):
        p_name = stop_ite

        # number of objects that are too close
        count_ts_too_nn = 0

        # Real distance
        distance = distance_q_p[stop_ite]

        max_array_p_bool = self.max_array[p_name] < distance
        min_array_p_bool = self.min_array[p_name] <= distance

        nodes_list_fifo = []
        nodes_list_fifo.extend(self.root.nodes)

        while nodes_list_fifo:
            node_nn = nodes_list_fifo.pop(0)

            if max_array_p_bool[node_nn.id_numpy]:
                count_ts_too_nn += node_nn.get_nb_sequences()
            elif node_nn.terminal:
                if min_array_p_bool[node_nn.id_numpy]:
                    cdf_mean_tmp = self.cdf_mean[p_name][node_nn.id_numpy_leaf]
                    cdf_std_tmp = self.cdf_std[node_nn.id_numpy_leaf]
                    if cdf_std_tmp > 0.0:
                        distance_normalized = (distance - cdf_mean_tmp) / cdf_std_tmp
                        if distance_normalized > 4:
                            count_ts_too_nn += node_nn.get_nb_sequences()
                        elif -4 <= distance_normalized:
                            index_for_bin = bisect_bisect(self.index_cdf_bin, distance_normalized)
                            count_ts_too_nn += self.cdf_bins[index_for_bin] * node_nn.get_nb_sequences()
                    else:
                        if distance > cdf_mean_tmp:
                            count_ts_too_nn += node_nn.get_nb_sequences()
            elif min_array_p_bool[node_nn.id_numpy]:
                nodes_list_fifo.extend(node_nn.nodes)

        # Save the estimated position of the query centroid relative to P
        k_list_result.append(count_ts_too_nn)

    return k_list_result
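# The leaf-level estimate above normalizes the real distance by the leaf's mean/std and
# reads the standard normal CDF from the precomputed 1000-point table instead of calling
# scipy_norm.cdf for every leaf. A minimal standalone sketch of that lookup (the
# per-leaf statistics below are made up for illustration):
from bisect import bisect as bisect_bisect
from numpy import linspace as np_linspace
from scipy.stats import norm as scipy_norm

index_cdf_bin = np_linspace(-4.0, 4.0, num=1000)   # z-score grid
cdf_bins = scipy_norm.cdf(index_cdf_bin, 0, 1)     # Phi(z) at each grid point

distance, cdf_mean_tmp, cdf_std_tmp = 2.7, 2.1, 0.4
distance_normalized = (distance - cdf_mean_tmp) / cdf_std_tmp
if -4 <= distance_normalized < 4:
    # first grid point above the z-score; close to scipy_norm.cdf(distance_normalized)
    fraction_closer = cdf_bins[bisect_bisect(index_cdf_bin, distance_normalized)]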
def _distribution_plot(self, rel_dists, taxa_for_dist_inference, distribution_table, plot_file):
    """Create plot showing the distribution of taxa at each taxonomic rank.

    Parameters
    ----------
    rel_dists : d[rank_index][taxon] -> relative divergence
        Relative divergence of taxa at each rank.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    distribution_table : str
        Desired name of output table with distribution information.
    plot_file : str
        Desired name of output plot.
    """

    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    # create normal distributions
    for i, rank in enumerate(sorted(rel_dists.keys())):
        v = [dist for taxa, dist in rel_dists[rank].items() if taxa in taxa_for_dist_inference]
        if len(v) < 2:
            continue

        u = np_mean(v)
        rv = norm(loc=u, scale=np_std(v))
        x = np_linspace(rv.ppf(0.001), rv.ppf(0.999), 1000)
        nd = rv.pdf(x)
        # ax.plot(x, 0.75 * (nd / max(nd)) + i, 'b-', alpha=0.6, zorder=2)
        # ax.plot((u, u), (i, i + 0.5), 'b-', zorder=2)

    # create percentile and classification boundary lines
    percentiles = {}
    for i, rank in enumerate(sorted(rel_dists.keys())):
        v = [dist for taxa, dist in rel_dists[rank].items() if taxa in taxa_for_dist_inference]
        if len(v) == 0:
            continue

        p10, p50, p90 = np_percentile(v, [10, 50, 90])
        ax.plot((p10, p10), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p50, p50), (i, i + 0.5), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p90, p90), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)

        for b in [-0.2, -0.1, 0.1, 0.2]:
            boundary = p50 + b
            if boundary < 1.0 and boundary > 0.0:
                if abs(b) == 0.1:
                    c = (1.0, 0.65, 0.0)  # orange
                else:
                    c = (1.0, 0.0, 0.0)
                ax.plot((boundary, boundary), (i, i + 0.5), c=c, lw=2, zorder=2)

        percentiles[i] = [p10, p50, p90]

    # create scatter plot and results table
    fout = open(distribution_table, 'w')
    fout.write('Taxa\tRelative Distance\tP10\tMedian\tP90\tPercentile outlier\n')
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []
    for i, rank in enumerate(sorted(rel_dists.keys())):
        rank_label = Taxonomy.rank_labels[rank]
        rank_labels.append(rank_label + ' (%d)' % len(rel_dists[rank]))

        mono = []
        poly = []
        no_inference = []
        for clade_label, dist in rel_dists[rank].items():
            x.append(dist)
            y.append(i)
            labels.append(clade_label)

            if is_integer(clade_label.split('^')[-1]):
                # taxa with a numerical suffix after a caret indicate
                # polyphyletic groups when decorated with tax2tree
                c.append((1.0, 0.0, 0.0))
                poly.append(dist)
            elif clade_label not in taxa_for_dist_inference:
                c.append((0.3, 0.3, 0.3))
                no_inference.append(dist)
            else:
                c.append((0.0, 0.0, 1.0))
                mono.append(dist)

            # report results
            v = [clade_label, dist]
            if i in percentiles:
                p10, p50, p90 = percentiles[i]
                percentile_outlier = not (dist >= p10 and dist <= p90)
                v += percentiles[i] + [str(percentile_outlier)]
            else:
                percentile_outlier = 'Insufficient data to calculate percentiles'
                v += [-1, -1, -1] + [str(percentile_outlier)]
            fout.write('%s\t%.2f\t%.2f\t%.2f\t%.2f\t%s\n' % tuple(v))

        # histogram for each rank
        mono = np_array(mono)
        no_inference = np_array(no_inference)
        poly = np_array(poly)
        binwidth = 0.025
        bins = np_arange(0, 1.0 + binwidth, binwidth)

        d = len(mono) + len(poly) + len(no_inference)
        if d == 0:
            break

        w = float(len(mono)) / d
        n = 0
        if len(mono) > 0:
            mono_max_count = max(np_histogram(mono, bins=bins)[0])
            mono_weights = np_ones_like(mono) * (1.0 / mono_max_count)
            n, b, p = ax.hist(mono, bins=bins, color=(0.0, 0.0, 1.0), alpha=0.25,
                              weights=0.9 * w * mono_weights, bottom=i, lw=0, zorder=0)

        if len(no_inference) > 0:
            no_inference_max_count = max(np_histogram(no_inference, bins=bins)[0])
            no_inference_weights = np_ones_like(no_inference) * (1.0 / no_inference_max_count)
            ax.hist(no_inference, bins=bins, color=(0.3, 0.3, 0.3), alpha=0.25,
                    weights=0.9 * (1.0 - w) * no_inference_weights, bottom=i + n, lw=0, zorder=0)

        if len(poly) > 0:
            poly_max_count = max(np_histogram(poly, bins=bins)[0])
            poly_weights = np_ones_like(poly) * (1.0 / poly_max_count)
            ax.hist(poly, bins=bins, color=(1.0, 0.0, 0.0), alpha=0.25,
                    weights=0.9 * (1.0 - w) * poly_weights, bottom=i + n, lw=0, zorder=0)
    fout.close()

    # overlay scatter plot elements
    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1)

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('relative distance')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.05, 1.05])

    ax.set_ylabel('rank (no. taxa)')
    ax.set_yticks(range(0, len(rel_dists)))
    ax.set_ylim([-0.2, len(rel_dists) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # make plot interactive
    mpld3.plugins.clear(self.fig)
    mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
    mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
    mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=self.dpi)
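# The per-rank histograms above normalize each category so its tallest bar has height
# ~0.9, then stack it at the rank's integer baseline via ``bottom=i``. A tiny standalone
# sketch of that weighting trick (made-up data, plain matplotlib):
from numpy import arange as np_arange, array as np_array, histogram as np_histogram, ones_like as np_ones_like
import matplotlib.pyplot as plt

vals = np_array([0.12, 0.15, 0.18, 0.21, 0.22, 0.40])   # made-up relative distances
bins = np_arange(0, 1.0 + 0.025, 0.025)
max_count = max(np_histogram(vals, bins=bins)[0])
weights = np_ones_like(vals) * (1.0 / max_count)         # tallest bar -> height 1
fig, ax = plt.subplots()
ax.hist(vals, bins=bins, weights=0.9 * weights, bottom=2, lw=0)   # stacked at rank index 2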
def _distribution_plot(self, rel_dists, taxa_for_dist_inference, distribution_table, plot_file):
    """Create plot showing the distribution of taxa at each taxonomic rank.

    Parameters
    ----------
    rel_dists : d[rank_index][taxon] -> relative divergence
        Relative divergence of taxa at each rank.
    taxa_for_dist_inference : iterable
        Taxa to consider when inferring distributions.
    distribution_table : str
        Desired name of output table with distribution information.
    plot_file : str
        Desired name of output plot.
    """

    self.fig.clear()
    self.fig.set_size_inches(12, 6)
    ax = self.fig.add_subplot(111)

    # create normal distributions
    for i, rank in enumerate(sorted(rel_dists.keys())):
        v = [dist for taxa, dist in rel_dists[rank].iteritems() if taxa in taxa_for_dist_inference]
        if len(v) < 2:
            continue

        u = np_mean(v)
        rv = norm(loc=u, scale=np_std(v))
        x = np_linspace(rv.ppf(0.001), rv.ppf(0.999), 1000)
        nd = rv.pdf(x)
        # ax.plot(x, 0.75 * (nd / max(nd)) + i, 'b-', alpha=0.6, zorder=2)
        # ax.plot((u, u), (i, i + 0.5), 'b-', zorder=2)

    # create percentile and classification boundary lines
    percentiles = {}
    for i, rank in enumerate(sorted(rel_dists.keys())):
        v = [dist for taxa, dist in rel_dists[rank].iteritems() if taxa in taxa_for_dist_inference]
        if len(v) == 0:
            continue

        p10, p50, p90 = np_percentile(v, [10, 50, 90])
        ax.plot((p10, p10), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p50, p50), (i, i + 0.5), c=(0.3, 0.3, 0.3), lw=2, zorder=2)
        ax.plot((p90, p90), (i, i + 0.25), c=(0.3, 0.3, 0.3), lw=2, zorder=2)

        for b in [-0.2, -0.1, 0.1, 0.2]:
            boundary = p50 + b
            if boundary < 1.0 and boundary > 0.0:
                if abs(b) == 0.1:
                    c = (1.0, 0.65, 0.0)  # orange
                else:
                    c = (1.0, 0.0, 0.0)
                ax.plot((boundary, boundary), (i, i + 0.5), c=c, lw=2, zorder=2)

        percentiles[i] = [p10, p50, p90]

    # create scatter plot and results table
    fout = open(distribution_table, 'w')
    fout.write('Taxa\tRelative Distance\tP10\tMedian\tP90\tPercentile outlier\n')
    x = []
    y = []
    c = []
    labels = []
    rank_labels = []
    for i, rank in enumerate(sorted(rel_dists.keys())):
        rank_label = Taxonomy.rank_labels[rank]
        rank_labels.append(rank_label + ' (%d)' % len(rel_dists[rank]))

        mono = []
        poly = []
        no_inference = []
        for clade_label, dist in rel_dists[rank].iteritems():
            x.append(dist)
            y.append(i)
            labels.append(clade_label)

            if is_integer(clade_label.split('^')[-1]):
                # taxa with a numerical suffix after a caret indicate
                # polyphyletic groups when decorated with tax2tree
                c.append((1.0, 0.0, 0.0))
                poly.append(dist)
            elif clade_label not in taxa_for_dist_inference:
                c.append((0.3, 0.3, 0.3))
                no_inference.append(dist)
            else:
                c.append((0.0, 0.0, 1.0))
                mono.append(dist)

            # report results
            v = [clade_label, dist]
            if i in percentiles:
                p10, p50, p90 = percentiles[i]
                percentile_outlier = not (dist >= p10 and dist <= p90)
                v += percentiles[i] + [str(percentile_outlier)]
            else:
                percentile_outlier = 'Insufficient data to calculate percentiles'
                v += [-1, -1, -1] + [str(percentile_outlier)]
            fout.write('%s\t%.2f\t%.2f\t%.2f\t%.2f\t%s\n' % tuple(v))

        # histogram for each rank
        mono = np_array(mono)
        no_inference = np_array(no_inference)
        poly = np_array(poly)
        binwidth = 0.025
        bins = np_arange(0, 1.0 + binwidth, binwidth)

        w = float(len(mono)) / (len(mono) + len(poly) + len(no_inference))
        n = 0
        if len(mono) > 0:
            mono_max_count = max(np_histogram(mono, bins=bins)[0])
            mono_weights = np_ones_like(mono) * (1.0 / mono_max_count)
            n, b, p = ax.hist(mono, bins=bins, color=(0.0, 0.0, 1.0), alpha=0.25,
                              weights=0.9 * w * mono_weights, bottom=i, lw=0, zorder=0)

        if len(no_inference) > 0:
            no_inference_max_count = max(np_histogram(no_inference, bins=bins)[0])
            no_inference_weights = np_ones_like(no_inference) * (1.0 / no_inference_max_count)
            ax.hist(no_inference, bins=bins, color=(0.3, 0.3, 0.3), alpha=0.25,
                    weights=0.9 * (1.0 - w) * no_inference_weights, bottom=i + n, lw=0, zorder=0)

        if len(poly) > 0:
            poly_max_count = max(np_histogram(poly, bins=bins)[0])
            poly_weights = np_ones_like(poly) * (1.0 / poly_max_count)
            ax.hist(poly, bins=bins, color=(1.0, 0.0, 0.0), alpha=0.25,
                    weights=0.9 * (1.0 - w) * poly_weights, bottom=i + n, lw=0, zorder=0)
    fout.close()

    # overlay scatter plot elements
    scatter = ax.scatter(x, y, alpha=0.5, s=48, c=c, zorder=1)

    # set plot elements
    ax.grid(color=(0.8, 0.8, 0.8), linestyle='dashed')

    ax.set_xlabel('relative distance')
    ax.set_xticks(np_arange(0, 1.05, 0.1))
    ax.set_xlim([-0.05, 1.05])

    ax.set_ylabel('rank (no. taxa)')
    ax.set_yticks(xrange(0, len(rel_dists)))
    ax.set_ylim([-0.2, len(rel_dists) - 0.01])
    ax.set_yticklabels(rank_labels)

    self.prettify(ax)

    # make plot interactive
    mpld3.plugins.clear(self.fig)
    mpld3.plugins.connect(self.fig, mpld3.plugins.PointLabelTooltip(scatter, labels=labels))
    mpld3.plugins.connect(self.fig, mpld3.plugins.MousePosition(fontsize=10))
    mpld3.save_html(self.fig, plot_file[0:plot_file.rfind('.')] + '.html')

    self.fig.tight_layout(pad=1)
    self.fig.savefig(plot_file, dpi=self.dpi)
# Define a general function for derivatives.
def derivs(f, x, params):
    # Electrostatic potential.
    phi = f[0]
    # Electric field.
    e = f[1]
    # Calculate vi.
    vi = np_sqrt(params[0]**2 - 2*phi)
    # Derivatives (d2phidx2 is actually de/dx, which is the negative of d^2phi/dx^2).
    dphidx = -e
    d2phidx2 = params[0]/vi - np_exp(phi)
    # Result of the function in the order f is given.
    return [dphidx, d2phidx2]

# x-array
x = np_linspace(0, 40, 100)
# Starting conditions for f_initial = [phi_initial, E_initial].
fi = np_array([0, 0.001])
# Vs = 1 (as an array so we can use the generalised derivs function in the future)
vs = np_array([1.0])
# Integrate.
y = odeint(derivs, fi, x, args=(vs,))

# Make two subplots for the assignment.
plt.subplots(2, 2, figsize=(10, 10))
ax1 = plt.subplot(211)
ax2 = plt.subplot(212)
# Plot electrostatic potential and electric field.
ax1.plot(x, y[:, 0], label=r"Electrostatic Potential, $\phi$", linestyle="-")
ax1.plot(x, y[:, 1], label=r"Electric Field, $E$", linestyle="-")
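# The same integration can be done with scipy.integrate.solve_ivp, which current SciPy
# docs recommend over odeint. Note the swapped callback argument order (t first, then y).
# This is only a sketch reusing the derivs/fi/x/vs defined above.
from scipy.integrate import solve_ivp

sol = solve_ivp(lambda t, f: derivs(f, t, vs), (x[0], x[-1]), fi, t_eval=x)
phi, e_field = sol.y   # sol.y has shape (2, len(x)), transposed relative to odeint's output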
def InterpFun2D(self, ChaPhyValue):
    # For FIX_AXIS axes, rebuild the original axis points from offset, shift and count.
    if self.XaxisType == 'FIX_AXIS':
        TempList = []
        for i in range(ChaPhyValue[2][2]):
            XaxisValue = ChaPhyValue[2][0] + i * pow(2, ChaPhyValue[2][1])
            TempList.append(XaxisValue)
        ChaPhyValue[2] = TempList
    if self.YaxisType == 'FIX_AXIS':
        TempList = []
        for i in range(ChaPhyValue[3][2]):
            YaxisValue = ChaPhyValue[3][0] + i * pow(2, ChaPhyValue[3][1])
            TempList.append(YaxisValue)
        ChaPhyValue[3] = TempList

    XaxLenOld = len(ChaPhyValue[2])
    YaxLenOld = len(ChaPhyValue[3])
    # XaxListOld = [ChaPhyValue[2]] * YaxLenOld
    # YaxListOld = [ChaPhyValue[3]] * XaxLenOld

    # Reshape the flat value list into an (XaxLenOld, YaxLenOld) grid.
    ValueList = []
    for x in range(XaxLenOld):
        ValueLineList = []
        for y in range(YaxLenOld):
            ValueLineList += [ChaPhyValue[1][y + x * YaxLenOld]]
        ValueLineList = np_array(ValueLineList)
        ValueList += [ValueLineList]
    ValueArray = np_array(ValueList)
    XaxisArray = np_array([np_array(ChaPhyValue[2])] * YaxLenOld)
    YaxisArray = np_array([np_array(ChaPhyValue[3])] * XaxLenOld)
    MapFun = interpolate.interp2d(XaxisArray, YaxisArray, ValueArray, kind='cubic')

    # New axes: either the FIX_AXIS grid or a linspace over the old range.
    if self.YaxisType == 'FIX_AXIS':
        TempList = []
        for i in range(self.YaxisPNum):
            YaxisValue = self.YaxisBegin + i * pow(2, self.YaxisShift)
            TempList.append(YaxisValue)
        y = np_array(TempList)
    else:
        y = np_linspace(min(ChaPhyValue[3]), max(ChaPhyValue[3]), self.YaxisPNum)
    if self.XaxisType == 'FIX_AXIS':
        TempList = []
        for i in range(self.XaxisPNum):
            XaxisValue = self.XaxisBegin + i * pow(2, self.XaxisShift)
            TempList.append(XaxisValue)
        x = np_array(TempList)
    else:
        x = np_linspace(min(ChaPhyValue[2]), max(ChaPhyValue[2]), self.XaxisPNum)
    # print x
    # print y

    # Evaluate the interpolated map on the new grid and flatten back to a list.
    ValueArrayNew = MapFun(x, y)
    ValueListNew = []
    for Ynum in range(self.YaxisPNum):
        for Xnum in range(self.XaxisPNum):
            ValueListNew += [ValueArrayNew[Ynum][Xnum]]
    # print ValueListNew
    ChaPhyValue[1] = ValueListNew
    ChaPhyValue[2] = list(x)
    ChaPhyValue[3] = list(y)
    return ChaPhyValue
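# interpolate.interp2d used above is deprecated and removed in SciPy >= 1.14. A minimal
# sketch of the same cubic grid resampling with RegularGridInterpolator (assumes
# SciPy >= 1.9 for method='cubic'; the grid and values below are made up):
from numpy import linspace as np_linspace, meshgrid as np_meshgrid, stack as np_stack
from scipy.interpolate import RegularGridInterpolator

x_old = np_linspace(0.0, 4.0, 5)
y_old = np_linspace(0.0, 4.0, 5)
xg, yg = np_meshgrid(x_old, y_old, indexing='ij')
values = xg ** 2 + yg                                   # shape (5, 5), indexed [ix, iy]
grid_fun = RegularGridInterpolator((x_old, y_old), values, method='cubic')

xi, yi = np_meshgrid(np_linspace(0.0, 4.0, 9), np_linspace(0.0, 4.0, 9), indexing='ij')
values_new = grid_fun(np_stack([xi.ravel(), yi.ravel()], axis=-1)).reshape(xi.shape)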
def r5_dnn_image(target_dirname, chandat_obj=None, chandat_dnn_obj=None, is_saving_chandat_image=True):
    LOGGER.info('{}: r5: Turning chandat into upsampled envelope...'.format(target_dirname))
    if chandat_obj is None:
        chandat_obj = loadmat(os_path_join(target_dirname, CHANDAT_FNAME))
    f0 = chandat_obj['f0']
    if chandat_dnn_obj is None:
        chandat_dnn_obj = loadmat(os_path_join(target_dirname, CHANDAT_DNN_FNAME))
    chandat_dnn = chandat_dnn_obj['chandat_dnn']
    beam_position_x = chandat_dnn_obj['beam_position_x']
    depth = chandat_dnn_obj['depth']

    if f0.ndim and f0.ndim == 2:
        f0 = f0[0, 0]

    rf_data = chandat_dnn.sum(axis=1)
    del chandat_dnn, chandat_dnn_obj['chandat_dnn']

    # design a bandpass filter
    n = 4
    order = n / 2
    critical_frequencies = [1e6, 9e6] / (4 * f0 / 2)
    b, a = butter(order, critical_frequencies, btype='bandpass')  # Results are correct

    # chandat_dnn = chandat_dnn.astype(float, copy=False)  # REVIEW: necessary?
    rf_data_filt = filtfilt(b, a, rf_data, axis=0, padtype='odd', padlen=3 * (max(len(b), len(a)) - 1))  # Correct
    del a, b

    env = np_apply_along_axis(better_envelope, 0, rf_data_filt)
    # print('r5: env.shape =', env.shape)

    np_divide(env, env.max(), out=env)
    clip_to_eps(env)
    # np_clip(env, np_spacing(1), None, out=env)
    env_dB = np_zeros_like(env)
    np_log10(env, out=env_dB)
    np_multiply(env_dB, 20, out=env_dB)

    # Upscale lateral sampling
    up_scale = get_dict_from_file_json(os_path_join(target_dirname, TARGET_PARAMETERS_FNAME))[TARGET_PARAMETERS_KEY_SCALE_UPSAMPLE]
    up_scale_inverse = 1 / up_scale

    num_beams = env.shape[1]
    x = np_arange(1, num_beams + 1)
    new_x = np_arange(1, num_beams + up_scale_inverse, up_scale_inverse)

    # TODO: optimization: instead of doing this apply thing, can we pass in the
    # whole `env` and specify axis?
    def curried_pchip(y):
        return pchip(x, y)(new_x)

    env_up = np_apply_along_axis(curried_pchip, 1, env)
    # print('r5: env_up.shape =', env_up.shape)
    del curried_pchip, new_x, x

    clip_to_eps(env_up)
    # np_clip(env_up, np_spacing(1), None, out=env_up)
    env_up_dB = np_zeros_like(env_up)
    np_log10(env_up, out=env_up_dB)
    np_multiply(env_up_dB, 20, out=env_up_dB)

    beam_position_x_up = np_linspace(beam_position_x.min(), beam_position_x.max(), env_up_dB.shape[1])  # pylint: disable=E1101, E1136
    del beam_position_x

    chandat_image_obj = {
        'rf_data': rf_data,
        'rf_data_filt': rf_data_filt,
        'env': env,
        'env_dB': env_dB,
        'envUp': env_up,
        'envUp_dB': env_up_dB,
        'beam_position_x_up': beam_position_x_up,
        'depth': depth,
    }

    if is_saving_chandat_image is True:
        chandat_image_path = os_path_join(target_dirname, CHANDAT_IMAGE_SAVE_FNAME)
        savemat(chandat_image_path, chandat_image_obj)

    LOGGER.info('{}: r5 Done'.format(target_dirname))

    return chandat_image_obj
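# The filter stage above is an order-2 Butterworth band-pass run zero-phase with
# filtfilt. Standalone sketch of that step with made-up sampling parameters (fs and the
# array sizes are illustrative only):
from numpy.random import randn
from scipy.signal import butter, filtfilt

fs = 40e6                                    # sampling rate, Hz (assumed for the sketch)
low, high = 1e6, 9e6                         # pass band, Hz
b, a = butter(2, [low / (fs / 2), high / (fs / 2)], btype='bandpass')
rf = randn(2048, 64)                         # fake RF data: samples x beams
rf_filt = filtfilt(b, a, rf, axis=0, padtype='odd', padlen=3 * (max(len(b), len(a)) - 1))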